3 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
17 module TH
= Token_helpers
18 module LP
= Lexer_parser
20 module Stat
= Parsing_stat
22 (*****************************************************************************)
24 (*****************************************************************************)
25 let pr2_err, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_parsing
27 (*****************************************************************************)
29 (*****************************************************************************)
31 let lexbuf_to_strpos lexbuf
=
32 (Lexing.lexeme lexbuf
, Lexing.lexeme_start lexbuf
)
34 let token_to_strpos tok
=
35 (TH.str_of_tok tok
, TH.pos_of_tok tok
)
38 let error_msg_tok tok
=
39 let file = TH.file_of_tok tok
in
40 if !Flag_parsing_c.verbose_parsing
41 then Common.error_message
file (token_to_strpos tok
)
42 else ("error in " ^
file ^
"; set verbose_parsing for more info")
45 let print_bad line_error
(start_line
, end_line
) filelines
=
47 pr2
("badcount: " ^ i_to_s
(end_line
- start_line
));
49 for i
= start_line
to end_line
do
50 let line = filelines
.(i
) in
53 then pr2
("BAD:!!!!!" ^
" " ^
line)
54 else pr2
("bad:" ^
" " ^
line)
60 let mk_info_item2 filename toks
=
61 let buf = Buffer.create
100 in
63 (* old: get_slice_file filename (line1, line2) *)
65 toks
+> List.iter
(fun tok
->
66 match TH.pinfo_of_tok tok
with
67 | Ast_c.OriginTok _
->
68 Buffer.add_string
buf (TH.str_of_tok tok
)
69 | Ast_c.AbstractLineTok _
->
78 let mk_info_item a b
=
79 Common.profile_code
"C parsing.mk_info_item"
80 (fun () -> mk_info_item2 a b
)
83 let info_same_line line xs
=
84 xs
+> List.filter
(fun info
-> Ast_c.line_of_info info
=|= line)
87 (*****************************************************************************)
88 (* Stats on what was passed/commentized *)
89 (*****************************************************************************)
91 let commentized xs
= xs
+> Common.map_filter
(function
92 | Parser_c.TCommentCpp
(cppkind
, ii
) ->
93 let s = Ast_c.str_of_info ii
in
95 match !Flag_parsing_c.filter_passed_level
with
98 List.mem cppkind
[Token_c.CppAttr
]
102 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
]
106 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppDirective
]
110 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppMacro
]
116 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppDirective
;Token_c.CppMacro
]
123 | _
-> failwith
"not valid level passing number"
125 if legal_passing then None
else Some
(ii
.Ast_c.pinfo
)
130 | s when s =~ "KERN_.*" -> None
131 | s when s =~ "__.*" -> None
133 Some (ii.Ast_c.pinfo)
138 | Parser_c.TCommentMisc ii
139 | Parser_c.TAction ii
141 Some
(ii
.Ast_c.pinfo
)
146 let count_lines_commentized xs
=
147 let line = ref (-1) in
153 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
154 let newline = pinfo
.Common.line in
166 let print_commentized xs
=
167 let line = ref (-1) in
169 let ys = commentized xs
in
173 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
174 let newline = pinfo
.Common.line in
175 let s = pinfo
.Common.str
in
176 let s = Str.global_substitute
177 (Str.regexp
"\n") (fun s -> "") s
180 then prerr_string
(s ^
" ")
183 then pr2_no_nl
"passed:"
184 else pr2_no_nl
"\npassed:";
189 if not
(null
ys) then pr2
"";
195 (*****************************************************************************)
197 (*****************************************************************************)
199 (* called by parse_print_error_heuristic *)
201 let table = Common.full_charpos_to_pos_large
file in
203 Common.with_open_infile
file (fun chan
->
204 let lexbuf = Lexing.from_channel chan
in
206 let rec tokens_aux acc
=
207 let tok = Lexer_c.token
lexbuf in
208 (* fill in the line and col information *)
209 let tok = tok +> TH.visitor_info_of_tok
(fun ii
->
210 { ii
with Ast_c.pinfo
=
211 (* could assert pinfo.filename = file ? *)
212 match Ast_c.pinfo_of_info ii
with
213 Ast_c.OriginTok pi
->
214 Ast_c.OriginTok
(Common.complete_parse_info_large
file table pi
)
215 | Ast_c.ExpandedTok
(pi
,vpi
) ->
216 Ast_c.ExpandedTok
((Common.complete_parse_info_large
file table pi
),vpi
)
217 | Ast_c.FakeTok
(s,vpi
) -> Ast_c.FakeTok
(s,vpi
)
218 | Ast_c.AbstractLineTok pi
-> failwith
"should not occur"
223 then List.rev
(tok::acc
)
224 else tokens_aux (tok::acc
)
228 | Lexer_c.Lexical
s ->
229 failwith
("lexical error " ^
s ^
"\n =" ^
230 (Common.error_message
file (lexbuf_to_strpos lexbuf)))
234 let time_lexing ?
(profile
=true) a
=
236 then Common.profile_code_exclusif
"LEXING" (fun () -> tokens2 a
)
238 let tokens ?profile a
=
239 Common.profile_code
"C parsing.tokens" (fun () -> time_lexing ?profile a
)
242 let tokens_of_string string =
243 let lexbuf = Lexing.from_string
string in
245 let rec tokens_s_aux () =
246 let tok = Lexer_c.token
lexbuf in
249 else tok::(tokens_s_aux ())
253 | Lexer_c.Lexical
s -> failwith
("lexical error " ^
s ^
"\n =" )
257 (*****************************************************************************)
258 (* Parsing, but very basic, no more used *)
259 (*****************************************************************************)
262 * !!!Those function use refs, and are not reentrant !!! so take care.
263 * It use globals defined in Lexer_parser.
265 * update: because now lexer return comments tokens, those functions
266 * may not work anymore.
270 let lexbuf = Lexing.from_channel
(open_in
file) in
271 let result = Parser_c.main
Lexer_c.token
lexbuf in
275 let parse_print_error file =
276 let chan = (open_in
file) in
277 let lexbuf = Lexing.from_channel
chan in
279 let error_msg () = Common.error_message
file (lexbuf_to_strpos lexbuf) in
281 lexbuf +> Parser_c.main
Lexer_c.token
283 | Lexer_c.Lexical
s ->
284 failwith
("lexical error " ^
s^
"\n =" ^
error_msg ())
285 | Parsing.Parse_error
->
286 failwith
("parse error \n = " ^
error_msg ())
287 | Semantic_c.Semantic
(s, i
) ->
288 failwith
("semantic error " ^
s ^
"\n =" ^
error_msg ())
294 (*****************************************************************************)
295 (* Parsing subelements, useful to debug parser *)
296 (*****************************************************************************)
299 * !!!Those function use refs, and are not reentrant !!! so take care.
300 * It use globals defined in Lexer_parser.
305 * let parse_gen parsefunc s =
306 * let lexbuf = Lexing.from_string s in
307 * let result = parsefunc Lexer_c.token lexbuf in
311 let parse_gen parsefunc
s =
312 let toks = tokens_of_string s +> List.filter
TH.is_not_comment
in
315 (* Why use this lexing scheme ? Why not classically give lexer func
316 * to parser ? Because I now keep comments in lexer. Could
317 * just do a simple wrapper that when comment ask again for a token,
318 * but maybe simpler to use cur_tok technique.
320 let all_tokens = ref toks in
321 let cur_tok = ref (List.hd
!all_tokens) in
325 if TH.is_eof
!cur_tok
326 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
328 let v = Common.pop2
all_tokens in
333 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
334 let result = parsefunc
lexer_function lexbuf_fake in
338 let type_of_string = parse_gen Parser_c.type_name
339 let statement_of_string = parse_gen Parser_c.statement
340 let expression_of_string = parse_gen Parser_c.expr
342 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
348 (*****************************************************************************)
349 (* Consistency checking *)
350 (*****************************************************************************)
353 * could check that an ident has always the same class, be it a typedef
354 * (but sometimes do 'acpi_val acpi_val;'), an ident, a TMacroStatement,
359 | CIdent
(* can be var, func, field, tag, enum constant *)
362 let str_of_class_ident = function
364 | CTypedef
-> "Typedef"
374 (* but take care that must still be able to use '=' *)
375 type context
= InFunction
| InEnum
| InStruct
| InInitializer
| InParams
377 | CIdent
of class_ident
381 | CCommentCpp
of cppkind
391 | CReservedKwd
(type | decl
| qualif
| flow
| misc
| attr
)
394 let ident_to_typename ident
: Ast_c.fullType
=
395 Ast_c.mk_ty
(Ast_c.TypeName
(ident
, Ast_c.noTypedefDef
())) Ast_c.noii
398 (* parse_typedef_fix4 *)
399 let consistency_checking2 xs
=
401 (* first phase, gather data *)
402 let stat = Hashtbl.create
101 in
404 (* default value for hash *)
405 let v1 () = Hashtbl.create
101 in
408 let bigf = { Visitor_c.default_visitor_c
with
410 Visitor_c.kexpr
= (fun (k
,bigf) x
->
411 match Ast_c.unwrap_expr x
with
412 | Ast_c.Ident
(id
) ->
413 let s = Ast_c.str_of_name id
in
415 Common.hfind_default
s v1 +> Common.hfind_default CIdent
v2 +>
416 (fun aref
-> incr aref
)
420 Visitor_c.ktype
= (fun (k
,bigf) t
->
421 match Ast_c.unwrap_typeC t
with
422 | Ast_c.TypeName
(name
,_typ
) ->
423 let s = Ast_c.str_of_name name
in
425 Common.hfind_default
s v1 +> Common.hfind_default CTypedef
v2 +>
426 (fun aref
-> incr aref
)
432 xs
+> List.iter
(fun (p
, info_item
) -> Visitor_c.vk_toplevel
bigf p
);
435 let ident_to_type = ref [] in
438 (* second phase, analyze data *)
439 stat +> Hashtbl.iter
(fun k
v ->
440 let xs = Common.hash_to_list
v in
441 if List.length
xs >= 2
443 pr2_err ("CONFLICT:" ^ k
);
444 let sorted = xs +> List.sort
(fun (ka
,va
) (kb
,vb
) ->
447 | CTypedef
, _
-> 1 (* first is smaller *)
453 let sorted = List.rev
sorted in
455 | [CTypedef
, i1
;CIdent
, i2
] ->
456 pr2_err ("transforming some ident in typedef");
457 push2 k
ident_to_type;
459 pr2_err ("TODO:other transforming?");
464 (* third phase, update ast.
465 * todo? but normally should try to handle correctly scope ? maybe sometime
466 * sizeof(id) and even if id was for a long time an identifier, maybe
467 * a few time, because of the scope it's actually really a type.
469 if (null
!ident_to_type)
472 let bigf = { Visitor_c.default_visitor_c_s
with
473 Visitor_c.kdefineval_s
= (fun (k
,bigf) x
->
475 | Ast_c.DefineExpr e
->
476 (match Ast_c.unwrap_expr e
with
477 | Ast_c.Ident
(ident
) ->
478 let s = Ast_c.str_of_name ident
in
479 if List.mem
s !ident_to_type
481 let t = ident_to_typename ident
in
488 Visitor_c.kexpr_s
= (fun (k
, bigf) x
->
489 match Ast_c.get_e_and_ii x
with
490 | (Ast_c.SizeOfExpr e
, tref
), isizeof
->
491 let i1 = tuple_of_list1 isizeof
in
492 (match Ast_c.get_e_and_ii e
with
493 | (Ast_c.ParenExpr e
, _
), iiparen
->
494 let (i2
, i3
) = tuple_of_list2 iiparen
in
495 (match Ast_c.get_e_and_ii e
with
496 | (Ast_c.Ident
(ident
), _
), _ii
->
498 let s = Ast_c.str_of_name ident
in
499 if List.mem
s !ident_to_type
501 let t = ident_to_typename ident
in
502 (Ast_c.SizeOfType
t, tref
),[i1;i2
;i3
]
511 xs +> List.map
(fun (p
, info_item
) ->
512 Visitor_c.vk_toplevel_s
bigf p
, info_item
516 let consistency_checking a
=
517 Common.profile_code
"C consistencycheck" (fun () -> consistency_checking2 a
)
521 (*****************************************************************************)
523 (*****************************************************************************)
525 let is_define_passed passed
=
526 let xs = passed
+> List.rev
+> List.filter
TH.is_not_comment
in
527 if List.length
xs >= 2
529 (match Common.head_middle_tail
xs with
530 | Parser_c.TDefine _
, _
, Parser_c.TDefEOL _
->
535 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
539 let is_defined_passed_bis last_round
=
540 let xs = last_round
+> List.filter
TH.is_not_comment
in
542 | Parser_c.TDefine _
::_
-> true
545 (* ---------------------------------------------------------------------- *)
548 (* todo: do something if find Parser_c.Eof ? *)
549 let rec find_next_synchro next already_passed
=
551 (* Maybe because not enough }, because for example an ifdef contains
552 * in both branch some opening {, we later eat too much, "on deborde
553 * sur la fonction d'apres". So already_passed may be too big and
554 * looking for next synchro point starting from next may not be the
555 * best. So maybe we can find synchro point inside already_passed
556 * instead of looking in next.
558 * But take care! must progress. We must not stay in infinite loop!
559 * For instance now I have as a error recovery to look for
560 * a "start of something", corresponding to start of function,
561 * but must go beyond this start otherwise will loop.
562 * So look at premier(external_declaration2) in parser.output and
563 * pass at least those first tokens.
565 * I have chosen to start search for next synchro point after the
566 * first { I found, so quite sure we will not loop. *)
568 let last_round = List.rev already_passed
in
569 if is_defined_passed_bis last_round
570 then find_next_synchro_define
(last_round ++ next
) []
573 let (before
, after
) =
574 last_round +> Common.span
(fun tok ->
576 (* by looking at TOBrace we are sure that the "start of something"
577 * will not arrive too early
579 | Parser_c.TOBrace _
-> false
580 | Parser_c.TDefine _
-> false
584 find_next_synchro_orig
(after
++ next
) (List.rev before
)
588 and find_next_synchro_define next already_passed
=
591 pr2_err "ERROR-RECOV: end of file while in recovery mode";
593 | (Parser_c.TDefEOL i
as v)::xs ->
594 pr2_err ("ERROR-RECOV: found sync end of #define, line "^i_to_s
(TH.line_of_tok
v));
595 v::already_passed
, xs
597 find_next_synchro_define
xs (v::already_passed
)
602 and find_next_synchro_orig next already_passed
=
605 pr2_err "ERROR-RECOV: end of file while in recovery mode";
608 | (Parser_c.TCBrace i
as v)::xs when TH.col_of_tok
v =|= 0 ->
609 pr2_err ("ERROR-RECOV: found sync '}' at line "^i_to_s
(TH.line_of_tok
v));
612 | [] -> raise Impossible
(* there is a EOF token normally *)
614 (* still useful: now parser.mly allow empty ';' so normally no pb *)
615 | Parser_c.TPtVirg iptvirg
::xs ->
616 pr2_err "ERROR-RECOV: found sync bis, eating } and ;";
617 (Parser_c.TPtVirg iptvirg
)::v::already_passed
, xs
619 | Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
::xs ->
620 pr2_err "ERROR-RECOV: found sync bis, eating ident, }, and ;";
621 (Parser_c.TPtVirg iptvirg
)::(Parser_c.TIdent x
)::v::already_passed
,
624 | Parser_c.TCommentSpace sp
::Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
626 pr2_err "ERROR-RECOV: found sync bis, eating ident, }, and ;";
627 (Parser_c.TCommentSpace sp
)::
628 (Parser_c.TPtVirg iptvirg
)::
629 (Parser_c.TIdent x
)::
634 | Parser_c.TCommentNewline sp
::Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
636 pr2_err "ERROR-RECOV: found sync bis, eating ident, }, and ;";
637 (Parser_c.TCommentNewline sp
)::
638 (Parser_c.TPtVirg iptvirg
)::
639 (Parser_c.TIdent x
)::
645 v::already_passed
, xs
647 | v::xs when TH.col_of_tok
v =|= 0 && TH.is_start_of_something
v ->
648 pr2_err ("ERROR-RECOV: found sync col 0 at line "^ i_to_s
(TH.line_of_tok
v));
649 already_passed
, v::xs
652 find_next_synchro_orig
xs (v::already_passed
)
655 (*****************************************************************************)
656 (* Macro problem recovery *)
657 (*****************************************************************************)
658 module TV
= Token_views_c
660 let candidate_macros_in_passed2 passed defs_optional
=
664 passed
+> List.iter
(function
665 | Parser_c.TIdent
(s,_
)
666 (* bugfix: may have to undo some infered things *)
667 | Parser_c.TMacroIterator
(s,_
)
668 | Parser_c.TypedefIdent
(s,_
)
670 (match Common.hfind_option
s defs_optional
with
672 if s ==~
Parsing_hacks.regexp_macro
674 (* pr2 (spf "candidate: %s" s); *)
675 Common.push2
(s, def
) res
677 Common.push2
(s, def
) res2
687 let candidate_macros_in_passed a b
=
688 Common.profile_code
"MACRO managment" (fun () ->
689 candidate_macros_in_passed2 a b
)
693 let find_optional_macro_to_expand2 ~defs
toks =
695 let defs = Common.hash_of_list
defs in
697 let toks = toks +> Common.map
(function
699 (* special cases to undo *)
700 | Parser_c.TMacroIterator
(s, ii
) ->
701 if Hashtbl.mem
defs s
702 then Parser_c.TIdent
(s, ii
)
703 else Parser_c.TMacroIterator
(s, ii
)
705 | Parser_c.TypedefIdent
(s, ii
) ->
706 if Hashtbl.mem
defs s
707 then Parser_c.TIdent
(s, ii
)
708 else Parser_c.TypedefIdent
(s, ii
)
714 Parsing_hacks.fix_tokens_cpp ~macro_defs
:defs tokens
716 (* just calling apply_macro_defs and having a specialized version
717 * of the code in fix_tokens_cpp is not enough as some work such
718 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
719 * will not get the chance to be run on the new expanded tokens.
720 * Hence even if it's expensive, it's currently better to
721 * just call directly fix_tokens_cpp again here.
723 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
724 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
725 let paren_grouped = TV.mk_parenthised cleaner in
726 Cpp_token_c.apply_macro_defs
727 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
728 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
730 (* because the before field is used by apply_macro_defs *)
731 tokens2 := TV.rebuild_tokens_extented
!tokens2;
732 Parsing_hacks.insert_virtual_positions
733 (!tokens2 +> Common.acc_map
(fun x
-> x
.TV.tok))
735 let find_optional_macro_to_expand ~
defs a
=
736 Common.profile_code
"MACRO managment" (fun () ->
737 find_optional_macro_to_expand2 ~
defs a
)
741 (*****************************************************************************)
742 (* Include/Define hacks *)
743 (*****************************************************************************)
745 (* Sometimes I prefer to generate a single token for a list of things in the
746 * lexer so that if I have to passed them, like for passing TInclude then
747 * it's easy. Also if I don't do a single token, then I need to
748 * parse the rest which may not need special stuff, like detecting
749 * end of line which the parser is not really ready for. So for instance
750 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
751 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
752 * but this kind of token is valid only after a #include and the
753 * lexing and parsing rules are different for such tokens so not that
754 * easy to parse such things in parser_c.mly. Hence the following hacks.
756 * less?: maybe could get rid of this like I get rid of some of fix_define.
759 (* ------------------------------------------------------------------------- *)
761 (* ------------------------------------------------------------------------- *)
763 (* used to generate new token from existing one *)
764 let new_info posadd str ii
=
766 Ast_c.OriginTok
{ (Ast_c.parse_info_of_info ii
) with
767 charpos
= Ast_c.pos_of_info ii
+ posadd
;
769 column
= Ast_c.col_of_info ii
+ posadd
;
771 (* must generate a new ref each time, otherwise share *)
772 cocci_tag
= ref Ast_c.emptyAnnot
;
773 comments_tag
= ref Ast_c.emptyComments
;
777 let rec comment_until_defeol xs =
780 (* job not done in Cpp_token_c.define_parse ? *)
781 failwith
"cant find end of define token TDefEOL"
784 | Parser_c.TDefEOL i
->
785 Parser_c.TCommentCpp
(Token_c.CppDirective
, TH.info_of_tok x
)
789 (* bugfix: otherwise may lose a TComment token *)
790 if TH.is_real_comment
x
792 else Parser_c.TCommentCpp
(Token_c.CppPassingNormal
(*good?*), TH.info_of_tok
x)
794 x'
::comment_until_defeol xs
797 let drop_until_defeol xs =
799 (Common.drop_until
(function Parser_c.TDefEOL _
-> true | _
-> false) xs)
803 (* ------------------------------------------------------------------------- *)
804 (* returns a pair (replaced token, list of next tokens) *)
805 (* ------------------------------------------------------------------------- *)
807 let tokens_include (info
, includes
, filename
, inifdef
) =
808 Parser_c.TIncludeStart
(Ast_c.rewrap_str includes info
, inifdef
),
809 [Parser_c.TIncludeFilename
810 (filename
, (new_info (String.length includes
) filename info
))
813 (*****************************************************************************)
814 (* Parsing default define macros, usually in a standard.h file *)
815 (*****************************************************************************)
817 let parse_cpp_define_file2 file =
818 Common.save_excursion
Flag_parsing_c.verbose_lexing
(fun () ->
819 Flag_parsing_c.verbose_lexing
:= false;
820 let toks = tokens ~profile
:false file in
821 let toks = Cpp_token_c.fix_tokens_define
toks in
822 Cpp_token_c.extract_cpp_define
toks
825 let parse_cpp_define_file a
=
826 Common.profile_code_exclusif
"HACK" (fun () -> parse_cpp_define_file2 a
)
830 let (_defs
: (string, Cpp_token_c.define_def
) Hashtbl.t ref) =
831 ref (Hashtbl.create
101)
833 let (_defs_builtins
: (string, Cpp_token_c.define_def
) Hashtbl.t ref) =
834 ref (Hashtbl.create
101)
837 (* can not be put in parsing_hack, cos then mutually recursive problem as
838 * we also want to parse the standard.h file.
840 let init_defs_macros std_h
=
841 if not
(Common.lfile_exists std_h
)
842 then pr2
("warning: Can't find default macro file: " ^ std_h
)
844 pr2
("init_defs: " ^ std_h
);
845 _defs
:= Common.hash_of_list
(parse_cpp_define_file std_h
);
848 let init_defs_builtins file_h
=
849 if not
(Common.lfile_exists file_h
)
850 then pr2
("warning: Can't find macro file: " ^ file_h
)
852 pr2
("init_defs_builtins: " ^ file_h
);
854 Common.hash_of_list
(parse_cpp_define_file file_h
);
858 (*****************************************************************************)
859 (* Main entry point *)
860 (*****************************************************************************)
862 type info_item
= string * Parser_c.token list
864 type program2
= toplevel2 list
865 and toplevel2
= Ast_c.toplevel
* info_item
867 let program_of_program2 xs =
870 let with_program2 f program2
=
873 +> (fun (program
, infos
) ->
876 +> Common.uncurry
Common.zip
880 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
881 * possible error recovery. Indeed, they allow to skip some tokens and
882 * still be able to call again the ocamlyacc parser. It is ugly code
883 * because we cant modify ocamllex and ocamlyacc. As we want some
884 * extended lexing tricks, we have to use such refs.
886 * Those refs are now also used for my lalr(k) technique. Indeed They
887 * store the futur and previous tokens that were parsed, and so
888 * provide enough context information for powerful lex trick.
890 * - passed_tokens_last_ckp stores the passed tokens since last
891 * checkpoint. Used for NotParsedCorrectly and also to build the
892 * info_item attached to each program_element.
893 * - passed_tokens_clean is used for lookahead, in fact for lookback.
894 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
895 * contain some comments and so would make pattern matching difficult
896 * in lookahead. Hence this variable. We would like also to get rid
897 * of cpp instruction because sometimes a cpp instruction is between
898 * two tokens and makes a pattern matching fail. But lookahead also
899 * transform some cpp instruction (in comment) so can't remove them.
901 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
902 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
906 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
907 * after the call to pop2.
908 * toks = (reverse passed_tok) ++ remaining_tokens
909 * at the and of the lexer_function call.
910 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
911 * At the end of lexer_function call, cur_tok overlap with passed_tok.
913 * convention: I use "tr" for "tokens refs"
915 * I now also need this lexing trick because the lexer return comment
919 type tokens_state
= {
920 mutable rest
: Parser_c.token list
;
921 mutable rest_clean
: Parser_c.token list
;
922 mutable current
: Parser_c.token
;
923 (* it's passed since last "checkpoint", not passed from the beginning *)
924 mutable passed
: Parser_c.token list
;
925 mutable passed_clean
: Parser_c.token list
;
928 let mk_tokens_state toks =
931 rest_clean
= (toks +> List.filter
TH.is_not_comment
);
932 current
= (List.hd
toks);
939 let clone_tokens_state tr
=
941 rest_clean
= tr
.rest_clean
;
942 current
= tr
.current
;
944 passed_clean
= tr
.passed_clean
;
946 let copy_tokens_state ~src ~dst
=
947 dst
.rest
<- src
.rest
;
948 dst
.rest_clean
<- src
.rest_clean
;
949 dst
.current
<- src
.current
;
950 dst
.passed
<- src
.passed
;
951 dst
.passed_clean
<- src
.passed_clean
;
954 (* todo? agglomerate the x##b ? *)
955 let rec filter_noise n
xs =
961 | Parser_c.TMacroAttr _
->
962 filter_noise (n
-1) xs
964 x::filter_noise (n
-1) xs
967 let clean_for_lookahead xs =
972 x::filter_noise 10 xs
976 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
977 * tr means token refs.
979 let rec lexer_function ~pass tr
= fun lexbuf ->
981 | [] -> pr2_err "ALREADY AT END"; tr
.current
986 if !Flag_parsing_c.debug_lexer
then Common.pr2_gen
v;
990 tr
.passed
<- v::tr
.passed
;
991 lexer_function ~pass tr
lexbuf
994 let x = List.hd tr
.rest_clean
in
995 tr
.rest_clean
<- List.tl tr
.rest_clean
;
1002 * Why not in parsing_hacks lookahead and do passing like
1003 * I do for some ifdef directives ? Because here I also need to
1004 * generate some tokens sometimes and so I need access to the
1005 * tr.passed, tr.rest, etc.
1007 | Parser_c.TDefine
(tok) ->
1008 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
1009 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
1011 incr
Stat.nDefinePassing
;
1012 pr2_once
("CPP-DEFINE: inside function, I treat it as comment");
1013 let v'
= Parser_c.TCommentCpp
(Token_c.CppDirective
,TH.info_of_tok
v)
1015 tr
.passed
<- v'
::tr
.passed
;
1016 tr
.rest
<- comment_until_defeol tr
.rest
;
1017 tr
.rest_clean
<- drop_until_defeol tr
.rest_clean
;
1018 lexer_function ~pass tr
lexbuf
1021 tr
.passed
<- v::tr
.passed
;
1022 tr
.passed_clean
<- v::tr
.passed_clean
;
1026 | Parser_c.TInclude
(includes
, filename
, inifdef
, info
) ->
1027 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
1028 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
1030 incr
Stat.nIncludePassing
;
1031 pr2_once
("CPP-INCLUDE: inside function, I treat it as comment");
1032 let v = Parser_c.TCommentCpp
(Token_c.CppDirective
, info
) in
1033 tr
.passed
<- v::tr
.passed
;
1034 lexer_function ~pass tr
lexbuf
1037 let (v,new_tokens
) =
1038 tokens_include (info
, includes
, filename
, inifdef
) in
1039 let new_tokens_clean =
1040 new_tokens
+> List.filter
TH.is_not_comment
in
1042 tr
.passed
<- v::tr
.passed
;
1043 tr
.passed_clean
<- v::tr
.passed_clean
;
1044 tr
.rest
<- new_tokens
++ tr
.rest
;
1045 tr
.rest_clean
<- new_tokens_clean ++ tr
.rest_clean
;
1052 let v = match v with
1053 | Parser_c.TIdent
(s, ii
) ->
1056 not
(!Flag_parsing_c.disable_add_typedef
) &&
1058 then Parser_c.TypedefIdent
(s, ii
)
1059 else Parser_c.TIdent
(s, ii
)
1063 let v = Parsing_hacks.lookahead ~pass
1064 (clean_for_lookahead (v::tr
.rest_clean
))
1067 tr
.passed
<- v::tr
.passed
;
1069 (* the lookahead may have changed the status of the token and
1070 * consider it as a comment, for instance some #include are
1071 * turned into comments, hence this code. *)
1073 | Parser_c.TCommentCpp _
-> lexer_function ~pass tr
lexbuf
1075 tr
.passed_clean
<- v::tr
.passed_clean
;
1084 let get_one_elem ~pass tr
(file, filelines
) =
1086 if not
(LP.is_enabled_typedef
()) && !Flag_parsing_c.debug_typedef
1087 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
1089 (* normally have to do that only when come from an exception in which
1090 * case the dt() may not have been done
1091 * TODO but if was in scoped scope ? have to let only the last scope
1092 * so need do a LP.lexer_reset_typedef ();
1094 LP.enable_typedef
();
1095 LP._lexer_hint
:= (LP.default_hint
());
1096 LP.save_typedef_state
();
1100 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
1103 (* -------------------------------------------------- *)
1105 (* -------------------------------------------------- *)
1106 Common.profile_code_exclusif
"YACC" (fun () ->
1107 Left
(Parser_c.celem
(lexer_function ~pass tr
) lexbuf_fake)
1110 LP.restore_typedef_state
();
1112 (* must keep here, before the code that adjusts the tr fields *)
1113 let line_error = TH.line_of_tok tr
.current
in
1115 let passed_before_error = tr
.passed
in
1116 let current = tr
.current in
1118 (* error recovery, go to next synchro point *)
1119 let (passed'
, rest'
) = find_next_synchro tr
.rest tr
.passed
in
1121 tr
.passed
<- passed'
;
1123 tr
.current <- List.hd passed'
;
1124 tr
.passed_clean
<- []; (* enough ? *)
1125 (* with error recovery, rest and rest_clean may not be in sync *)
1126 tr
.rest_clean
<- (tr
.rest
+> List.filter
TH.is_not_comment
);
1129 let info_of_bads = Common.map_eff_rev
TH.info_of_tok tr
.passed
in
1130 Right
(info_of_bads, line_error,
1131 tr
.passed
, passed_before_error,
1138 (* note: as now we go in 2 passes, there is first all the error message of
1139 * the lexer, and then the error of the parser. It is not anymore
1142 * !!!This function use refs, and is not reentrant !!! so take care.
1143 * It use globals defined in Lexer_parser and also the _defs global
1144 * in parsing_hack.ml.
1146 * This function uses internally some semi globals in the
1147 * tokens_stat record and parsing_stat record.
1150 let parse_print_error_heuristic2 file =
1152 let filelines = Common.cat_array
file in
1153 let stat = Parsing_stat.default_stat
file in
1155 (* -------------------------------------------------- *)
1156 (* call lexer and get all the tokens *)
1157 (* -------------------------------------------------- *)
1158 LP.lexer_reset_typedef
();
1159 Parsing_hacks.ifdef_paren_cnt
:= 0;
1161 let toks_orig = tokens file in
1163 let toks = Cpp_token_c.fix_tokens_define
toks_orig in
1165 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs
:!_defs_builtins
toks in
1167 (* expand macros on demand trick, preparation phase *)
1169 Common.profile_code
"MACRO mgmt prep 1" (fun () ->
1170 let macros = Hashtbl.copy
!_defs
in
1171 (* include also builtins as some macros may generate some builtins too
1172 * like __decl_spec or __stdcall
1174 !_defs_builtins
+> Hashtbl.iter
(fun s def
->
1175 Hashtbl.replace
macros s def
;
1180 Common.profile_code
"MACRO mgmt prep 2" (fun () ->
1181 let local_macros = parse_cpp_define_file file in
1182 local_macros +> List.iter
(fun (s, def
) ->
1183 Hashtbl.replace
macros s def
;
1187 let tr = mk_tokens_state toks in
1191 (* todo?: I am not sure that it represents current_line, cos maybe
1192 * tr.current partipated in the previous parsing phase, so maybe tr.current
1193 * is not the first token of the next parsing phase. Same with checkpoint2.
1194 * It would be better to record when we have a } or ; in parser.mly,
1195 * cos we know that they are the last symbols of external_declaration2.
1197 * bugfix: may not be equal to 'file' as after macro expansions we can
1198 * start to parse a new entity from the body of a macro, for instance
1199 * when parsing a define_machine() body, cf standard.h
1201 let checkpoint = TH.line_of_tok
tr.current in
1202 let checkpoint_file = TH.file_of_tok
tr.current in
1204 (* call the parser *)
1207 Common.profile_code
"Parsing: 1st pass" (fun () ->
1208 get_one_elem ~pass
:1 tr (file, filelines)
1212 | Right
(info
,line_err
, passed
, passed_before_error, cur
, exn
) ->
1213 if !Flag_parsing_c.disable_multi_pass
1216 Common.profile_code
"Parsing: multi pass" (fun () ->
1218 pr2_err "parsing pass2: try again";
1219 let toks = List.rev passed
++ tr.rest
in
1220 let new_tr = mk_tokens_state toks in
1221 copy_tokens_state ~src
:new_tr ~dst
:tr;
1222 let passx = get_one_elem ~pass
:2 tr (file, filelines) in
1226 | Right
(info
,line_err
,passed
,passed_before_error,cur
,exn
) ->
1228 candidate_macros_in_passed passed
macros
1230 if is_define_passed passed
|| null
candidates
1233 (* todo factorize code *)
1235 pr2_err "parsing pass3: try again";
1236 let toks = List.rev passed
++ tr.rest
in
1238 find_optional_macro_to_expand ~
defs:candidates toks in
1239 let new_tr = mk_tokens_state toks'
in
1240 copy_tokens_state ~src
:new_tr ~dst
:tr;
1241 let passx = get_one_elem ~pass
:3 tr (file, filelines) in
1245 | Right
(info
,line_err
,passed
,passed_before_error,cur
,exn
) ->
1246 pr2_err "parsing pass4: try again";
1249 candidate_macros_in_passed passed
macros in
1251 let toks = List.rev passed
++ tr.rest
in
1253 find_optional_macro_to_expand ~
defs:candidates toks in
1254 let new_tr = mk_tokens_state toks'
in
1255 copy_tokens_state ~src
:new_tr ~dst
:tr;
1256 let passx = get_one_elem ~pass
:4 tr (file, filelines) in
1266 (* again not sure if checkpoint2 corresponds to end of bad region *)
1267 let checkpoint2 = TH.line_of_tok
tr.current in (* <> line_error *)
1268 let checkpoint2_file = TH.file_of_tok
tr.current in
1271 if (checkpoint_file =$
= checkpoint2_file) && (checkpoint_file =$
= file)
1272 then (checkpoint2 - checkpoint)
1274 (* TODO? so if error come in middle of something ? where the
1275 * start token was from original file but synchro found in body
1276 * of macro ? then can have wrong number of lines stat.
1277 * Maybe simpler just to look at tr.passed and count
1278 * the lines in the token from the correct file ?
1281 let info = mk_info_item file (List.rev
tr.passed
) in
1283 (* some stat updates *)
1284 stat.Stat.commentized <-
1285 stat.Stat.commentized + count_lines_commentized (snd
info);
1290 stat.Stat.correct
<- stat.Stat.correct
+ diffline;
1292 | Right
(info_of_bads, line_error, toks_of_bads
,
1293 _passed_before_error
, cur
, exn
) ->
1295 let was_define = is_define_passed tr.passed
in
1297 if was_define && !Flag_parsing_c.filter_msg_define_error
1303 | Parsing.Parse_error
1304 | Semantic_c.Semantic _
-> ()
1308 if !Flag_parsing_c.show_parsing_error
1311 (* Lexical is not anymore launched I think *)
1312 | Lexer_c.Lexical
s ->
1313 pr2
("lexical error " ^
s^
"\n =" ^
error_msg_tok cur
)
1314 | Parsing.Parse_error
->
1315 pr2
("parse error \n = " ^
error_msg_tok cur
)
1316 | Semantic_c.Semantic
(s, i
) ->
1317 pr2
("semantic error " ^
s^
"\n ="^
error_msg_tok cur
)
1318 | e
-> raise Impossible
1321 if (checkpoint_file =$
= checkpoint2_file) &&
1322 checkpoint_file =$
= file
1323 then print_bad line_error (checkpoint, checkpoint2) filelines
1324 else pr2
"PB: bad: but on tokens not from original file"
1330 +> Common.filter
(TH.is_same_line_or_close
line_error)
1331 +> Common.filter
TH.is_ident_like
1334 (pbline +> List.map
TH.str_of_tok
), line_error
1336 stat.Stat.problematic_lines
<-
1337 error_info::stat.Stat.problematic_lines
;
1341 if was_define && !Flag_parsing_c.filter_define_error
1342 then stat.Stat.correct
<- stat.Stat.correct
+ diffline
1343 else stat.Stat.bad
<- stat.Stat.bad
+ diffline;
1345 Ast_c.NotParsedCorrectly
info_of_bads
1349 | Ast_c.FinalDef
x -> [(Ast_c.FinalDef
x, info)]
1350 | xs -> (xs, info):: loop tr (* recurse *)
1354 let v = consistency_checking v in
1358 let time_total_parsing a
=
1359 Common.profile_code
"TOTAL" (fun () -> parse_print_error_heuristic2 a
)
1361 let parse_print_error_heuristic a
=
1362 Common.profile_code
"C parsing" (fun () -> time_total_parsing a
)
1366 let parse_c_and_cpp a
= parse_print_error_heuristic a
1368 (*****************************************************************************)
1369 (* Same but faster cos memoize stuff *)
1370 (*****************************************************************************)
1371 let parse_cache file =
1372 if not
!Flag_parsing_c.use_cache
then parse_print_error_heuristic file
1374 let _ = pr2
"TOFIX" in
1375 let need_no_changed_files =
1376 (* should use Sys.argv.(0), would be safer. *)
1380 Config.path ^ "/parsing_c/c_parser.cma";
1381 (* we may also depend now on the semantic patch because
1382 the SP may use macro and so we will disable some of the
1383 macro expansions from standard.h.
1389 let need_no_changed_variables =
1390 (* could add some of the flags of flag_parsing_c.ml *)
1393 Common.cache_computation_robust
1395 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1396 (fun () -> parse_print_error_heuristic file)
1400 (*****************************************************************************)
1401 (* Some special cases *)
1402 (*****************************************************************************)
1404 let (cstatement_of_string
: string -> Ast_c.statement
) = fun s ->
1405 let tmpfile = Common.new_temp_file
"cocci_stmt_of_s" "c" in
1406 Common.write_file
tmpfile ("void main() { \n" ^
s ^
"\n}");
1407 let program = parse_c_and_cpp tmpfile +> fst
in
1408 program +> Common.find_some
(fun (e
,_) ->
1410 | Ast_c.Definition
({Ast_c.f_body
= [Ast_c.StmtElem st
]},_) -> Some st
1414 let (cexpression_of_string
: string -> Ast_c.expression
) = fun s ->
1415 let tmpfile = Common.new_temp_file
"cocci_expr_of_s" "c" in
1416 Common.write_file
tmpfile ("void main() { \n" ^
s ^
";\n}");
1417 let program = parse_c_and_cpp tmpfile +> fst
in
1418 program +> Common.find_some
(fun (e
,_) ->
1420 | Ast_c.Definition
({Ast_c.f_body
= compound
},_) ->
1421 (match compound
with
1422 | [Ast_c.StmtElem st
] ->
1423 (match Ast_c.unwrap_st st
with
1424 | Ast_c.ExprStatement
(Some e
) -> Some e