3 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
17 module TH
= Token_helpers
18 module LP
= Lexer_parser
20 module Stat
= Parsing_stat
22 (*****************************************************************************)
24 (*****************************************************************************)
26 if !Flag_parsing_c.verbose_parsing
30 if !Flag_parsing_c.verbose_parsing
31 then Common.pr2_once s
33 (*****************************************************************************)
35 (*****************************************************************************)
37 let lexbuf_to_strpos lexbuf
=
38 (Lexing.lexeme lexbuf
, Lexing.lexeme_start lexbuf
)
40 let token_to_strpos tok
=
41 (TH.str_of_tok tok
, TH.pos_of_tok tok
)
44 let error_msg_tok tok
=
45 let file = TH.file_of_tok tok
in
46 if !Flag_parsing_c.verbose_parsing
47 then Common.error_message
file (token_to_strpos tok
)
48 else ("error in " ^
file ^
"set verbose_parsing for more info")
51 let print_bad line_error
(start_line
, end_line
) filelines
=
53 pr2 ("badcount: " ^ i_to_s
(end_line
- start_line
));
55 for i
= start_line
to end_line
do
56 let line = filelines
.(i
) in
59 then pr2 ("BAD:!!!!!" ^
" " ^
line)
60 else pr2 ("bad:" ^
" " ^
line)
66 let mk_info_item2 filename toks
=
67 let buf = Buffer.create
100 in
69 (* old: get_slice_file filename (line1, line2) *)
71 toks
+> List.iter
(fun tok
->
72 match TH.pinfo_of_tok tok
with
73 | Ast_c.OriginTok _
->
74 Buffer.add_string
buf (TH.str_of_tok tok
)
75 | Ast_c.AbstractLineTok _
->
84 let mk_info_item a b
=
85 Common.profile_code
"C parsing.mk_info_item"
86 (fun () -> mk_info_item2 a b
)
89 let info_same_line line xs
=
90 xs
+> List.filter
(fun info
-> Ast_c.line_of_info info
=|= line)
93 (*****************************************************************************)
94 (* Stats on what was passed/commentized *)
95 (*****************************************************************************)
97 let commentized xs
= xs
+> Common.map_filter
(function
98 | Parser_c.TCommentCpp
(cppkind
, ii
) ->
99 let s = Ast_c.str_of_info ii
in
101 match !Flag_parsing_c.filter_passed_level
with
104 List.mem cppkind
[Token_c.CppAttr
]
108 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
]
112 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppDirective
]
116 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppMacro
]
122 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppDirective
;Token_c.CppMacro
]
129 | _
-> failwith
"not valid level passing number"
131 if legal_passing then None
else Some
(ii
.Ast_c.pinfo
)
136 | s when s =~ "KERN_.*" -> None
137 | s when s =~ "__.*" -> None
139 Some (ii.Ast_c.pinfo)
144 | Parser_c.TCommentMisc ii
145 | Parser_c.TAction ii
147 Some
(ii
.Ast_c.pinfo
)
152 let count_lines_commentized xs
=
153 let line = ref (-1) in
159 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
160 let newline = pinfo
.Common.line in
172 let print_commentized xs
=
173 let line = ref (-1) in
175 let ys = commentized xs
in
179 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
180 let newline = pinfo
.Common.line in
181 let s = pinfo
.Common.str
in
182 let s = Str.global_substitute
183 (Str.regexp
"\n") (fun s -> "") s
186 then prerr_string
(s ^
" ")
189 then pr2_no_nl
"passed:"
190 else pr2_no_nl
"\npassed:";
195 if not
(null
ys) then pr2 "";
201 (*****************************************************************************)
203 (*****************************************************************************)
205 (* called by parse_print_error_heuristic *)
207 let table = Common.full_charpos_to_pos
file in
209 Common.with_open_infile
file (fun chan
->
210 let lexbuf = Lexing.from_channel chan
in
212 let rec tokens_aux acc
=
213 let tok = Lexer_c.token
lexbuf in
214 (* fill in the line and col information *)
215 let tok = tok +> TH.visitor_info_of_tok
(fun ii
->
216 { ii
with Ast_c.pinfo
=
217 (* could assert pinfo.filename = file ? *)
218 match Ast_c.pinfo_of_info ii
with
219 Ast_c.OriginTok pi
->
220 Ast_c.OriginTok
(Common.complete_parse_info
file table pi
)
221 | Ast_c.ExpandedTok
(pi
,vpi
) ->
222 Ast_c.ExpandedTok
((Common.complete_parse_info
file table pi
),vpi
)
223 | Ast_c.FakeTok
(s,vpi
) -> Ast_c.FakeTok
(s,vpi
)
224 | Ast_c.AbstractLineTok pi
-> failwith
"should not occur"
229 then List.rev
(tok::acc
)
230 else tokens_aux (tok::acc
)
234 | Lexer_c.Lexical
s ->
235 failwith
("lexical error " ^
s ^
"\n =" ^
236 (Common.error_message
file (lexbuf_to_strpos lexbuf)))
240 let time_lexing ?
(profile
=true) a
=
242 then Common.profile_code_exclusif
"LEXING" (fun () -> tokens2 a
)
244 let tokens ?profile a
=
245 Common.profile_code
"C parsing.tokens" (fun () -> time_lexing ?profile a
)
248 let tokens_of_string string =
249 let lexbuf = Lexing.from_string
string in
251 let rec tokens_s_aux () =
252 let tok = Lexer_c.token
lexbuf in
255 else tok::(tokens_s_aux ())
259 | Lexer_c.Lexical
s -> failwith
("lexical error " ^
s ^
"\n =" )
263 (*****************************************************************************)
264 (* Parsing, but very basic, no more used *)
265 (*****************************************************************************)
268 * !!!Those function use refs, and are not reentrant !!! so take care.
269 * It use globals defined in Lexer_parser.
271 * update: because now lexer return comments tokens, those functions
272 * may not work anymore.
276 let lexbuf = Lexing.from_channel
(open_in
file) in
277 let result = Parser_c.main
Lexer_c.token
lexbuf in
281 let parse_print_error file =
282 let chan = (open_in
file) in
283 let lexbuf = Lexing.from_channel
chan in
285 let error_msg () = Common.error_message
file (lexbuf_to_strpos lexbuf) in
287 lexbuf +> Parser_c.main
Lexer_c.token
289 | Lexer_c.Lexical
s ->
290 failwith
("lexical error " ^
s^
"\n =" ^
error_msg ())
291 | Parsing.Parse_error
->
292 failwith
("parse error \n = " ^
error_msg ())
293 | Semantic_c.Semantic
(s, i
) ->
294 failwith
("semantic error " ^
s ^
"\n =" ^
error_msg ())
300 (*****************************************************************************)
301 (* Parsing subelements, useful to debug parser *)
302 (*****************************************************************************)
305 * !!!Those function use refs, and are not reentrant !!! so take care.
306 * It use globals defined in Lexer_parser.
311 * let parse_gen parsefunc s =
312 * let lexbuf = Lexing.from_string s in
313 * let result = parsefunc Lexer_c.token lexbuf in
317 let parse_gen parsefunc
s =
318 let toks = tokens_of_string s +> List.filter
TH.is_not_comment
in
321 (* Why use this lexing scheme ? Why not classically give lexer func
322 * to parser ? Because I now keep comments in lexer. Could
323 * just do a simple wrapper that when comment ask again for a token,
324 * but maybe simpler to use cur_tok technique.
326 let all_tokens = ref toks in
327 let cur_tok = ref (List.hd
!all_tokens) in
331 if TH.is_eof
!cur_tok
332 then (pr2 "LEXER: ALREADY AT END"; !cur_tok)
334 let v = Common.pop2
all_tokens in
339 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
340 let result = parsefunc
lexer_function lexbuf_fake in
344 let type_of_string = parse_gen Parser_c.type_name
345 let statement_of_string = parse_gen Parser_c.statement
346 let expression_of_string = parse_gen Parser_c.expr
348 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
354 (*****************************************************************************)
355 (* Consistency checking *)
356 (*****************************************************************************)
359 * could check that an ident has always the same class, be it a typedef
360 * (but sometimes do 'acpi_val acpi_val;'), an ident, a TMacroStatement,
365 | CIdent
(* can be var, func, field, tag, enum constant *)
368 let str_of_class_ident = function
370 | CTypedef
-> "Typedef"
380 (* but take care that must still be able to use '=' *)
381 type context
= InFunction
| InEnum
| InStruct
| InInitializer
| InParams
383 | CIdent
of class_ident
387 | CCommentCpp
of cppkind
397 | CReservedKwd
(type | decl
| qualif
| flow
| misc
| attr
)
400 let ident_to_typename ident
=
401 (Ast_c.nQ
, (Ast_c.TypeName
(ident
, Ast_c.noTypedefDef
()), Ast_c.noii
))
404 (* parse_typedef_fix4 *)
405 let consistency_checking2 xs
=
407 (* first phase, gather data *)
408 let stat = Hashtbl.create
101 in
410 (* default value for hash *)
411 let v1 () = Hashtbl.create
101 in
414 let bigf = { Visitor_c.default_visitor_c
with
416 Visitor_c.kexpr
= (fun (k
,bigf) x
->
417 match Ast_c.unwrap_expr x
with
418 | Ast_c.Ident
(id
) ->
419 let s = Ast_c.str_of_name id
in
421 Common.hfind_default
s v1 +> Common.hfind_default CIdent
v2 +>
422 (fun aref
-> incr aref
)
426 Visitor_c.ktype
= (fun (k
,bigf) t
->
427 match Ast_c.unwrap_typeC t
with
428 | Ast_c.TypeName
(name
,_typ
) ->
429 let s = Ast_c.str_of_name name
in
431 Common.hfind_default
s v1 +> Common.hfind_default CTypedef
v2 +>
432 (fun aref
-> incr aref
)
438 xs
+> List.iter
(fun (p
, info_item
) -> Visitor_c.vk_toplevel
bigf p
);
441 let ident_to_type = ref [] in
444 (* second phase, analyze data *)
445 stat +> Hashtbl.iter
(fun k
v ->
446 let xs = Common.hash_to_list
v in
447 if List.length
xs >= 2
449 pr2 ("CONFLICT:" ^ k
);
450 let sorted = xs +> List.sort
(fun (ka
,va
) (kb
,vb
) ->
453 | CTypedef
, _
-> 1 (* first is smaller *)
459 let sorted = List.rev
sorted in
461 | [CTypedef
, i1
;CIdent
, i2
] ->
462 pr2 ("transforming some ident in typedef");
463 push2 k
ident_to_type;
465 pr2 ("TODO:other transforming?");
470 (* third phase, update ast.
471 * todo? but normally should try to handle correctly scope ? maybe sometime
472 * sizeof(id) and even if id was for a long time an identifier, maybe
473 * a few time, because of the scope it's actually really a type.
475 if (null
!ident_to_type)
478 let bigf = { Visitor_c.default_visitor_c_s
with
479 Visitor_c.kdefineval_s
= (fun (k
,bigf) x
->
481 | Ast_c.DefineExpr e
->
483 | (Ast_c.Ident
(ident
), _
), _ii
->
484 let s = Ast_c.str_of_name ident
in
485 if List.mem
s !ident_to_type
487 let t = ident_to_typename ident
in
494 Visitor_c.kexpr_s
= (fun (k
, bigf) x
->
496 | (Ast_c.SizeOfExpr e
, tref
), isizeof
->
497 let i1 = tuple_of_list1 isizeof
in
499 | (Ast_c.ParenExpr e
, _
), iiparen
->
501 | (Ast_c.Ident
(ident
), _
), _ii
->
503 let s = Ast_c.str_of_name ident
in
504 if List.mem
s !ident_to_type
506 let t = ident_to_typename ident
in
507 let (i2
, i3
) = tuple_of_list2 iiparen
in
508 (Ast_c.SizeOfType
t, tref
), [i1;i2
;i3
]
517 xs +> List.map
(fun (p
, info_item
) ->
518 Visitor_c.vk_toplevel_s
bigf p
, info_item
522 let consistency_checking a
=
523 Common.profile_code
"C consistencycheck" (fun () -> consistency_checking2 a
)
527 (*****************************************************************************)
529 (*****************************************************************************)
531 (* todo: do something if find Parser_c.Eof ? *)
532 let rec find_next_synchro next already_passed
=
534 (* Maybe because not enough }, because for example an ifdef contains
535 * in both branch some opening {, we later eat too much, "on deborde
536 * sur la fonction d'apres". So already_passed may be too big and
537 * looking for next synchro point starting from next may not be the
538 * best. So maybe we can find synchro point inside already_passed
539 * instead of looking in next.
541 * But take care! must progress. We must not stay in infinite loop!
542 * For instance now I have as a error recovery to look for
543 * a "start of something", corresponding to start of function,
544 * but must go beyond this start otherwise will loop.
545 * So look at premier(external_declaration2) in parser.output and
546 * pass at least those first tokens.
548 * I have chosen to start search for next synchro point after the
549 * first { I found, so quite sure we will not loop. *)
551 let last_round = List.rev already_passed
in
553 let xs = last_round +> List.filter
TH.is_not_comment
in
555 | Parser_c.TDefine _
::_
-> true
559 then find_next_synchro_define
(last_round ++ next
) []
562 let (before
, after
) =
563 last_round +> Common.span
(fun tok ->
565 (* by looking at TOBrace we are sure that the "start of something"
566 * will not arrive too early
568 | Parser_c.TOBrace _
-> false
569 | Parser_c.TDefine _
-> false
573 find_next_synchro_orig
(after
++ next
) (List.rev before
)
577 and find_next_synchro_define next already_passed
=
580 pr2 "ERROR-RECOV: end of file while in recovery mode";
582 | (Parser_c.TDefEOL i
as v)::xs ->
583 pr2 ("ERROR-RECOV: found sync end of #define, line "^i_to_s
(TH.line_of_tok
v));
584 v::already_passed
, xs
586 find_next_synchro_define
xs (v::already_passed
)
591 and find_next_synchro_orig next already_passed
=
594 pr2 "ERROR-RECOV: end of file while in recovery mode";
597 | (Parser_c.TCBrace i
as v)::xs when TH.col_of_tok
v =|= 0 ->
598 pr2 ("ERROR-RECOV: found sync '}' at line "^i_to_s
(TH.line_of_tok
v));
601 | [] -> raise Impossible
(* there is a EOF token normally *)
603 (* still useful: now parser.mly allow empty ';' so normally no pb *)
604 | Parser_c.TPtVirg iptvirg
::xs ->
605 pr2 "ERROR-RECOV: found sync bis, eating } and ;";
606 (Parser_c.TPtVirg iptvirg
)::v::already_passed
, xs
608 | Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
::xs ->
609 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
610 (Parser_c.TPtVirg iptvirg
)::(Parser_c.TIdent x
)::v::already_passed
,
613 | Parser_c.TCommentSpace sp
::Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
615 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
616 (Parser_c.TCommentSpace sp
)::
617 (Parser_c.TPtVirg iptvirg
)::
618 (Parser_c.TIdent x
)::
623 | Parser_c.TCommentNewline sp
::Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
625 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
626 (Parser_c.TCommentNewline sp
)::
627 (Parser_c.TPtVirg iptvirg
)::
628 (Parser_c.TIdent x
)::
634 v::already_passed
, xs
636 | v::xs when TH.col_of_tok
v =|= 0 && TH.is_start_of_something
v ->
637 pr2 ("ERROR-RECOV: found sync col 0 at line "^ i_to_s
(TH.line_of_tok
v));
638 already_passed
, v::xs
641 find_next_synchro_orig
xs (v::already_passed
)
644 (*****************************************************************************)
645 (* Include/Define hacks *)
646 (*****************************************************************************)
648 (* Sometimes I prefer to generate a single token for a list of things in the
649 * lexer so that if I have to passed them, like for passing TInclude then
650 * it's easy. Also if I don't do a single token, then I need to
651 * parse the rest which may not need special stuff, like detecting
652 * end of line which the parser is not really ready for. So for instance
653 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
654 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
655 * but this kind of token is valid only after a #include and the
656 * lexing and parsing rules are different for such tokens so not that
657 * easy to parse such things in parser_c.mly. Hence the following hacks.
659 * less?: maybe could get rid of this like I get rid of some of fix_define.
662 (* ------------------------------------------------------------------------- *)
664 (* ------------------------------------------------------------------------- *)
666 (* used to generate new token from existing one *)
667 let new_info posadd str ii
=
669 Ast_c.OriginTok
{ (Ast_c.parse_info_of_info ii
) with
670 charpos
= Ast_c.pos_of_info ii
+ posadd
;
672 column
= Ast_c.col_of_info ii
+ posadd
;
674 (* must generate a new ref each time, otherwise share *)
675 cocci_tag
= ref Ast_c.emptyAnnot
;
676 comments_tag
= ref Ast_c.emptyComments
;
680 let rec comment_until_defeol xs =
682 | [] -> failwith
"cant find end of define token TDefEOL"
685 | Parser_c.TDefEOL i
->
686 Parser_c.TCommentCpp
(Token_c.CppDirective
, TH.info_of_tok x
)
690 (* bugfix: otherwise may lose a TComment token *)
691 if TH.is_real_comment
x
693 else Parser_c.TCommentCpp
(Token_c.CppPassingNormal
(*good?*), TH.info_of_tok
x)
695 x'
::comment_until_defeol xs
698 let drop_until_defeol xs =
700 (Common.drop_until
(function Parser_c.TDefEOL _
-> true | _
-> false) xs)
704 (* ------------------------------------------------------------------------- *)
705 (* returns a pair (replaced token, list of next tokens) *)
706 (* ------------------------------------------------------------------------- *)
708 let tokens_include (info
, includes
, filename
, inifdef
) =
709 Parser_c.TIncludeStart
(Ast_c.rewrap_str includes info
, inifdef
),
710 [Parser_c.TIncludeFilename
711 (filename
, (new_info (String.length includes
) filename info
))
714 (*****************************************************************************)
715 (* Parsing default define macros, usually in a standard.h file *)
716 (*****************************************************************************)
718 let parse_cpp_define_file2 file =
719 let toks = tokens ~profile
:false file in
720 let toks = Parsing_hacks.fix_tokens_define
toks in
721 Parsing_hacks.extract_cpp_define
toks
723 let parse_cpp_define_file a
=
724 Common.profile_code_exclusif
"HACK" (fun () -> parse_cpp_define_file2 a
)
726 (* can not be put in parsing_hack, cos then mutually recursive problem as
727 * we also want to parse the standard.h file.
729 let init_defs std_h
=
730 if not
(Common.lfile_exists std_h
)
731 then pr2 ("warning: Can't find default macro file: " ^ std_h
)
733 pr2 ("init_defs: " ^ std_h
);
734 Parsing_hacks._defs
:= Common.hash_of_list
(parse_cpp_define_file std_h
);
738 (*****************************************************************************)
739 (* Main entry point *)
740 (*****************************************************************************)
742 type info_item
= string * Parser_c.token list
744 type program2
= toplevel2 list
745 and toplevel2
= Ast_c.toplevel
* info_item
747 let program_of_program2 xs =
750 let with_program2 f program2
=
753 +> (fun (program
, infos
) ->
756 +> Common.uncurry
Common.zip
760 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
761 * possible error recovery. Indeed, they allow to skip some tokens and
762 * still be able to call again the ocamlyacc parser. It is ugly code
763 * because we cant modify ocamllex and ocamlyacc. As we want some
764 * extended lexing tricks, we have to use such refs.
766 * Those refs are now also used for my lalr(k) technique. Indeed They
767 * store the futur and previous tokens that were parsed, and so
768 * provide enough context information for powerful lex trick.
770 * - passed_tokens_last_ckp stores the passed tokens since last
771 * checkpoint. Used for NotParsedCorrectly and also to build the
772 * info_item attached to each program_element.
773 * - passed_tokens_clean is used for lookahead, in fact for lookback.
774 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
775 * contain some comments and so would make pattern matching difficult
776 * in lookahead. Hence this variable. We would like also to get rid
777 * of cpp instruction because sometimes a cpp instruction is between
778 * two tokens and makes a pattern matching fail. But lookahead also
779 * transform some cpp instruction (in comment) so can't remove them.
781 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
782 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
786 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
787 * after the call to pop2.
788 * toks = (reverse passed_tok) ++ remaining_tokens
789 * at the and of the lexer_function call.
790 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
791 * At the end of lexer_function call, cur_tok overlap with passed_tok.
793 * convention: I use "tr" for "tokens refs"
795 * I now also need this lexing trick because the lexer return comment
799 type tokens_state
= {
800 mutable rest
: Parser_c.token list
;
801 mutable rest_clean
: Parser_c.token list
;
802 mutable current
: Parser_c.token
;
803 (* it's passed since last "checkpoint", not passed from the beginning *)
804 mutable passed
: Parser_c.token list
;
805 mutable passed_clean
: Parser_c.token list
;
807 let clone_tokens_stat tr
=
809 rest_clean
= tr
.rest_clean
;
810 current
= tr
.current
;
812 passed_clean
= tr
.passed_clean
;
814 let copy_tokens_stat ~src ~dst
=
815 dst
.rest
<- src
.rest
;
816 dst
.rest_clean
<- src
.rest_clean
;
817 dst
.current
<- src
.current
;
818 dst
.passed
<- src
.passed
;
819 dst
.passed_clean
<- src
.passed_clean
;
822 let rec filter_noise n
xs =
828 | Parser_c.TMacroAttr _
->
829 filter_noise (n
-1) xs
831 x::filter_noise (n
-1) xs
834 let clean_for_lookahead xs =
839 x::filter_noise 10 xs
843 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
844 * tr means token refs.
846 let rec lexer_function ~pass tr
= fun lexbuf ->
848 | [] -> pr2 "ALREADY AT END"; tr
.current
853 if !Flag_parsing_c.debug_lexer
then Common.pr2_gen
v;
857 tr
.passed
<- v::tr
.passed
;
858 lexer_function ~pass tr
lexbuf
861 let x = List.hd tr
.rest_clean
in
862 tr
.rest_clean
<- List.tl tr
.rest_clean
;
869 * Why not in parsing_hacks lookahead and do passing like
870 * I do for some ifdef directives ? Because here I also need to
871 * generate some tokens sometimes and so I need access to the
872 * tr.passed, tr.rest, etc.
874 | Parser_c.TDefine
(tok) ->
875 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
876 (!Flag_parsing_c.cpp_directive_passing
|| (pass
=|= 2))
878 incr
Stat.nDefinePassing
;
879 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
880 let v'
= Parser_c.TCommentCpp
(Token_c.CppDirective
,TH.info_of_tok
v)
882 tr
.passed
<- v'
::tr
.passed
;
883 tr
.rest
<- comment_until_defeol tr
.rest
;
884 tr
.rest_clean
<- drop_until_defeol tr
.rest_clean
;
885 lexer_function ~pass tr
lexbuf
888 tr
.passed
<- v::tr
.passed
;
889 tr
.passed_clean
<- v::tr
.passed_clean
;
893 | Parser_c.TInclude
(includes
, filename
, inifdef
, info
) ->
894 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
895 (!Flag_parsing_c.cpp_directive_passing
|| (pass
=|= 2))
897 incr
Stat.nIncludePassing
;
898 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
899 let v = Parser_c.TCommentCpp
(Token_c.CppDirective
, info
) in
900 tr
.passed
<- v::tr
.passed
;
901 lexer_function ~pass tr
lexbuf
905 tokens_include (info
, includes
, filename
, inifdef
) in
906 let new_tokens_clean =
907 new_tokens
+> List.filter
TH.is_not_comment
in
909 tr
.passed
<- v::tr
.passed
;
910 tr
.passed_clean
<- v::tr
.passed_clean
;
911 tr
.rest
<- new_tokens
++ tr
.rest
;
912 tr
.rest_clean
<- new_tokens_clean ++ tr
.rest_clean
;
920 | Parser_c.TIdent
(s, ii
) ->
923 not
(!Flag_parsing_c.disable_add_typedef
) &&
925 then Parser_c.TypedefIdent
(s, ii
)
926 else Parser_c.TIdent
(s, ii
)
930 let v = Parsing_hacks.lookahead ~pass
931 (clean_for_lookahead (v::tr
.rest_clean
))
934 tr
.passed
<- v::tr
.passed
;
936 (* the lookahead may have changed the status of the token and
937 * consider it as a comment, for instance some #include are
938 * turned into comments, hence this code. *)
940 | Parser_c.TCommentCpp _
-> lexer_function ~pass tr
lexbuf
942 tr
.passed_clean
<- v::tr
.passed_clean
;
949 let get_one_elem ~pass tr
(file, filelines
) =
951 if not
(LP.is_enabled_typedef
()) && !Flag_parsing_c.debug_typedef
952 then pr2 "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
954 (* normally have to do that only when come from an exception in which
955 * case the dt() may not have been done
956 * TODO but if was in scoped scope ? have to let only the last scope
957 * so need do a LP.lexer_reset_typedef ();
960 LP._lexer_hint
:= (LP.default_hint
());
961 LP.save_typedef_state
();
965 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
968 (* -------------------------------------------------- *)
970 (* -------------------------------------------------- *)
971 Common.profile_code_exclusif
"YACC" (fun () ->
972 Left
(Parser_c.celem
(lexer_function ~pass tr
) lexbuf_fake)
975 if (pass
=|= 1 && !Flag_parsing_c.disable_two_pass
)|| (pass
=|= 2)
978 (* Lexical is not anymore launched I think *)
979 | Lexer_c.Lexical
s ->
980 pr2 ("lexical error " ^
s^
"\n =" ^
error_msg_tok tr
.current
)
981 | Parsing.Parse_error
->
982 pr2 ("parse error \n = " ^
error_msg_tok tr
.current
)
983 | Semantic_c.Semantic
(s, i
) ->
984 pr2 ("semantic error " ^
s^
"\n ="^
error_msg_tok tr
.current
)
988 LP.restore_typedef_state
();
990 (* must keep here, before the code that adjusts the tr fields *)
991 let line_error = TH.line_of_tok tr
.current
in
994 (* error recovery, go to next synchro point *)
995 let (passed'
, rest'
) = find_next_synchro tr
.rest tr
.passed
in
997 tr
.passed
<- passed'
;
999 tr
.current
<- List.hd passed'
;
1000 tr
.passed_clean
<- []; (* enough ? *)
1001 (* with error recovery, rest and rest_clean may not be in sync *)
1002 tr
.rest_clean
<- (tr
.rest
+> List.filter
TH.is_not_comment
);
1005 let info_of_bads = Common.map_eff_rev
TH.info_of_tok tr
.passed
in
1006 Right
(info_of_bads, line_error, tr
.passed
)
1013 (* note: as now we go in 2 passes, there is first all the error message of
1014 * the lexer, and then the error of the parser. It is not anymore
1017 * !!!This function use refs, and is not reentrant !!! so take care.
1018 * It use globals defined in Lexer_parser and also the _defs global
1019 * in parsing_hack.ml.
1021 * This function uses internally some semi globals in the
1022 * tokens_stat record and parsing_stat record.
1025 let parse_print_error_heuristic2 file =
1027 let filelines = Common.cat_array
file in
1028 let stat = Parsing_stat.default_stat
file in
1030 (* -------------------------------------------------- *)
1031 (* call lexer and get all the tokens *)
1032 (* -------------------------------------------------- *)
1033 LP.lexer_reset_typedef
();
1034 Parsing_hacks.ifdef_paren_cnt
:= 0;
1035 let toks_orig = tokens file in
1037 let toks = Parsing_hacks.fix_tokens_define
toks_orig in
1038 let toks = Parsing_hacks.fix_tokens_cpp
toks in
1042 rest_clean
= (toks +> List.filter
TH.is_not_comment
);
1043 current
= (List.hd
toks);
1053 (* todo?: I am not sure that it represents current_line, cos maybe
1054 * tr.current partipated in the previous parsing phase, so maybe tr.current
1055 * is not the first token of the next parsing phase. Same with checkpoint2.
1056 * It would be better to record when we have a } or ; in parser.mly,
1057 * cos we know that they are the last symbols of external_declaration2.
1059 * bugfix: may not be equal to 'file' as after macro expansions we can
1060 * start to parse a new entity from the body of a macro, for instance
1061 * when parsing a define_machine() body, cf standard.h
1063 let checkpoint = TH.line_of_tok
tr.current
in
1064 let checkpoint_file = TH.file_of_tok
tr.current
in
1066 let tr_save = clone_tokens_stat tr in
1068 (* call the parser *)
1070 let pass1 = get_one_elem ~pass
:1 tr (file, filelines) in
1074 if !Flag_parsing_c.disable_two_pass
1077 pr2 "parsing pass2: try again";
1078 copy_tokens_stat ~src
:tr_save ~dst
: tr;
1079 let pass2 = get_one_elem ~pass
:2 tr (file, filelines) in
1085 (* again not sure if checkpoint2 corresponds to end of bad region *)
1086 let checkpoint2 = TH.line_of_tok
tr.current
in (* <> line_error *)
1087 let checkpoint2_file = TH.file_of_tok
tr.current
in
1092 | Right
(_
, line_error, _
) ->
1094 let xs = tr.passed
+> List.rev
+> List.filter
TH.is_not_comment
in
1095 if List.length
xs >= 2
1097 (match Common.head_middle_tail
xs with
1098 | Parser_c.TDefine _
, _
, Parser_c.TDefEOL _
->
1103 pr2 "WEIRD: length list of error recovery tokens < 2 ";
1107 (if was_define && !Flag_parsing_c.filter_msg_define_error
1111 if (checkpoint_file =$
= checkpoint2_file) &&
1112 checkpoint_file =$
= file
1113 then print_bad line_error (checkpoint, checkpoint2) filelines
1114 else pr2 "PB: bad: but on tokens not from original file"
1121 if (checkpoint_file =$
= checkpoint2_file) && (checkpoint_file =$
= file)
1122 then (checkpoint2 - checkpoint)
1124 (* TODO? so if error come in middle of something ? where the
1125 * start token was from original file but synchro found in body
1126 * of macro ? then can have wrong number of lines stat.
1127 * Maybe simpler just to look at tr.passed and count
1128 * the lines in the token from the correct file ?
1131 let info = mk_info_item file (List.rev
tr.passed
) in
1133 (* some stat updates *)
1134 stat.Stat.commentized <-
1135 stat.Stat.commentized + count_lines_commentized (snd
info);
1140 stat.Stat.correct
<- stat.Stat.correct
+ diffline;
1142 | Right
(info_of_bads, line_error, toks_of_bads
) ->
1143 if was_define && !Flag_parsing_c.filter_define_error
1144 then stat.Stat.correct
<- stat.Stat.correct
+ diffline
1145 else stat.Stat.bad
<- stat.Stat.bad
+ diffline;
1149 +> Common.filter
(TH.is_same_line_or_close
line_error)
1150 +> Common.filter
TH.is_ident_like
1153 (pbline +> List.map
TH.str_of_tok
), line_error
1155 stat.Stat.problematic_lines
<-
1156 error_info::stat.Stat.problematic_lines
;
1158 Ast_c.NotParsedCorrectly
info_of_bads
1162 | Ast_c.FinalDef
x -> [(Ast_c.FinalDef
x, info)]
1163 | xs -> (xs, info):: loop tr (* recurse *)
1167 let v = consistency_checking v in
1171 let time_total_parsing a
=
1172 Common.profile_code
"TOTAL" (fun () -> parse_print_error_heuristic2 a
)
1174 let parse_print_error_heuristic a
=
1175 Common.profile_code
"C parsing" (fun () -> time_total_parsing a
)
1179 let parse_c_and_cpp a
= parse_print_error_heuristic a
1181 (*****************************************************************************)
1182 (* Same but faster cos memoize stuff *)
1183 (*****************************************************************************)
1184 let parse_cache file =
1185 if not
!Flag_parsing_c.use_cache
then parse_print_error_heuristic file
1187 let _ = pr2 "TOFIX" in
1188 let need_no_changed_files =
1189 (* should use Sys.argv.(0), would be safer. *)
1193 Config.path ^ "/parsing_c/c_parser.cma";
1194 (* we may also depend now on the semantic patch because
1195 the SP may use macro and so we will disable some of the
1196 macro expansions from standard.h.
1202 let need_no_changed_variables =
1203 (* could add some of the flags of flag_parsing_c.ml *)
1206 Common.cache_computation_robust
1208 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1209 (fun () -> parse_print_error_heuristic file)
1213 (*****************************************************************************)
1214 (* Some special cases *)
1215 (*****************************************************************************)
1217 let (cstatement_of_string
: string -> Ast_c.statement
) = fun s ->
1218 Common.write_file
("/tmp/__cocci.c") ("void main() { \n" ^
s ^
"\n}");
1219 let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst
in
1220 program +> Common.find_some
(fun (e
,_) ->
1222 | Ast_c.Definition
({Ast_c.f_body
= [Ast_c.StmtElem st
]},_) -> Some st
1226 let (cexpression_of_string
: string -> Ast_c.expression
) = fun s ->
1227 Common.write_file
("/tmp/__cocci.c") ("void main() { \n" ^
s ^
";\n}");
1228 let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst
in
1229 program +> Common.find_some
(fun (e
,_) ->
1231 | Ast_c.Definition
({Ast_c.f_body
= compound
},_) ->
1232 (match compound
with
1233 | [Ast_c.StmtElem
(Ast_c.ExprStatement
(Some e
),ii
)] -> Some e