1 (* Copyright (C) 2006, 2007, 2008 Yoann Padioleau
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
15 module TH
= Token_helpers
16 module LP
= Lexer_parser
18 module Stat
= Parsing_stat
20 (*****************************************************************************)
22 (*****************************************************************************)
24 if !Flag_parsing_c.verbose_parsing
28 if !Flag_parsing_c.verbose_parsing
29 then Common.pr2_once s
31 (*****************************************************************************)
33 (*****************************************************************************)
(* [lexbuf_to_strpos lexbuf] pairs the most recently matched lexeme with
 * its absolute character position in the input. *)
let lexbuf_to_strpos lexbuf =
  let matched = Lexing.lexeme lexbuf in
  let charpos = Lexing.lexeme_start lexbuf in
  (matched, charpos)
(* [token_to_strpos tok] pairs the token's source text with its
 * character position, mirroring [lexbuf_to_strpos] for parsed tokens. *)
let token_to_strpos tok =
  let text = TH.str_of_tok tok in
  let charpos = TH.pos_of_tok tok in
  (text, charpos)
(* Build an error message locating [tok].  In verbose mode delegate to
 * Common.error_message, which can show the offending source line;
 * otherwise only mention the file, to keep output short.
 * Fix: the short message concatenated the file name and the hint with
 * no separator ("error in foo.cset verbose_parsing ..."); a "; " is
 * inserted between them. *)
let error_msg_tok tok =
  let file = TH.file_of_tok tok in
  if !Flag_parsing_c.verbose_parsing
  then Common.error_message file (token_to_strpos tok)
  else ("error in " ^ file ^ "; set verbose_parsing for more info")
(* Print the source lines of a region that failed to parse.
 * Every line of [start_line, end_line] is echoed with a "bad:" prefix,
 * except [line_error] itself, which gets the loud "BAD:!!!!!" marker.
 * [filelines] is the file content indexed by line number.
 * Fix: the branch selector line was missing from the mangled source,
 * leaving a dangling then/else; restored as [if i = line_error], which
 * is the only reading that uses the otherwise-dead [line_error]
 * parameter and distinguishes the BAD/bad branches. *)
let print_bad line_error (start_line, end_line) filelines =
  begin
    pr2 ("badcount: " ^ i_to_s (end_line - start_line));
    for i = start_line to end_line do
      let line = filelines.(i) in
      if i = line_error
      then pr2 ("BAD:!!!!!" ^ " " ^ line)
      else pr2 ("bad:" ^ " " ^ line)
    done
  end
(* Rebuild the raw text of a parsed element by concatenating the string
 * of each token that comes from the original source (OriginTok).
 * AbstractLineTok tokens must never occur here, hence the Impossible.
 * NOTE(review): the tail of this definition (the remaining pinfo match
 * arms and the final Buffer.contents result) is not visible in this
 * chunk — confirm against the full file before editing further. *)
64 let mk_info_item2 filename toks
=
65 let buf = Buffer.create
100 in
67 (* old: get_slice_file filename (line1, line2) *)
69 toks
+> List.iter
(fun tok
->
70 match TH.pinfo_of_tok tok
with
71 | Ast_c.OriginTok _
-> Buffer.add_string
buf (TH.str_of_tok tok
)
72 | Ast_c.AbstractLineTok _
-> raise Impossible
(* Profiled wrapper around [mk_info_item2]. *)
let mk_info_item a b =
  let compute () = mk_info_item2 a b in
  Common.profile_code "C parsing.mk_info_item" compute
87 (*****************************************************************************)
88 (* Stats on what was passed/commentized *)
89 (*****************************************************************************)
91 let commentized xs
= xs
+> Common.map_filter
(function
92 | Parser_c.TCommentCpp
(cppkind
, ii
) ->
93 let s = Ast_c.str_of_info ii
in
95 match !Flag_parsing_c.filter_passed_level
with
98 List.mem cppkind
[Ast_c.CppAttr
]
102 List.mem cppkind
[Ast_c.CppAttr
;Ast_c.CppPassingNormal
]
106 List.mem cppkind
[Ast_c.CppAttr
;Ast_c.CppPassingNormal
;Ast_c.CppDirective
]
110 List.mem cppkind
[Ast_c.CppAttr
;Ast_c.CppPassingNormal
;Ast_c.CppMacro
]
116 List.mem cppkind
[Ast_c.CppAttr
;Ast_c.CppPassingNormal
;Ast_c.CppDirective
;Ast_c.CppMacro
]
123 | _
-> failwith
"not valid level passing number"
125 if legal_passing then None
else Some
(ii
.Ast_c.pinfo
)
130 | s when s =~ "KERN_.*" -> None
131 | s when s =~ "__.*" -> None
133 Some (ii.Ast_c.pinfo)
138 | Parser_c.TCommentMisc ii
139 | Parser_c.TAction ii
141 Some
(ii
.Ast_c.pinfo
)
146 let count_lines_commentized xs
=
147 let line = ref (-1) in
153 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
154 let newline = pinfo
.Common.line in
166 let print_commentized xs
=
167 let line = ref (-1) in
169 let ys = commentized xs
in
173 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
174 let newline = pinfo
.Common.line in
175 let s = pinfo
.Common.str
in
176 let s = Str.global_substitute
177 (Str.regexp
"\n") (fun s -> "") s
180 then prerr_string
(s ^
" ")
183 then pr2_no_nl
"passed:"
184 else pr2_no_nl
"\npassed:";
189 if not
(null
ys) then pr2 "";
195 (*****************************************************************************)
197 (*****************************************************************************)
199 (* called by parse_print_error_heuristic *)
201 let table = Common.full_charpos_to_pos
file in
203 Common.with_open_infile
file (fun chan
->
204 let lexbuf = Lexing.from_channel chan
in
206 let rec tokens_aux acc
=
207 let tok = Lexer_c.token
lexbuf in
208 (* fill in the line and col information *)
209 let tok = tok +> TH.visitor_info_of_tok
(fun ii
->
210 { ii
with Ast_c.pinfo
=
211 (* could assert pinfo.filename = file ? *)
212 match Ast_c.pinfo_of_info ii
with
213 Ast_c.OriginTok pi
->
214 Ast_c.OriginTok
(Common.complete_parse_info
file table pi
)
215 | Ast_c.ExpandedTok
(pi
,vpi
) ->
216 Ast_c.ExpandedTok
((Common.complete_parse_info
file table pi
),vpi
)
217 | Ast_c.FakeTok
(s,vpi
) -> Ast_c.FakeTok
(s,vpi
)
218 | Ast_c.AbstractLineTok pi
-> failwith
"should not occur"
223 then List.rev
(tok::acc
)
224 else tokens_aux (tok::acc
)
228 | Lexer_c.Lexical
s ->
229 failwith
("lexical error " ^
s ^
"\n =" ^
230 (Common.error_message
file (lexbuf_to_strpos lexbuf)))
(* Run the lexer over file [a], optionally under the exclusive "LEXING"
 * profiling bucket (the default).
 * Fix: the mangled source had a dangling [then] with no condition and
 * no alternative; restored the [if profile ... else tokens2 a] frame
 * implied by the ?(profile=true) optional argument. *)
let time_lexing ?(profile=true) a =
  if profile
  then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
  else tokens2 a
(* Profiled entry point for lexing; forwards the optional ?profile flag
 * to [time_lexing] unchanged. *)
let tokens ?profile a =
  let lex () = time_lexing ?profile a in
  Common.profile_code "C parsing.tokens" lex
(* Lex an in-memory string into its full token list (handy to debug the
 * sub-parsers).  The terminating EOF token is kept in the list.
 * Raises Failure on a lexical error.
 * Fix: the mangled source had a dangling [else] with no condition and
 * no driver call; restored the EOF test, the initial call to the
 * recursive worker, and the surrounding try/with — TODO confirm against
 * the full file. *)
let tokens_of_string string =
  let lexbuf = Lexing.from_string string in
  try
    let rec tokens_s_aux () =
      let tok = Lexer_c.token lexbuf in
      if TH.is_eof tok
      then [tok]
      else tok::(tokens_s_aux ())
    in
    tokens_s_aux ()
  with
    | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
257 (*****************************************************************************)
258 (* Parsing, but very basic, no more used *)
259 (*****************************************************************************)
262 * !!!Those function use refs, and are not reentrant !!! so take care.
263 * It use globals defined in Lexer_parser.
265 * update: because now lexer return comments tokens, those functions
266 * may not work anymore.
270 let lexbuf = Lexing.from_channel
(open_in
file) in
271 let result = Parser_c.main
Lexer_c.token
lexbuf in
275 let parse_print_error file =
276 let chan = (open_in
file) in
277 let lexbuf = Lexing.from_channel
chan in
279 let error_msg () = Common.error_message
file (lexbuf_to_strpos lexbuf) in
281 lexbuf +> Parser_c.main
Lexer_c.token
283 | Lexer_c.Lexical
s ->
284 failwith
("lexical error " ^
s^
"\n =" ^
error_msg ())
285 | Parsing.Parse_error
->
286 failwith
("parse error \n = " ^
error_msg ())
287 | Semantic_c.Semantic
(s, i
) ->
288 failwith
("semantic error " ^
s ^
"\n =" ^
error_msg ())
294 (*****************************************************************************)
295 (* Parsing subelements, useful to debug parser *)
296 (*****************************************************************************)
299 * !!!Those function use refs, and are not reentrant !!! so take care.
300 * It use globals defined in Lexer_parser.
305 * let parse_gen parsefunc s =
306 * let lexbuf = Lexing.from_string s in
307 * let result = parsefunc Lexer_c.token lexbuf in
311 let parse_gen parsefunc
s =
312 let toks = tokens_of_string s +> List.filter
TH.is_not_comment
in
315 (* Why use this lexing scheme ? Why not classically give lexer func
316 * to parser ? Because I now keep comments in lexer. Could
317 * just do a simple wrapper that when comment ask again for a token,
318 * but maybe simpler to use cur_tok technique.
320 let all_tokens = ref toks in
321 let cur_tok = ref (List.hd
!all_tokens) in
325 if TH.is_eof
!cur_tok
326 then (pr2 "LEXER: ALREADY AT END"; !cur_tok)
328 let v = Common.pop2
all_tokens in
333 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
334 let result = parsefunc
lexer_function lexbuf_fake in
338 let type_of_string = parse_gen Parser_c.type_name
339 let statement_of_string = parse_gen Parser_c.statement
340 let expression_of_string = parse_gen Parser_c.expr
342 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
348 (*****************************************************************************)
349 (* Consistency checking *)
350 (*****************************************************************************)
353 | CIdent
(* can be var, func, field, tag, enum constant *)
356 let str_of_class_ident = function
358 | CTypedef
-> "Typedef"
368 (* but take care that must still be able to use '=' *)
369 type context
= InFunction
| InEnum
| InStruct
| InInitializer
| InParams
371 | CIdent
of class_ident
375 | CCommentCpp
of cppkind
385 | CReservedKwd
(type | decl
| qualif
| flow
| misc
| attr
)
388 (* parse_typedef_fix4 *)
389 let consistency_checking2 xs
=
391 (* first phase, gather data *)
392 let stat = Hashtbl.create
101 in
394 (* default value for hash *)
395 let v1 () = Hashtbl.create
101 in
398 let bigf = { Visitor_c.default_visitor_c
with
400 Visitor_c.kexpr
= (fun (k
,bigf) x
->
401 match Ast_c.unwrap_expr x
with
404 Common.hfind_default
s v1 +> Common.hfind_default CIdent
v2 +>
405 (fun aref
-> incr aref
)
409 Visitor_c.ktype
= (fun (k
,bigf) t
->
410 match Ast_c.unwrap_typeC t
with
411 | Ast_c.TypeName
(s,_typ
) ->
413 Common.hfind_default
s v1 +> Common.hfind_default CTypedef
v2 +>
414 (fun aref
-> incr aref
)
420 xs
+> List.iter
(fun (p
, info_item
) -> Visitor_c.vk_toplevel
bigf p
);
423 let ident_to_type = ref [] in
426 (* second phase, analyze data *)
427 stat +> Hashtbl.iter
(fun k
v ->
428 let xs = Common.hash_to_list
v in
429 if List.length
xs >= 2
431 pr2 ("CONFLICT:" ^ k
);
432 let sorted = xs +> List.sort
(fun (ka
,va
) (kb
,vb
) ->
435 | CTypedef
, _
-> 1 (* first is smaller *)
441 let sorted = List.rev
sorted in
443 | [CTypedef
, i1
;CIdent
, i2
] ->
444 pr2 ("transforming some ident in typedef");
445 push2 k
ident_to_type;
447 pr2 ("TODO:other transforming?");
452 (* third phase, update ast.
453 * todo? but normally should try to handle correctly scope ? maybe sometime
454 * sizeof(id) and even if id was for a long time an identifier, maybe
455 * a few time, because of the scope it's actually really a type.
457 if (null
!ident_to_type)
460 let bigf = { Visitor_c.default_visitor_c_s
with
461 Visitor_c.kdefineval_s
= (fun (k
,bigf) x
->
463 | Ast_c.DefineExpr e
->
465 | (Ast_c.Ident
s, _
), ii
when List.mem
s !ident_to_type ->
467 (Ast_c.TypeName
(s, Ast_c.noTypedefDef
()), ii
)) in
474 Visitor_c.kexpr_s
= (fun (k
, bigf) x
->
476 | (Ast_c.SizeOfExpr e
, tref
), isizeof
->
477 let i1 = tuple_of_list1 isizeof
in
479 | (Ast_c.ParenExpr e
, _
), iiparen
->
481 | (Ast_c.Ident
s, _
), ii
when List.mem
s !ident_to_type ->
482 let (i2
, i3
) = tuple_of_list2 iiparen
in
484 (Ast_c.TypeName
(s, Ast_c.noTypedefDef
()), ii
)) in
485 (Ast_c.SizeOfType
t, tref
), [i1;i2
;i3
]
494 xs +> List.map
(fun (p
, info_item
) ->
495 Visitor_c.vk_toplevel_s
bigf p
, info_item
(* Profiled wrapper around [consistency_checking2]. *)
let consistency_checking a =
  let check () = consistency_checking2 a in
  Common.profile_code "C consistencycheck" check
504 (*****************************************************************************)
506 (*****************************************************************************)
508 (* todo: do something if find Parser_c.Eof ? *)
509 let rec find_next_synchro next already_passed
=
511 (* Maybe because not enough }, because for example an ifdef contains
512 * in both branch some opening {, we later eat too much, "on deborde
513 * sur la fonction d'apres". So already_passed may be too big and
514 * looking for next synchro point starting from next may not be the
515 * best. So maybe we can find synchro point inside already_passed
516 * instead of looking in next.
518 * But take care! must progress. We must not stay in infinite loop!
519 * For instance now I have as a error recovery to look for
520 * a "start of something", corresponding to start of function,
521 * but must go beyond this start otherwise will loop.
522 * So look at premier(external_declaration2) in parser.output and
523 * pass at least those first tokens.
525 * I have chosen to start search for next synchro point after the
526 * first { I found, so quite sure we will not loop. *)
528 let last_round = List.rev already_passed
in
530 let xs = last_round +> List.filter
TH.is_not_comment
in
532 | Parser_c.TDefine _
::_
-> true
536 then find_next_synchro_define
(last_round ++ next
) []
539 let (before
, after
) =
540 last_round +> Common.span
(fun tok ->
542 (* by looking at TOBrace we are sure that the "start of something"
543 * will not arrive too early
545 | Parser_c.TOBrace _
-> false
546 | Parser_c.TDefine _
-> false
550 find_next_synchro_orig
(after
++ next
) (List.rev before
)
554 and find_next_synchro_define next already_passed
=
557 pr2 "ERROR-RECOV: end of file while in recovery mode";
559 | (Parser_c.TDefEOL i
as v)::xs ->
560 pr2 ("ERROR-RECOV: found sync end of #define, line "^i_to_s
(TH.line_of_tok
v));
561 v::already_passed
, xs
563 find_next_synchro_define
xs (v::already_passed
)
568 and find_next_synchro_orig next already_passed
=
571 pr2 "ERROR-RECOV: end of file while in recovery mode";
574 | (Parser_c.TCBrace i
as v)::xs when TH.col_of_tok
v = 0 ->
575 pr2 ("ERROR-RECOV: found sync '}' at line "^i_to_s
(TH.line_of_tok
v));
578 | [] -> raise Impossible
(* there is a EOF token normally *)
580 (* still useful: now parser.mly allow empty ';' so normally no pb *)
581 | Parser_c.TPtVirg iptvirg
::xs ->
582 pr2 "ERROR-RECOV: found sync bis, eating } and ;";
583 (Parser_c.TPtVirg iptvirg
)::v::already_passed
, xs
585 | Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
::xs ->
586 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
587 (Parser_c.TPtVirg iptvirg
)::(Parser_c.TIdent x
)::v::already_passed
,
590 | Parser_c.TCommentSpace sp
::Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
592 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
593 (Parser_c.TCommentSpace sp
)::
594 (Parser_c.TPtVirg iptvirg
)::
595 (Parser_c.TIdent x
)::
600 | Parser_c.TCommentNewline sp
::Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
602 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
603 (Parser_c.TCommentNewline sp
)::
604 (Parser_c.TPtVirg iptvirg
)::
605 (Parser_c.TIdent x
)::
611 v::already_passed
, xs
613 | v::xs when TH.col_of_tok
v = 0 && TH.is_start_of_something
v ->
614 pr2 ("ERROR-RECOV: found sync col 0 at line "^ i_to_s
(TH.line_of_tok
v));
615 already_passed
, v::xs
618 find_next_synchro_orig
xs (v::already_passed
)
621 (*****************************************************************************)
622 (* Include/Define hacks *)
623 (*****************************************************************************)
625 (* Sometimes I prefer to generate a single token for a list of things in the
626 * lexer so that if I have to passed them, like for passing TInclude then
627 * it's easy. Also if I don't do a single token, then I need to
628 * parse the rest which may not need special stuff, like detecting
629 * end of line which the parser is not really ready for. So for instance
630 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
631 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
632 * but this kind of token is valid only after a #include and the
633 * lexing and parsing rules are different for such tokens so not that
634 * easy to parse such things in parser_c.mly. Hence the following hacks.
636 * less?: maybe could get rid of this like I get rid of some of fix_define.
639 (* ------------------------------------------------------------------------- *)
641 (* ------------------------------------------------------------------------- *)
643 (* used to generate new token from existing one *)
644 let new_info posadd str ii
=
646 Ast_c.OriginTok
{ (Ast_c.parse_info_of_info ii
) with
647 charpos
= Ast_c.pos_of_info ii
+ posadd
;
649 column
= Ast_c.col_of_info ii
+ posadd
;
651 (* must generate a new ref each time, otherwise share *)
652 cocci_tag
= ref Ast_c.emptyAnnot
;
653 comments_tag
= ref Ast_c.emptyComments
;
657 let rec comment_until_defeol xs =
659 | [] -> failwith
"cant find end of define token TDefEOL"
662 | Parser_c.TDefEOL i
->
663 Parser_c.TCommentCpp
(Ast_c.CppDirective
, TH.info_of_tok x
)
667 (* bugfix: otherwise may lose a TComment token *)
668 if TH.is_real_comment
x
670 else Parser_c.TCommentCpp
(Ast_c.CppPassingNormal
(*good?*), TH.info_of_tok
x)
672 x'
::comment_until_defeol xs
675 let drop_until_defeol xs =
677 (Common.drop_until
(function Parser_c.TDefEOL _
-> true | _
-> false) xs)
681 (* ------------------------------------------------------------------------- *)
682 (* returns a pair (replaced token, list of next tokens) *)
683 (* ------------------------------------------------------------------------- *)
685 let tokens_include (info
, includes
, filename
, inifdef
) =
686 Parser_c.TIncludeStart
(Ast_c.rewrap_str includes info
, inifdef
),
687 [Parser_c.TIncludeFilename
688 (filename
, (new_info (String.length includes
) filename info
))
691 (*****************************************************************************)
692 (* Parsing default define macros, usually in a standard.h file *)
693 (*****************************************************************************)
(* Lex a macro-definition file (e.g. standard.h) without profiling and
 * extract its #define bodies as a (name, definition) association list. *)
let parse_cpp_define_file2 file =
  tokens ~profile:false file
  +> Parsing_hacks.fix_tokens_define
  +> Parsing_hacks.extract_cpp_define
(* Profiled (exclusive "HACK" bucket) wrapper around
 * [parse_cpp_define_file2]. *)
let parse_cpp_define_file a =
  let work () = parse_cpp_define_file2 a in
  Common.profile_code_exclusif "HACK" work
703 (* can not be put in parsing_hack, cos then mutually recursive problem as
704 * we also want to parse the standard.h file.
(* Load the default macro definitions from [std_h] into the global
 * Parsing_hacks._defs table; warn (and do nothing) if the file does
 * not exist.
 * Fix: the two statements following the warning branch were left
 * hanging in the mangled source; restored the [else begin ... end]
 * frame so they only run when the file exists. *)
let init_defs std_h =
  if not (Common.lfile_exists std_h)
  then pr2 ("warning: Can't find default macro file: " ^ std_h)
  else begin
    pr2 ("init_defs: " ^ std_h);
    Parsing_hacks._defs := Common.hash_of_list (parse_cpp_define_file std_h);
  end
715 (*****************************************************************************)
716 (* Main entry point *)
717 (*****************************************************************************)
719 type info_item
= string * Parser_c.token list
721 type program2
= toplevel2 list
722 and toplevel2
= Ast_c.toplevel
* info_item
724 let program_of_program2 xs =
727 let with_program2 f program2
=
730 +> (fun (program
, infos
) ->
733 +> Common.uncurry
Common.zip
737 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
738 * possible error recovery. Indeed, they allow to skip some tokens and
739 * still be able to call again the ocamlyacc parser. It is ugly code
740 * because we cant modify ocamllex and ocamlyacc. As we want some
741 * extended lexing tricks, we have to use such refs.
743 * Those refs are now also used for my lalr(k) technique. Indeed They
744 * store the futur and previous tokens that were parsed, and so
745 * provide enough context information for powerful lex trick.
747 * - passed_tokens_last_ckp stores the passed tokens since last
748 * checkpoint. Used for NotParsedCorrectly and also to build the
749 * info_item attached to each program_element.
750 * - passed_tokens_clean is used for lookahead, in fact for lookback.
751 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
752 * contain some comments and so would make pattern matching difficult
753 * in lookahead. Hence this variable. We would like also to get rid
754 * of cpp instruction because sometimes a cpp instruction is between
755 * two tokens and makes a pattern matching fail. But lookahead also
756 * transform some cpp instruction (in comment) so can't remove them.
758 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
759 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
763 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
764 * after the call to pop2.
765 * toks = (reverse passed_tok) ++ remaining_tokens
766 * at the and of the lexer_function call.
767 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
768 * At the end of lexer_function call, cur_tok overlap with passed_tok.
770 * convention: I use "tr" for "tokens refs"
772 * I now also need this lexing trick because the lexer return comment
776 type tokens_state
= {
777 mutable rest
: Parser_c.token list
;
778 mutable rest_clean
: Parser_c.token list
;
779 mutable current
: Parser_c.token
;
780 (* it's passed since last "checkpoint", not passed from the beginning *)
781 mutable passed
: Parser_c.token list
;
782 mutable passed_clean
: Parser_c.token list
;
(* Take an independent snapshot of a tokens_state so that a later
 * [copy_tokens_stat ~src ~dst] can restore it (used to retry a parse).
 * Fix: the record expression in the mangled source lacked its braces
 * and the [rest] and [passed] fields; a clone must copy all five
 * mutable fields (the field set is established by the tokens_state
 * type and by copy_tokens_stat below). *)
let clone_tokens_stat tr =
  { rest = tr.rest;
    rest_clean = tr.rest_clean;
    current = tr.current;
    passed = tr.passed;
    passed_clean = tr.passed_clean;
  }
(* Overwrite every field of [dst] with the corresponding field of
 * [src], restoring a snapshot taken by [clone_tokens_stat].
 * All five mutable fields are assigned so the two records become
 * indistinguishable.
 * Fix: the definition ended on a dangling ";" in the mangled source;
 * restored the final unit result. *)
let copy_tokens_stat ~src ~dst =
  dst.rest <- src.rest;
  dst.rest_clean <- src.rest_clean;
  dst.current <- src.current;
  dst.passed <- src.passed;
  dst.passed_clean <- src.passed_clean;
  ()
799 let rec filter_noise n
xs =
805 | Parser_c.TMacroAttr _
->
806 filter_noise (n
-1) xs
808 x::filter_noise (n
-1) xs
811 let clean_for_lookahead xs =
816 x::filter_noise 10 xs
820 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
821 * tr means token refs.
823 let rec lexer_function ~pass tr
= fun lexbuf ->
825 | [] -> pr2 "ALREADY AT END"; tr
.current
830 if !Flag_parsing_c.debug_lexer
then Common.pr2_gen
v;
834 tr
.passed
<- v::tr
.passed
;
835 lexer_function ~pass tr
lexbuf
838 let x = List.hd tr
.rest_clean
in
839 tr
.rest_clean
<- List.tl tr
.rest_clean
;
843 (* fix_define1. Why not in parsing_hacks lookahead and do passing like
844 * I do for some ifdef directives ? Because here I also need to
845 * generate some tokens sometimes.
847 | Parser_c.TDefine
(tok) ->
848 if not
(LP.current_context
() = LP.InTopLevel
) &&
849 (!Flag_parsing_c.cpp_directive_passing
|| (pass
= 2))
851 incr
Stat.nDefinePassing
;
852 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
853 let v'
= Parser_c.TCommentCpp
(Ast_c.CppDirective
,TH.info_of_tok
v)
855 tr
.passed
<- v'
::tr
.passed
;
856 tr
.rest
<- comment_until_defeol tr
.rest
;
857 tr
.rest_clean
<- drop_until_defeol tr
.rest_clean
;
858 lexer_function ~pass tr
lexbuf
861 tr
.passed
<- v::tr
.passed
;
862 tr
.passed_clean
<- v::tr
.passed_clean
;
866 | Parser_c.TInclude
(includes
, filename
, inifdef
, info
) ->
867 if not
(LP.current_context
() = LP.InTopLevel
) &&
868 (!Flag_parsing_c.cpp_directive_passing
|| (pass
= 2))
870 incr
Stat.nIncludePassing
;
871 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
872 let v = Parser_c.TCommentCpp
(Ast_c.CppDirective
, info
) in
873 tr
.passed
<- v::tr
.passed
;
874 lexer_function ~pass tr
lexbuf
878 tokens_include (info
, includes
, filename
, inifdef
) in
879 let new_tokens_clean =
880 new_tokens
+> List.filter
TH.is_not_comment
in
882 tr
.passed
<- v::tr
.passed
;
883 tr
.passed_clean
<- v::tr
.passed_clean
;
884 tr
.rest
<- new_tokens
++ tr
.rest
;
885 tr
.rest_clean
<- new_tokens_clean ++ tr
.rest_clean
;
893 | Parser_c.TIdent
(s, ii
) ->
896 not
(!Flag_parsing_c.disable_add_typedef
) &&
898 then Parser_c.TypedefIdent
(s, ii
)
899 else Parser_c.TIdent
(s, ii
)
903 let v = Parsing_hacks.lookahead ~pass
904 (clean_for_lookahead (v::tr
.rest_clean
))
907 tr
.passed
<- v::tr
.passed
;
909 (* the lookahead may have changed the status of the token and
910 * consider it as a comment, for instance some #include are
911 * turned into comments, hence this code. *)
913 | Parser_c.TCommentCpp _
-> lexer_function ~pass tr
lexbuf
915 tr
.passed_clean
<- v::tr
.passed_clean
;
922 let get_one_elem ~pass tr
(file, filelines
) =
924 if not
(LP.is_enabled_typedef
()) && !Flag_parsing_c.debug_typedef
925 then pr2 "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
927 (* normally have to do that only when come from an exception in which
928 * case the dt() may not have been done
929 * TODO but if was in scoped scope ? have to let only the last scope
930 * so need do a LP.lexer_reset_typedef ();
933 LP._lexer_hint
:= (LP.default_hint
());
934 LP.save_typedef_state
();
938 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
941 (* -------------------------------------------------- *)
943 (* -------------------------------------------------- *)
944 Common.profile_code_exclusif
"YACC" (fun () ->
945 Left
(Parser_c.celem
(lexer_function ~pass tr
) lexbuf_fake)
948 if (pass
= 1 && !Flag_parsing_c.disable_two_pass
)|| (pass
= 2)
951 (* Lexical is not anymore launched I think *)
952 | Lexer_c.Lexical
s ->
953 pr2 ("lexical error " ^
s^
"\n =" ^
error_msg_tok tr
.current
)
954 | Parsing.Parse_error
->
955 pr2 ("parse error \n = " ^
error_msg_tok tr
.current
)
956 | Semantic_c.Semantic
(s, i
) ->
957 pr2 ("semantic error " ^
s^
"\n ="^
error_msg_tok tr
.current
)
961 LP.restore_typedef_state
();
963 (* must keep here, before the code that adjusts the tr fields *)
964 let line_error = TH.line_of_tok tr
.current
in
967 (* error recovery, go to next synchro point *)
968 let (passed'
, rest'
) = find_next_synchro tr
.rest tr
.passed
in
970 tr
.passed
<- passed'
;
972 tr
.current
<- List.hd passed'
;
973 tr
.passed_clean
<- []; (* enough ? *)
974 (* with error recovery, rest and rest_clean may not be in sync *)
975 tr
.rest_clean
<- (tr
.rest
+> List.filter
TH.is_not_comment
);
978 let info_of_bads = Common.map_eff_rev
TH.info_of_tok tr
.passed
in
979 Right
(info_of_bads, line_error)
986 (* note: as now we go in 2 passes, there is first all the error message of
987 * the lexer, and then the error of the parser. It is not anymore
990 * !!!This function use refs, and is not reentrant !!! so take care.
991 * It use globals defined in Lexer_parser and also the _defs global
992 * in parsing_hack.ml.
994 * This function uses internally some semi globals in the
995 * tokens_stat record and parsing_stat record.
998 let parse_print_error_heuristic2 file =
1000 let filelines = (""::Common.cat
file) +> Array.of_list
in
1001 let stat = Parsing_stat.default_stat
file in
1003 (* -------------------------------------------------- *)
1004 (* call lexer and get all the tokens *)
1005 (* -------------------------------------------------- *)
1006 LP.lexer_reset_typedef
();
1007 Parsing_hacks.ifdef_paren_cnt
:= 0;
1008 let toks_orig = tokens file in
1010 let toks = Parsing_hacks.fix_tokens_define
toks_orig in
1011 let toks = Parsing_hacks.fix_tokens_cpp
toks in
1015 rest_clean
= (toks +> List.filter
TH.is_not_comment
);
1016 current
= (List.hd
toks);
1026 (* todo?: I am not sure that it represents current_line, cos maybe
1027 * tr.current partipated in the previous parsing phase, so maybe tr.current
1028 * is not the first token of the next parsing phase. Same with checkpoint2.
1029 * It would be better to record when we have a } or ; in parser.mly,
1030 * cos we know that they are the last symbols of external_declaration2.
1032 * bugfix: may not be equal to 'file' as after macro expansions we can
1033 * start to parse a new entity from the body of a macro, for instance
1034 * when parsing a define_machine() body, cf standard.h
1036 let checkpoint = TH.line_of_tok
tr.current
in
1037 let checkpoint_file = TH.file_of_tok
tr.current
in
1039 let tr_save = clone_tokens_stat tr in
1041 (* call the parser *)
1043 let pass1 = get_one_elem ~pass
:1 tr (file, filelines) in
1047 if !Flag_parsing_c.disable_two_pass
1050 pr2 "parsing pass2: try again";
1051 copy_tokens_stat ~src
:tr_save ~dst
: tr;
1052 let pass2 = get_one_elem ~pass
:2 tr (file, filelines) in
1058 (* again not sure if checkpoint2 corresponds to end of bad region *)
1059 let checkpoint2 = TH.line_of_tok
tr.current
in (* <> line_error *)
1060 let checkpoint2_file = TH.file_of_tok
tr.current
in
1065 | Right
(_
, line_error) ->
1067 let xs = tr.passed
+> List.rev
+> List.filter
TH.is_not_comment
in
1068 if List.length
xs >= 2
1070 (match Common.head_middle_tail
xs with
1071 | Parser_c.TDefine _
, _
, Parser_c.TDefEOL _
->
1076 pr2 "WIERD: length list of error recovery tokens < 2 ";
1080 (if was_define && !Flag_parsing_c.filter_msg_define_error
1084 if (checkpoint_file = checkpoint2_file) && checkpoint_file = file
1085 then print_bad line_error (checkpoint, checkpoint2) filelines
1086 else pr2 "PB: bad: but on tokens not from original file"
1093 if (checkpoint_file = checkpoint2_file) && (checkpoint_file = file)
1094 then (checkpoint2 - checkpoint)
1096 (* TODO? so if error come in middle of something ? where the
1097 * start token was from original file but synchro found in body
1098 * of macro ? then can have wrong number of lines stat.
1099 * Maybe simpler just to look at tr.passed and count
1100 * the lines in the token from the correct file ?
1103 let info = mk_info_item file (List.rev
tr.passed
) in
1105 (* some stat updates *)
1106 stat.Stat.commentized <-
1107 stat.Stat.commentized + count_lines_commentized (snd
info);
1112 | Right
(info_of_bads, _line_error
) ->
1113 Ast_c.NotParsedCorrectly
info_of_bads
1116 | Ast_c.NotParsedCorrectly
xs ->
1117 if was_define && !Flag_parsing_c.filter_define_error
1118 then stat.Stat.correct
<- stat.Stat.correct
+ diffline
1119 else stat.Stat.bad
<- stat.Stat.bad
+ diffline
1120 | _
-> stat.Stat.correct
<- stat.Stat.correct
+ diffline
1124 | Ast_c.FinalDef
x -> [(Ast_c.FinalDef
x, info)]
1125 | xs -> (xs, info):: loop tr (* recurse *)
1130 let v = consistency_checking v in
(* Wrap the whole heuristic parse under the "TOTAL" profiling bucket. *)
let time_total_parsing a =
  let run () = parse_print_error_heuristic2 a in
  Common.profile_code "TOTAL" run
(* Public profiled entry point ("C parsing" bucket) for the
 * error-recovering parser. *)
let parse_print_error_heuristic a =
  let run () = time_total_parsing a in
  Common.profile_code "C parsing" run
(* Main public entry point: parse a C file with full cpp handling.
 * Currently a plain alias for parse_print_error_heuristic. *)
1142 let parse_c_and_cpp a
= parse_print_error_heuristic a
1144 (*****************************************************************************)
1145 (* Same but faster cos memoize stuff *)
1146 (*****************************************************************************)
1147 let parse_cache file =
1148 if not
!Flag_parsing_c.use_cache
then parse_print_error_heuristic file
1150 let _ = pr2 "TOFIX" in
1151 let need_no_changed_files =
1152 (* should use Sys.argv.(0), would be safer. *)
1156 Config.path ^ "/parsing_c/c_parser.cma";
1157 (* we may also depend now on the semantic patch because
1158 the SP may use macro and so we will disable some of the
1159 macro expansions from standard.h.
1165 let need_no_changed_variables =
1166 (* could add some of the flags of flag_parsing_c.ml *)
1169 Common.cache_computation_robust
1171 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1172 (fun () -> parse_print_error_heuristic file)
1176 (*****************************************************************************)
1177 (* Some special cases *)
1178 (*****************************************************************************)
1180 let (cstatement_of_string
: string -> Ast_c.statement
) = fun s ->
1181 Common.write_file
("/tmp/__cocci.c") ("void main() { \n" ^
s ^
"\n}");
1182 let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst
in
1183 program +> Common.find_some
(fun (e
,_) ->
1185 | Ast_c.Definition
({Ast_c.f_body
= [Ast_c.StmtElem st
]},_) -> Some st
1189 let (cexpression_of_string
: string -> Ast_c.expression
) = fun s ->
1190 Common.write_file
("/tmp/__cocci.c") ("void main() { \n" ^
s ^
";\n}");
1191 let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst
in
1192 program +> Common.find_some
(fun (e
,_) ->
1194 | Ast_c.Definition
({Ast_c.f_body
= compound
},_) ->
1195 (match compound
with
1196 | [Ast_c.StmtElem
(Ast_c.ExprStatement
(Some e
),ii
)] -> Some e