1 (* Copyright (C) 2006, 2007, 2008 Yoann Padioleau
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
15 module TH
= Token_helpers
16 module LP
= Lexer_parser
18 module Stat
= Parsing_stat
20 (*****************************************************************************)
22 (*****************************************************************************)
24 if !Flag_parsing_c.verbose_parsing
28 if !Flag_parsing_c.verbose_parsing
29 then Common.pr2_once s
31 (*****************************************************************************)
33 (*****************************************************************************)
35 let lexbuf_to_strpos lexbuf
=
36 (Lexing.lexeme lexbuf
, Lexing.lexeme_start lexbuf
)
38 let token_to_strpos tok
=
39 (TH.str_of_tok tok
, TH.pos_of_tok tok
)
42 let error_msg_tok tok
=
43 let file = TH.file_of_tok tok
in
44 if !Flag_parsing_c.verbose_parsing
45 then Common.error_message
file (token_to_strpos tok
)
46 else ("error in " ^
file ^
"set verbose_parsing for more info")
49 let print_bad line_error
(start_line
, end_line
) filelines
=
51 pr2 ("badcount: " ^ i_to_s
(end_line
- start_line
));
53 for i
= start_line
to end_line
do
54 let line = filelines
.(i
) in
57 then pr2 ("BAD:!!!!!" ^
" " ^
line)
58 else pr2 ("bad:" ^
" " ^
line)
64 let mk_info_item2 filename toks
=
65 let buf = Buffer.create
100 in
67 (* old: get_slice_file filename (line1, line2) *)
69 toks
+> List.iter
(fun tok
->
70 match TH.pinfo_of_tok tok
with
71 | Ast_c.OriginTok _
->
72 Buffer.add_string
buf (TH.str_of_tok tok
)
73 | Ast_c.AbstractLineTok _
->
82 let mk_info_item a b
=
83 Common.profile_code
"C parsing.mk_info_item"
84 (fun () -> mk_info_item2 a b
)
87 let info_same_line line xs
=
88 xs
+> List.filter
(fun info
-> Ast_c.line_of_info info
= line)
91 (*****************************************************************************)
92 (* Stats on what was passed/commentized *)
93 (*****************************************************************************)
95 let commentized xs
= xs
+> Common.map_filter
(function
96 | Parser_c.TCommentCpp
(cppkind
, ii
) ->
97 let s = Ast_c.str_of_info ii
in
99 match !Flag_parsing_c.filter_passed_level
with
102 List.mem cppkind
[Ast_c.CppAttr
]
106 List.mem cppkind
[Ast_c.CppAttr
;Ast_c.CppPassingNormal
]
110 List.mem cppkind
[Ast_c.CppAttr
;Ast_c.CppPassingNormal
;Ast_c.CppDirective
]
114 List.mem cppkind
[Ast_c.CppAttr
;Ast_c.CppPassingNormal
;Ast_c.CppMacro
]
120 List.mem cppkind
[Ast_c.CppAttr
;Ast_c.CppPassingNormal
;Ast_c.CppDirective
;Ast_c.CppMacro
]
127 | _
-> failwith
"not valid level passing number"
129 if legal_passing then None
else Some
(ii
.Ast_c.pinfo
)
134 | s when s =~ "KERN_.*" -> None
135 | s when s =~ "__.*" -> None
137 Some (ii.Ast_c.pinfo)
142 | Parser_c.TCommentMisc ii
143 | Parser_c.TAction ii
145 Some
(ii
.Ast_c.pinfo
)
150 let count_lines_commentized xs
=
151 let line = ref (-1) in
157 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
158 let newline = pinfo
.Common.line in
170 let print_commentized xs
=
171 let line = ref (-1) in
173 let ys = commentized xs
in
177 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
178 let newline = pinfo
.Common.line in
179 let s = pinfo
.Common.str
in
180 let s = Str.global_substitute
181 (Str.regexp
"\n") (fun s -> "") s
184 then prerr_string
(s ^
" ")
187 then pr2_no_nl
"passed:"
188 else pr2_no_nl
"\npassed:";
193 if not
(null
ys) then pr2 "";
199 (*****************************************************************************)
201 (*****************************************************************************)
203 (* called by parse_print_error_heuristic *)
205 let table = Common.full_charpos_to_pos
file in
207 Common.with_open_infile
file (fun chan
->
208 let lexbuf = Lexing.from_channel chan
in
210 let rec tokens_aux acc
=
211 let tok = Lexer_c.token
lexbuf in
212 (* fill in the line and col information *)
213 let tok = tok +> TH.visitor_info_of_tok
(fun ii
->
214 { ii
with Ast_c.pinfo
=
215 (* could assert pinfo.filename = file ? *)
216 match Ast_c.pinfo_of_info ii
with
217 Ast_c.OriginTok pi
->
218 Ast_c.OriginTok
(Common.complete_parse_info
file table pi
)
219 | Ast_c.ExpandedTok
(pi
,vpi
) ->
220 Ast_c.ExpandedTok
((Common.complete_parse_info
file table pi
),vpi
)
221 | Ast_c.FakeTok
(s,vpi
) -> Ast_c.FakeTok
(s,vpi
)
222 | Ast_c.AbstractLineTok pi
-> failwith
"should not occur"
227 then List.rev
(tok::acc
)
228 else tokens_aux (tok::acc
)
232 | Lexer_c.Lexical
s ->
233 failwith
("lexical error " ^
s ^
"\n =" ^
234 (Common.error_message
file (lexbuf_to_strpos lexbuf)))
238 let time_lexing ?
(profile
=true) a
=
240 then Common.profile_code_exclusif
"LEXING" (fun () -> tokens2 a
)
242 let tokens ?profile a
=
243 Common.profile_code
"C parsing.tokens" (fun () -> time_lexing ?profile a
)
246 let tokens_of_string string =
247 let lexbuf = Lexing.from_string
string in
249 let rec tokens_s_aux () =
250 let tok = Lexer_c.token
lexbuf in
253 else tok::(tokens_s_aux ())
257 | Lexer_c.Lexical
s -> failwith
("lexical error " ^
s ^
"\n =" )
261 (*****************************************************************************)
262 (* Parsing, but very basic, no more used *)
263 (*****************************************************************************)
266 * !!!Those function use refs, and are not reentrant !!! so take care.
267 * It use globals defined in Lexer_parser.
269 * update: because now lexer return comments tokens, those functions
270 * may not work anymore.
274 let lexbuf = Lexing.from_channel
(open_in
file) in
275 let result = Parser_c.main
Lexer_c.token
lexbuf in
279 let parse_print_error file =
280 let chan = (open_in
file) in
281 let lexbuf = Lexing.from_channel
chan in
283 let error_msg () = Common.error_message
file (lexbuf_to_strpos lexbuf) in
285 lexbuf +> Parser_c.main
Lexer_c.token
287 | Lexer_c.Lexical
s ->
288 failwith
("lexical error " ^
s^
"\n =" ^
error_msg ())
289 | Parsing.Parse_error
->
290 failwith
("parse error \n = " ^
error_msg ())
291 | Semantic_c.Semantic
(s, i
) ->
292 failwith
("semantic error " ^
s ^
"\n =" ^
error_msg ())
298 (*****************************************************************************)
299 (* Parsing subelements, useful to debug parser *)
300 (*****************************************************************************)
303 * !!!Those function use refs, and are not reentrant !!! so take care.
304 * It use globals defined in Lexer_parser.
309 * let parse_gen parsefunc s =
310 * let lexbuf = Lexing.from_string s in
311 * let result = parsefunc Lexer_c.token lexbuf in
315 let parse_gen parsefunc
s =
316 let toks = tokens_of_string s +> List.filter
TH.is_not_comment
in
319 (* Why use this lexing scheme ? Why not classically give lexer func
320 * to parser ? Because I now keep comments in lexer. Could
321 * just do a simple wrapper that when comment ask again for a token,
322 * but maybe simpler to use cur_tok technique.
324 let all_tokens = ref toks in
325 let cur_tok = ref (List.hd
!all_tokens) in
329 if TH.is_eof
!cur_tok
330 then (pr2 "LEXER: ALREADY AT END"; !cur_tok)
332 let v = Common.pop2
all_tokens in
337 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
338 let result = parsefunc
lexer_function lexbuf_fake in
342 let type_of_string = parse_gen Parser_c.type_name
343 let statement_of_string = parse_gen Parser_c.statement
344 let expression_of_string = parse_gen Parser_c.expr
346 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
352 (*****************************************************************************)
353 (* Consistency checking *)
354 (*****************************************************************************)
357 | CIdent
(* can be var, func, field, tag, enum constant *)
360 let str_of_class_ident = function
362 | CTypedef
-> "Typedef"
372 (* but take care that must still be able to use '=' *)
373 type context
= InFunction
| InEnum
| InStruct
| InInitializer
| InParams
375 | CIdent
of class_ident
379 | CCommentCpp
of cppkind
389 | CReservedKwd
(type | decl
| qualif
| flow
| misc
| attr
)
392 (* parse_typedef_fix4 *)
393 let consistency_checking2 xs
=
395 (* first phase, gather data *)
396 let stat = Hashtbl.create
101 in
398 (* default value for hash *)
399 let v1 () = Hashtbl.create
101 in
402 let bigf = { Visitor_c.default_visitor_c
with
404 Visitor_c.kexpr
= (fun (k
,bigf) x
->
405 match Ast_c.unwrap_expr x
with
408 Common.hfind_default
s v1 +> Common.hfind_default CIdent
v2 +>
409 (fun aref
-> incr aref
)
413 Visitor_c.ktype
= (fun (k
,bigf) t
->
414 match Ast_c.unwrap_typeC t
with
415 | Ast_c.TypeName
(s,_typ
) ->
417 Common.hfind_default
s v1 +> Common.hfind_default CTypedef
v2 +>
418 (fun aref
-> incr aref
)
424 xs
+> List.iter
(fun (p
, info_item
) -> Visitor_c.vk_toplevel
bigf p
);
427 let ident_to_type = ref [] in
430 (* second phase, analyze data *)
431 stat +> Hashtbl.iter
(fun k
v ->
432 let xs = Common.hash_to_list
v in
433 if List.length
xs >= 2
435 pr2 ("CONFLICT:" ^ k
);
436 let sorted = xs +> List.sort
(fun (ka
,va
) (kb
,vb
) ->
439 | CTypedef
, _
-> 1 (* first is smaller *)
445 let sorted = List.rev
sorted in
447 | [CTypedef
, i1
;CIdent
, i2
] ->
448 pr2 ("transforming some ident in typedef");
449 push2 k
ident_to_type;
451 pr2 ("TODO:other transforming?");
456 (* third phase, update ast.
457 * todo? but normally should try to handle correctly scope ? maybe sometime
458 * sizeof(id) and even if id was for a long time an identifier, maybe
459 * a few time, because of the scope it's actually really a type.
461 if (null
!ident_to_type)
464 let bigf = { Visitor_c.default_visitor_c_s
with
465 Visitor_c.kdefineval_s
= (fun (k
,bigf) x
->
467 | Ast_c.DefineExpr e
->
469 | (Ast_c.Ident
s, _
), ii
when List.mem
s !ident_to_type ->
471 (Ast_c.TypeName
(s, Ast_c.noTypedefDef
()), ii
)) in
478 Visitor_c.kexpr_s
= (fun (k
, bigf) x
->
480 | (Ast_c.SizeOfExpr e
, tref
), isizeof
->
481 let i1 = tuple_of_list1 isizeof
in
483 | (Ast_c.ParenExpr e
, _
), iiparen
->
485 | (Ast_c.Ident
s, _
), ii
when List.mem
s !ident_to_type ->
486 let (i2
, i3
) = tuple_of_list2 iiparen
in
488 (Ast_c.TypeName
(s, Ast_c.noTypedefDef
()), ii
)) in
489 (Ast_c.SizeOfType
t, tref
), [i1;i2
;i3
]
498 xs +> List.map
(fun (p
, info_item
) ->
499 Visitor_c.vk_toplevel_s
bigf p
, info_item
503 let consistency_checking a
=
504 Common.profile_code
"C consistencycheck" (fun () -> consistency_checking2 a
)
508 (*****************************************************************************)
510 (*****************************************************************************)
512 (* todo: do something if find Parser_c.Eof ? *)
513 let rec find_next_synchro next already_passed
=
515 (* Maybe because not enough }, because for example an ifdef contains
516 * in both branch some opening {, we later eat too much, "on deborde
517 * sur la fonction d'apres". So already_passed may be too big and
518 * looking for next synchro point starting from next may not be the
519 * best. So maybe we can find synchro point inside already_passed
520 * instead of looking in next.
522 * But take care! must progress. We must not stay in infinite loop!
523 * For instance now I have as a error recovery to look for
524 * a "start of something", corresponding to start of function,
525 * but must go beyond this start otherwise will loop.
526 * So look at premier(external_declaration2) in parser.output and
527 * pass at least those first tokens.
529 * I have chosen to start search for next synchro point after the
530 * first { I found, so quite sure we will not loop. *)
532 let last_round = List.rev already_passed
in
534 let xs = last_round +> List.filter
TH.is_not_comment
in
536 | Parser_c.TDefine _
::_
-> true
540 then find_next_synchro_define
(last_round ++ next
) []
543 let (before
, after
) =
544 last_round +> Common.span
(fun tok ->
546 (* by looking at TOBrace we are sure that the "start of something"
547 * will not arrive too early
549 | Parser_c.TOBrace _
-> false
550 | Parser_c.TDefine _
-> false
554 find_next_synchro_orig
(after
++ next
) (List.rev before
)
558 and find_next_synchro_define next already_passed
=
561 pr2 "ERROR-RECOV: end of file while in recovery mode";
563 | (Parser_c.TDefEOL i
as v)::xs ->
564 pr2 ("ERROR-RECOV: found sync end of #define, line "^i_to_s
(TH.line_of_tok
v));
565 v::already_passed
, xs
567 find_next_synchro_define
xs (v::already_passed
)
572 and find_next_synchro_orig next already_passed
=
575 pr2 "ERROR-RECOV: end of file while in recovery mode";
578 | (Parser_c.TCBrace i
as v)::xs when TH.col_of_tok
v = 0 ->
579 pr2 ("ERROR-RECOV: found sync '}' at line "^i_to_s
(TH.line_of_tok
v));
582 | [] -> raise Impossible
(* there is a EOF token normally *)
584 (* still useful: now parser.mly allow empty ';' so normally no pb *)
585 | Parser_c.TPtVirg iptvirg
::xs ->
586 pr2 "ERROR-RECOV: found sync bis, eating } and ;";
587 (Parser_c.TPtVirg iptvirg
)::v::already_passed
, xs
589 | Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
::xs ->
590 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
591 (Parser_c.TPtVirg iptvirg
)::(Parser_c.TIdent x
)::v::already_passed
,
594 | Parser_c.TCommentSpace sp
::Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
596 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
597 (Parser_c.TCommentSpace sp
)::
598 (Parser_c.TPtVirg iptvirg
)::
599 (Parser_c.TIdent x
)::
604 | Parser_c.TCommentNewline sp
::Parser_c.TIdent x
::Parser_c.TPtVirg iptvirg
606 pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
607 (Parser_c.TCommentNewline sp
)::
608 (Parser_c.TPtVirg iptvirg
)::
609 (Parser_c.TIdent x
)::
615 v::already_passed
, xs
617 | v::xs when TH.col_of_tok
v = 0 && TH.is_start_of_something
v ->
618 pr2 ("ERROR-RECOV: found sync col 0 at line "^ i_to_s
(TH.line_of_tok
v));
619 already_passed
, v::xs
622 find_next_synchro_orig
xs (v::already_passed
)
625 (*****************************************************************************)
626 (* Include/Define hacks *)
627 (*****************************************************************************)
629 (* Sometimes I prefer to generate a single token for a list of things in the
630 * lexer so that if I have to passed them, like for passing TInclude then
631 * it's easy. Also if I don't do a single token, then I need to
632 * parse the rest which may not need special stuff, like detecting
633 * end of line which the parser is not really ready for. So for instance
634 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
635 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
636 * but this kind of token is valid only after a #include and the
637 * lexing and parsing rules are different for such tokens so not that
638 * easy to parse such things in parser_c.mly. Hence the following hacks.
640 * less?: maybe could get rid of this like I get rid of some of fix_define.
643 (* ------------------------------------------------------------------------- *)
645 (* ------------------------------------------------------------------------- *)
647 (* used to generate new token from existing one *)
648 let new_info posadd str ii
=
650 Ast_c.OriginTok
{ (Ast_c.parse_info_of_info ii
) with
651 charpos
= Ast_c.pos_of_info ii
+ posadd
;
653 column
= Ast_c.col_of_info ii
+ posadd
;
655 (* must generate a new ref each time, otherwise share *)
656 cocci_tag
= ref Ast_c.emptyAnnot
;
657 comments_tag
= ref Ast_c.emptyComments
;
661 let rec comment_until_defeol xs =
663 | [] -> failwith
"cant find end of define token TDefEOL"
666 | Parser_c.TDefEOL i
->
667 Parser_c.TCommentCpp
(Ast_c.CppDirective
, TH.info_of_tok x
)
671 (* bugfix: otherwise may lose a TComment token *)
672 if TH.is_real_comment
x
674 else Parser_c.TCommentCpp
(Ast_c.CppPassingNormal
(*good?*), TH.info_of_tok
x)
676 x'
::comment_until_defeol xs
679 let drop_until_defeol xs =
681 (Common.drop_until
(function Parser_c.TDefEOL _
-> true | _
-> false) xs)
685 (* ------------------------------------------------------------------------- *)
686 (* returns a pair (replaced token, list of next tokens) *)
687 (* ------------------------------------------------------------------------- *)
689 let tokens_include (info
, includes
, filename
, inifdef
) =
690 Parser_c.TIncludeStart
(Ast_c.rewrap_str includes info
, inifdef
),
691 [Parser_c.TIncludeFilename
692 (filename
, (new_info (String.length includes
) filename info
))
695 (*****************************************************************************)
696 (* Parsing default define macros, usually in a standard.h file *)
697 (*****************************************************************************)
699 let parse_cpp_define_file2 file =
700 let toks = tokens ~profile
:false file in
701 let toks = Parsing_hacks.fix_tokens_define
toks in
702 Parsing_hacks.extract_cpp_define
toks
704 let parse_cpp_define_file a
=
705 Common.profile_code_exclusif
"HACK" (fun () -> parse_cpp_define_file2 a
)
707 (* can not be put in parsing_hack, cos then mutually recursive problem as
708 * we also want to parse the standard.h file.
710 let init_defs std_h
=
711 if not
(Common.lfile_exists std_h
)
712 then pr2 ("warning: Can't find default macro file: " ^ std_h
)
714 pr2 ("init_defs: " ^ std_h
);
715 Parsing_hacks._defs
:= Common.hash_of_list
(parse_cpp_define_file std_h
);
719 (*****************************************************************************)
720 (* Main entry point *)
721 (*****************************************************************************)
723 type info_item
= string * Parser_c.token list
725 type program2
= toplevel2 list
726 and toplevel2
= Ast_c.toplevel
* info_item
728 let program_of_program2 xs =
731 let with_program2 f program2
=
734 +> (fun (program
, infos
) ->
737 +> Common.uncurry
Common.zip
741 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
742 * possible error recovery. Indeed, they allow to skip some tokens and
743 * still be able to call again the ocamlyacc parser. It is ugly code
744 * because we cant modify ocamllex and ocamlyacc. As we want some
745 * extended lexing tricks, we have to use such refs.
747 * Those refs are now also used for my lalr(k) technique. Indeed They
748 * store the futur and previous tokens that were parsed, and so
749 * provide enough context information for powerful lex trick.
751 * - passed_tokens_last_ckp stores the passed tokens since last
752 * checkpoint. Used for NotParsedCorrectly and also to build the
753 * info_item attached to each program_element.
754 * - passed_tokens_clean is used for lookahead, in fact for lookback.
755 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
756 * contain some comments and so would make pattern matching difficult
757 * in lookahead. Hence this variable. We would like also to get rid
758 * of cpp instruction because sometimes a cpp instruction is between
759 * two tokens and makes a pattern matching fail. But lookahead also
760 * transform some cpp instruction (in comment) so can't remove them.
762 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
763 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
767 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
768 * after the call to pop2.
769 * toks = (reverse passed_tok) ++ remaining_tokens
770 * at the and of the lexer_function call.
771 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
772 * At the end of lexer_function call, cur_tok overlap with passed_tok.
774 * convention: I use "tr" for "tokens refs"
776 * I now also need this lexing trick because the lexer return comment
780 type tokens_state
= {
781 mutable rest
: Parser_c.token list
;
782 mutable rest_clean
: Parser_c.token list
;
783 mutable current
: Parser_c.token
;
784 (* it's passed since last "checkpoint", not passed from the beginning *)
785 mutable passed
: Parser_c.token list
;
786 mutable passed_clean
: Parser_c.token list
;
788 let clone_tokens_stat tr
=
790 rest_clean
= tr
.rest_clean
;
791 current
= tr
.current
;
793 passed_clean
= tr
.passed_clean
;
795 let copy_tokens_stat ~src ~dst
=
796 dst
.rest
<- src
.rest
;
797 dst
.rest_clean
<- src
.rest_clean
;
798 dst
.current
<- src
.current
;
799 dst
.passed
<- src
.passed
;
800 dst
.passed_clean
<- src
.passed_clean
;
803 let rec filter_noise n
xs =
809 | Parser_c.TMacroAttr _
->
810 filter_noise (n
-1) xs
812 x::filter_noise (n
-1) xs
815 let clean_for_lookahead xs =
820 x::filter_noise 10 xs
824 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
825 * tr means token refs.
827 let rec lexer_function ~pass tr
= fun lexbuf ->
829 | [] -> pr2 "ALREADY AT END"; tr
.current
834 if !Flag_parsing_c.debug_lexer
then Common.pr2_gen
v;
838 tr
.passed
<- v::tr
.passed
;
839 lexer_function ~pass tr
lexbuf
842 let x = List.hd tr
.rest_clean
in
843 tr
.rest_clean
<- List.tl tr
.rest_clean
;
847 (* fix_define1. Why not in parsing_hacks lookahead and do passing like
848 * I do for some ifdef directives ? Because here I also need to
849 * generate some tokens sometimes.
851 | Parser_c.TDefine
(tok) ->
852 if not
(LP.current_context
() = LP.InTopLevel
) &&
853 (!Flag_parsing_c.cpp_directive_passing
|| (pass
= 2))
855 incr
Stat.nDefinePassing
;
856 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
857 let v'
= Parser_c.TCommentCpp
(Ast_c.CppDirective
,TH.info_of_tok
v)
859 tr
.passed
<- v'
::tr
.passed
;
860 tr
.rest
<- comment_until_defeol tr
.rest
;
861 tr
.rest_clean
<- drop_until_defeol tr
.rest_clean
;
862 lexer_function ~pass tr
lexbuf
865 tr
.passed
<- v::tr
.passed
;
866 tr
.passed_clean
<- v::tr
.passed_clean
;
870 | Parser_c.TInclude
(includes
, filename
, inifdef
, info
) ->
871 if not
(LP.current_context
() = LP.InTopLevel
) &&
872 (!Flag_parsing_c.cpp_directive_passing
|| (pass
= 2))
874 incr
Stat.nIncludePassing
;
875 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
876 let v = Parser_c.TCommentCpp
(Ast_c.CppDirective
, info
) in
877 tr
.passed
<- v::tr
.passed
;
878 lexer_function ~pass tr
lexbuf
882 tokens_include (info
, includes
, filename
, inifdef
) in
883 let new_tokens_clean =
884 new_tokens
+> List.filter
TH.is_not_comment
in
886 tr
.passed
<- v::tr
.passed
;
887 tr
.passed_clean
<- v::tr
.passed_clean
;
888 tr
.rest
<- new_tokens
++ tr
.rest
;
889 tr
.rest_clean
<- new_tokens_clean ++ tr
.rest_clean
;
897 | Parser_c.TIdent
(s, ii
) ->
900 not
(!Flag_parsing_c.disable_add_typedef
) &&
902 then Parser_c.TypedefIdent
(s, ii
)
903 else Parser_c.TIdent
(s, ii
)
907 let v = Parsing_hacks.lookahead ~pass
908 (clean_for_lookahead (v::tr
.rest_clean
))
911 tr
.passed
<- v::tr
.passed
;
913 (* the lookahead may have changed the status of the token and
914 * consider it as a comment, for instance some #include are
915 * turned into comments, hence this code. *)
917 | Parser_c.TCommentCpp _
-> lexer_function ~pass tr
lexbuf
919 tr
.passed_clean
<- v::tr
.passed_clean
;
926 let get_one_elem ~pass tr
(file, filelines
) =
928 if not
(LP.is_enabled_typedef
()) && !Flag_parsing_c.debug_typedef
929 then pr2 "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
931 (* normally have to do that only when come from an exception in which
932 * case the dt() may not have been done
933 * TODO but if was in scoped scope ? have to let only the last scope
934 * so need do a LP.lexer_reset_typedef ();
937 LP._lexer_hint
:= (LP.default_hint
());
938 LP.save_typedef_state
();
942 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
945 (* -------------------------------------------------- *)
947 (* -------------------------------------------------- *)
948 Common.profile_code_exclusif
"YACC" (fun () ->
949 Left
(Parser_c.celem
(lexer_function ~pass tr
) lexbuf_fake)
952 if (pass
= 1 && !Flag_parsing_c.disable_two_pass
)|| (pass
= 2)
955 (* Lexical is not anymore launched I think *)
956 | Lexer_c.Lexical
s ->
957 pr2 ("lexical error " ^
s^
"\n =" ^
error_msg_tok tr
.current
)
958 | Parsing.Parse_error
->
959 pr2 ("parse error \n = " ^
error_msg_tok tr
.current
)
960 | Semantic_c.Semantic
(s, i
) ->
961 pr2 ("semantic error " ^
s^
"\n ="^
error_msg_tok tr
.current
)
965 LP.restore_typedef_state
();
967 (* must keep here, before the code that adjusts the tr fields *)
968 let line_error = TH.line_of_tok tr
.current
in
971 (* error recovery, go to next synchro point *)
972 let (passed'
, rest'
) = find_next_synchro tr
.rest tr
.passed
in
974 tr
.passed
<- passed'
;
976 tr
.current
<- List.hd passed'
;
977 tr
.passed_clean
<- []; (* enough ? *)
978 (* with error recovery, rest and rest_clean may not be in sync *)
979 tr
.rest_clean
<- (tr
.rest
+> List.filter
TH.is_not_comment
);
982 let info_of_bads = Common.map_eff_rev
TH.info_of_tok tr
.passed
in
983 Right
(info_of_bads, line_error, tr
.passed
)
990 (* note: as now we go in 2 passes, there is first all the error message of
991 * the lexer, and then the error of the parser. It is not anymore
994 * !!!This function use refs, and is not reentrant !!! so take care.
995 * It use globals defined in Lexer_parser and also the _defs global
996 * in parsing_hack.ml.
998 * This function uses internally some semi globals in the
999 * tokens_stat record and parsing_stat record.
1002 let parse_print_error_heuristic2 file =
1004 let filelines = Common.cat_array
file in
1005 let stat = Parsing_stat.default_stat
file in
1007 (* -------------------------------------------------- *)
1008 (* call lexer and get all the tokens *)
1009 (* -------------------------------------------------- *)
1010 LP.lexer_reset_typedef
();
1011 Parsing_hacks.ifdef_paren_cnt
:= 0;
1012 let toks_orig = tokens file in
1014 let toks = Parsing_hacks.fix_tokens_define
toks_orig in
1015 let toks = Parsing_hacks.fix_tokens_cpp
toks in
1019 rest_clean
= (toks +> List.filter
TH.is_not_comment
);
1020 current
= (List.hd
toks);
1030 (* todo?: I am not sure that it represents current_line, cos maybe
1031 * tr.current partipated in the previous parsing phase, so maybe tr.current
1032 * is not the first token of the next parsing phase. Same with checkpoint2.
1033 * It would be better to record when we have a } or ; in parser.mly,
1034 * cos we know that they are the last symbols of external_declaration2.
1036 * bugfix: may not be equal to 'file' as after macro expansions we can
1037 * start to parse a new entity from the body of a macro, for instance
1038 * when parsing a define_machine() body, cf standard.h
1040 let checkpoint = TH.line_of_tok
tr.current
in
1041 let checkpoint_file = TH.file_of_tok
tr.current
in
1043 let tr_save = clone_tokens_stat tr in
1045 (* call the parser *)
1047 let pass1 = get_one_elem ~pass
:1 tr (file, filelines) in
1051 if !Flag_parsing_c.disable_two_pass
1054 pr2 "parsing pass2: try again";
1055 copy_tokens_stat ~src
:tr_save ~dst
: tr;
1056 let pass2 = get_one_elem ~pass
:2 tr (file, filelines) in
1062 (* again not sure if checkpoint2 corresponds to end of bad region *)
1063 let checkpoint2 = TH.line_of_tok
tr.current
in (* <> line_error *)
1064 let checkpoint2_file = TH.file_of_tok
tr.current
in
1069 | Right
(_
, line_error, _
) ->
1071 let xs = tr.passed
+> List.rev
+> List.filter
TH.is_not_comment
in
1072 if List.length
xs >= 2
1074 (match Common.head_middle_tail
xs with
1075 | Parser_c.TDefine _
, _
, Parser_c.TDefEOL _
->
1080 pr2 "WIERD: length list of error recovery tokens < 2 ";
1084 (if was_define && !Flag_parsing_c.filter_msg_define_error
1088 if (checkpoint_file = checkpoint2_file) && checkpoint_file = file
1089 then print_bad line_error (checkpoint, checkpoint2) filelines
1090 else pr2 "PB: bad: but on tokens not from original file"
1097 if (checkpoint_file = checkpoint2_file) && (checkpoint_file = file)
1098 then (checkpoint2 - checkpoint)
1100 (* TODO? so if error come in middle of something ? where the
1101 * start token was from original file but synchro found in body
1102 * of macro ? then can have wrong number of lines stat.
1103 * Maybe simpler just to look at tr.passed and count
1104 * the lines in the token from the correct file ?
1107 let info = mk_info_item file (List.rev
tr.passed
) in
1109 (* some stat updates *)
1110 stat.Stat.commentized <-
1111 stat.Stat.commentized + count_lines_commentized (snd
info);
1116 stat.Stat.correct
<- stat.Stat.correct
+ diffline;
1118 | Right
(info_of_bads, line_error, toks_of_bads
) ->
1119 if was_define && !Flag_parsing_c.filter_define_error
1120 then stat.Stat.correct
<- stat.Stat.correct
+ diffline
1121 else stat.Stat.bad
<- stat.Stat.bad
+ diffline;
1125 +> Common.filter
(TH.is_same_line
line_error)
1126 +> Common.filter
TH.is_ident_like
1129 (pbline +> List.map
TH.str_of_tok
), line_error
1131 stat.Stat.problematic_lines
<-
1132 error_info::stat.Stat.problematic_lines
;
1134 Ast_c.NotParsedCorrectly
info_of_bads
1138 | Ast_c.FinalDef
x -> [(Ast_c.FinalDef
x, info)]
1139 | xs -> (xs, info):: loop tr (* recurse *)
1143 let v = consistency_checking v in
1147 let time_total_parsing a
=
1148 Common.profile_code
"TOTAL" (fun () -> parse_print_error_heuristic2 a
)
1150 let parse_print_error_heuristic a
=
1151 Common.profile_code
"C parsing" (fun () -> time_total_parsing a
)
1155 let parse_c_and_cpp a
= parse_print_error_heuristic a
1157 (*****************************************************************************)
1158 (* Same but faster cos memoize stuff *)
1159 (*****************************************************************************)
1160 let parse_cache file =
1161 if not
!Flag_parsing_c.use_cache
then parse_print_error_heuristic file
1163 let _ = pr2 "TOFIX" in
1164 let need_no_changed_files =
1165 (* should use Sys.argv.(0), would be safer. *)
1169 Config.path ^ "/parsing_c/c_parser.cma";
1170 (* we may also depend now on the semantic patch because
1171 the SP may use macro and so we will disable some of the
1172 macro expansions from standard.h.
1178 let need_no_changed_variables =
1179 (* could add some of the flags of flag_parsing_c.ml *)
1182 Common.cache_computation_robust
1184 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1185 (fun () -> parse_print_error_heuristic file)
1189 (*****************************************************************************)
1190 (* Some special cases *)
1191 (*****************************************************************************)
1193 let (cstatement_of_string
: string -> Ast_c.statement
) = fun s ->
1194 Common.write_file
("/tmp/__cocci.c") ("void main() { \n" ^
s ^
"\n}");
1195 let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst
in
1196 program +> Common.find_some
(fun (e
,_) ->
1198 | Ast_c.Definition
({Ast_c.f_body
= [Ast_c.StmtElem st
]},_) -> Some st
1202 let (cexpression_of_string
: string -> Ast_c.expression
) = fun s ->
1203 Common.write_file
("/tmp/__cocci.c") ("void main() { \n" ^
s ^
";\n}");
1204 let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst
in
1205 program +> Common.find_some
(fun (e
,_) ->
1207 | Ast_c.Definition
({Ast_c.f_body
= compound
},_) ->
1208 (match compound
with
1209 | [Ast_c.StmtElem
(Ast_c.ExprStatement
(Some e
),ii
)] -> Some e