3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
18 module TH
= Token_helpers
19 module LP
= Lexer_parser
21 module Stat
= Parsing_stat
23 (*****************************************************************************)
25 (*****************************************************************************)
26 let pr2_err, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_parsing
28 (*****************************************************************************)
30 (*****************************************************************************)
32 let lexbuf_to_strpos lexbuf
=
33 (Lexing.lexeme lexbuf
, Lexing.lexeme_start lexbuf
)
35 let token_to_strpos tok
=
36 (TH.str_of_tok tok
, TH.pos_of_tok tok
)
39 let mk_info_item2 filename toks
=
40 let buf = Buffer.create
100 in
42 (* old: get_slice_file filename (line1, line2) *)
44 toks
+> List.iter
(fun tok
->
45 match TH.pinfo_of_tok tok
with
46 | Ast_c.OriginTok _
->
47 Buffer.add_string
buf (TH.str_of_tok tok
)
48 | Ast_c.AbstractLineTok _
->
57 let mk_info_item a b
=
58 Common.profile_code
"C parsing.mk_info_item"
59 (fun () -> mk_info_item2 a b
)
62 let info_same_line line xs
=
63 xs
+> List.filter
(fun info
-> Ast_c.line_of_info info
=|= line
)
66 (* move in cpp_token_c ? *)
67 let is_define_passed passed
=
68 let xs = passed
+> List.rev
+> List.filter
TH.is_not_comment
in
69 if List.length
xs >= 2
71 (match Common.head_middle_tail
xs with
72 | Parser_c.TDefine _
, _
, Parser_c.TDefEOL _
->
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
82 (*****************************************************************************)
83 (* Error diagnostic *)
84 (*****************************************************************************)
86 let error_msg_tok tok
=
87 let file = TH.file_of_tok tok
in
88 if !Flag_parsing_c.verbose_parsing
89 then Common.error_message
file (token_to_strpos tok
)
90 else ("error in " ^
file ^
"; set verbose_parsing for more info")
93 let print_bad line_error
(start_line
, end_line
) filelines
=
95 pr2
("badcount: " ^ i_to_s
(end_line
- start_line
));
97 for i
= start_line
to end_line
do
98 let line = filelines
.(i
) in
101 then pr2
("BAD:!!!!!" ^
" " ^
line)
102 else pr2
("bad:" ^
" " ^
line)
107 (*****************************************************************************)
108 (* Stats on what was passed/commentized *)
109 (*****************************************************************************)
111 let commentized xs = xs +> Common.tail_map_filter
(function
112 | Parser_c.TCommentCpp
(cppkind
, ii
) ->
113 let s = Ast_c.str_of_info ii
in
115 match !Flag_parsing_c.filter_passed_level
with
118 List.mem cppkind
[Token_c.CppAttr
]
122 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
]
126 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppDirective
]
130 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppMacro
]
136 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppDirective
;Token_c.CppMacro
]
143 | _
-> failwith
"not valid level passing number"
145 if legal_passing then None
else Some
(ii
.Ast_c.pinfo
)
150 | s when s =~ "KERN_.*" -> None
151 | s when s =~ "__.*" -> None
153 Some (ii.Ast_c.pinfo)
158 | Parser_c.TCommentMisc ii
159 | Parser_c.TAction ii
161 Some
(ii
.Ast_c.pinfo
)
166 let count_lines_commentized xs =
167 let line = ref (-1) in
173 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
174 let newline = pinfo
.Common.line in
186 let print_commentized xs =
187 let line = ref (-1) in
189 let ys = commentized xs in
193 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
194 let newline = pinfo
.Common.line in
195 let s = pinfo
.Common.str
in
196 let s = Str.global_substitute
197 (Str.regexp
"\n") (fun s -> "") s
200 then prerr_string
(s ^
" ")
203 then pr2_no_nl
"passed:"
204 else pr2_no_nl
"\npassed:";
209 if not
(null
ys) then pr2
"";
215 (*****************************************************************************)
217 (*****************************************************************************)
219 (* called by parse_print_error_heuristic *)
221 let table = Common.full_charpos_to_pos_large
file in
223 Common.with_open_infile
file (fun chan
->
224 let lexbuf = Lexing.from_channel chan
in
226 let rec tokens_aux acc
=
227 let tok = Lexer_c.token
lexbuf in
228 (* fill in the line and col information *)
229 let tok = tok +> TH.visitor_info_of_tok
(fun ii
->
230 { ii
with Ast_c.pinfo
=
231 (* could assert pinfo.filename = file ? *)
232 match Ast_c.pinfo_of_info ii
with
233 Ast_c.OriginTok pi
->
234 Ast_c.OriginTok
(Common.complete_parse_info_large
file table pi
)
235 | Ast_c.ExpandedTok
(pi
,vpi
) ->
236 Ast_c.ExpandedTok
((Common.complete_parse_info_large
file table pi
),vpi
)
237 | Ast_c.FakeTok
(s,vpi
) -> Ast_c.FakeTok
(s,vpi
)
238 | Ast_c.AbstractLineTok pi
-> failwith
"should not occur"
243 then List.rev
(tok::acc
)
244 else tokens_aux (tok::acc
)
248 | Lexer_c.Lexical
s ->
249 failwith
("lexical error " ^
s ^
"\n =" ^
250 (Common.error_message
file (lexbuf_to_strpos lexbuf)))
254 let time_lexing ?
(profile
=true) a
=
256 then Common.profile_code_exclusif
"LEXING" (fun () -> tokens2 a
)
258 let tokens ?profile a
=
259 Common.profile_code
"C parsing.tokens" (fun () -> time_lexing ?profile a
)
262 let tokens_of_string string =
263 let lexbuf = Lexing.from_string
string in
265 let rec tokens_s_aux () =
266 let tok = Lexer_c.token
lexbuf in
269 else tok::(tokens_s_aux ())
273 | Lexer_c.Lexical
s -> failwith
("lexical error " ^
s ^
"\n =" )
277 (*****************************************************************************)
278 (* Parsing, but very basic, no more used *)
279 (*****************************************************************************)
282 * !!!Those function use refs, and are not reentrant !!! so take care.
283 * It use globals defined in Lexer_parser.
285 * update: because now lexer return comments tokens, those functions
286 * may not work anymore.
290 let lexbuf = Lexing.from_channel
(open_in
file) in
291 let result = Parser_c.main
Lexer_c.token
lexbuf in
295 let parse_print_error file =
296 let chan = (open_in
file) in
297 let lexbuf = Lexing.from_channel
chan in
299 let error_msg () = Common.error_message
file (lexbuf_to_strpos lexbuf) in
301 lexbuf +> Parser_c.main
Lexer_c.token
303 | Lexer_c.Lexical
s ->
304 failwith
("lexical error " ^
s^
"\n =" ^
error_msg ())
305 | Parsing.Parse_error
->
306 failwith
("parse error \n = " ^
error_msg ())
307 | Semantic_c.Semantic
(s, i
) ->
308 failwith
("semantic error " ^
s ^
"\n =" ^
error_msg ())
314 (*****************************************************************************)
315 (* Parsing subelements, useful to debug parser *)
316 (*****************************************************************************)
319 * !!!Those function use refs, and are not reentrant !!! so take care.
320 * It use globals defined in Lexer_parser.
325 * let parse_gen parsefunc s =
326 * let lexbuf = Lexing.from_string s in
327 * let result = parsefunc Lexer_c.token lexbuf in
331 let parse_gen parsefunc
s =
332 let toks = tokens_of_string s +> List.filter
TH.is_not_comment
in
335 (* Why use this lexing scheme ? Why not classically give lexer func
336 * to parser ? Because I now keep comments in lexer. Could
337 * just do a simple wrapper that when comment ask again for a token,
338 * but maybe simpler to use cur_tok technique.
340 let all_tokens = ref toks in
341 let cur_tok = ref (List.hd
!all_tokens) in
345 if TH.is_eof
!cur_tok
346 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
348 let v = Common.pop2
all_tokens in
353 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
354 let result = parsefunc
lexer_function lexbuf_fake in
358 let type_of_string = parse_gen Parser_c.type_name
359 let statement_of_string = parse_gen Parser_c.statement
360 let expression_of_string = parse_gen Parser_c.expr
362 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
368 (*****************************************************************************)
369 (* Parsing default define macros, usually in a standard.h file *)
370 (*****************************************************************************)
372 let extract_macros2 file =
373 Common.save_excursion
Flag_parsing_c.verbose_lexing
(fun () ->
374 Flag_parsing_c.verbose_lexing
:= false;
375 let toks = tokens ~profile
:false file in
376 let toks = Parsing_hacks.fix_tokens_define
toks in
377 Cpp_token_c.extract_macros
toks
380 let extract_macros a
=
381 Common.profile_code_exclusif
"HACK" (fun () -> extract_macros2 a
)
384 (*****************************************************************************)
385 (* Helper for main entry point *)
386 (*****************************************************************************)
389 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
390 * possible error recovery. Indeed, they allow to skip some tokens and
391 * still be able to call again the ocamlyacc parser. It is ugly code
392 * because we cant modify ocamllex and ocamlyacc. As we want some
393 * extended lexing tricks, we have to use such refs.
395 * Those refs are now also used for my lalr(k) technique. Indeed They
396 * store the futur and previous tokens that were parsed, and so
397 * provide enough context information for powerful lex trick.
399 * - passed_tokens_last_ckp stores the passed tokens since last
400 * checkpoint. Used for NotParsedCorrectly and also to build the
401 * info_item attached to each program_element.
402 * - passed_tokens_clean is used for lookahead, in fact for lookback.
403 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
404 * contain some comments and so would make pattern matching difficult
405 * in lookahead. Hence this variable. We would like also to get rid
406 * of cpp instruction because sometimes a cpp instruction is between
407 * two tokens and makes a pattern matching fail. But lookahead also
408 * transform some cpp instruction (in comment) so can't remove them.
410 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
411 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
415 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
416 * after the call to pop2.
417 * toks = (reverse passed_tok) ++ remaining_tokens
418 * at the and of the lexer_function call.
419 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
420 * At the end of lexer_function call, cur_tok overlap with passed_tok.
422 * convention: I use "tr" for "tokens refs"
424 * I now also need this lexing trick because the lexer return comment
428 type tokens_state
= {
429 mutable rest
: Parser_c.token list
;
430 mutable rest_clean
: Parser_c.token list
;
431 mutable current
: Parser_c.token
;
432 (* it's passed since last "checkpoint", not passed from the beginning *)
433 mutable passed
: Parser_c.token list
;
434 mutable passed_clean
: Parser_c.token list
;
437 let mk_tokens_state toks =
440 rest_clean
= (toks +> List.filter
TH.is_not_comment
);
441 current
= (List.hd
toks);
448 let clone_tokens_state tr
=
450 rest_clean
= tr
.rest_clean
;
451 current
= tr
.current
;
453 passed_clean
= tr
.passed_clean
;
455 let copy_tokens_state ~src ~dst
=
456 dst
.rest
<- src
.rest
;
457 dst
.rest_clean
<- src
.rest_clean
;
458 dst
.current
<- src
.current
;
459 dst
.passed
<- src
.passed
;
460 dst
.passed_clean
<- src
.passed_clean
;
463 (* todo? agglomerate the x##b ? *)
464 let rec filter_noise n
xs =
470 | Parser_c.TMacroAttr _
->
471 filter_noise (n
-1) xs
473 x
::filter_noise (n
-1) xs
476 let clean_for_lookahead xs =
481 x
::filter_noise 10 xs
485 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
486 * tr means token refs.
488 let rec lexer_function ~pass tr
= fun lexbuf ->
490 | [] -> pr2_err "ALREADY AT END"; tr
.current
495 if !Flag_parsing_c.debug_lexer
then Common.pr2_gen
v;
499 tr
.passed
<- v::tr
.passed
;
500 lexer_function ~pass tr
lexbuf
503 let x = List.hd tr
.rest_clean
in
504 tr
.rest_clean
<- List.tl tr
.rest_clean
;
511 * Why not in parsing_hacks lookahead and do passing like
512 * I do for some ifdef directives ? Because here I also need to
513 * generate some tokens sometimes and so I need access to the
514 * tr.passed, tr.rest, etc.
516 | Parser_c.TDefine
(tok) ->
517 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
518 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
520 incr
Stat.nDefinePassing
;
521 pr2_once
("CPP-DEFINE: inside function, I treat it as comment");
523 Parser_c.TCommentCpp
(Token_c.CppDirective
,TH.info_of_tok
v)
525 tr
.passed
<- v'
::tr
.passed
;
526 tr
.rest
<- Parsing_hacks.comment_until_defeol tr
.rest
;
527 tr
.rest_clean
<- Parsing_hacks.drop_until_defeol tr
.rest_clean
;
528 lexer_function ~pass tr
lexbuf
531 tr
.passed
<- v::tr
.passed
;
532 tr
.passed_clean
<- v::tr
.passed_clean
;
536 | Parser_c.TUndef
(tok) ->
537 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
538 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
540 incr
Stat.nUndefPassing
;
541 pr2_once
("CPP-UNDEF: inside function, I treat it as comment");
543 Parser_c.TCommentCpp
(Token_c.CppDirective
,TH.info_of_tok
v)
545 tr
.passed
<- v'
::tr
.passed
;
546 tr
.rest
<- Parsing_hacks.comment_until_defeol tr
.rest
;
547 tr
.rest_clean
<- Parsing_hacks.drop_until_defeol tr
.rest_clean
;
548 lexer_function ~pass tr
lexbuf
551 tr
.passed
<- v::tr
.passed
;
552 tr
.passed_clean
<- v::tr
.passed_clean
;
556 | Parser_c.TInclude
(includes
, filename
, inifdef
, info
) ->
557 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
558 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
560 incr
Stat.nIncludePassing
;
561 pr2_once
("CPP-INCLUDE: inside function, I treat it as comment");
562 let v = Parser_c.TCommentCpp
(Token_c.CppDirective
, info
) in
563 tr
.passed
<- v::tr
.passed
;
564 lexer_function ~pass tr
lexbuf
568 Parsing_hacks.tokens_include
(info
, includes
, filename
, inifdef
) in
569 let new_tokens_clean =
570 new_tokens
+> List.filter
TH.is_not_comment
in
572 tr
.passed
<- v::tr
.passed
;
573 tr
.passed_clean
<- v::tr
.passed_clean
;
574 tr
.rest
<- new_tokens
++ tr
.rest
;
575 tr
.rest_clean
<- new_tokens_clean ++ tr
.rest_clean
;
583 | Parser_c.TIdent
(s, ii
) ->
586 not
(!Flag_parsing_c.disable_add_typedef
) &&
588 then Parser_c.TypedefIdent
(s, ii
)
589 else Parser_c.TIdent
(s, ii
)
593 let v = Parsing_hacks.lookahead ~pass
594 (clean_for_lookahead (v::tr
.rest_clean
))
597 tr
.passed
<- v::tr
.passed
;
599 (* the lookahead may have changed the status of the token and
600 * consider it as a comment, for instance some #include are
601 * turned into comments, hence this code. *)
603 | Parser_c.TCommentCpp _
-> lexer_function ~pass tr
lexbuf
605 tr
.passed_clean
<- v::tr
.passed_clean
;
614 let get_one_elem ~pass tr
(file, filelines
) =
616 if not
(LP.is_enabled_typedef
()) && !Flag_parsing_c.debug_typedef
617 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
619 (* normally have to do that only when come from an exception in which
620 * case the dt() may not have been done
621 * TODO but if was in scoped scope ? have to let only the last scope
622 * so need do a LP.lexer_reset_typedef ();
625 LP._lexer_hint
:= (LP.default_hint
());
626 LP.save_typedef_state
();
630 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
633 (* -------------------------------------------------- *)
635 (* -------------------------------------------------- *)
636 Common.profile_code_exclusif
"YACC" (fun () ->
637 Left
(Parser_c.celem
(lexer_function ~pass tr
) lexbuf_fake)
640 LP.restore_typedef_state
();
642 (* must keep here, before the code that adjusts the tr fields *)
643 let line_error = TH.line_of_tok tr
.current
in
645 let passed_before_error = tr
.passed
in
646 let current = tr
.current in
647 (* error recovery, go to next synchro point *)
648 let (passed'
, rest'
) =
649 Parsing_recovery_c.find_next_synchro tr
.rest tr
.passed
in
651 tr
.passed
<- passed'
;
653 tr
.current <- List.hd passed'
;
654 tr
.passed_clean
<- []; (* enough ? *)
655 (* with error recovery, rest and rest_clean may not be in sync *)
656 tr
.rest_clean
<- (tr
.rest
+> List.filter
TH.is_not_comment
);
659 let info_of_bads = Common.map_eff_rev
TH.info_of_tok tr
.passed
in
660 Right
(info_of_bads, line_error,
661 tr
.passed
, passed_before_error,
667 (* Macro problem recovery *)
668 (* used by the multi-pass error recovery expand-on-demand *)
670 val candidate_macros_in_passed:
671 defs: (string, define_def) Hashtbl.t ->
672 Parser_c.token list -> (string * define_def) list
675 let candidate_macros_in_passed2 ~defs passed
=
679 passed
+> List.iter
(function
680 | Parser_c.TIdent
(s,_
)
681 (* bugfix: may have to undo some infered things *)
682 | Parser_c.TMacroIterator
(s,_
)
683 | Parser_c.TypedefIdent
(s,_
)
685 (match Common.hfind_option
s defs
with
687 if s ==~
Parsing_hacks.regexp_macro
689 (* pr2 (spf "candidate: %s" s); *)
690 Common.push2
(s, def
) res
692 Common.push2
(s, def
) res2
702 let candidate_macros_in_passed ~defs b
=
703 Common.profile_code
"MACRO managment" (fun () ->
704 candidate_macros_in_passed2 ~defs b
)
710 let find_optional_macro_to_expand2 ~defs
toks =
712 let defs = Common.hash_of_list
defs in
714 let toks = toks +> Common.tail_map
(function
716 (* special cases to undo *)
717 | Parser_c.TMacroIterator
(s, ii
) ->
718 if Hashtbl.mem
defs s
719 then Parser_c.TIdent
(s, ii
)
720 else Parser_c.TMacroIterator
(s, ii
)
722 | Parser_c.TypedefIdent
(s, ii
) ->
723 if Hashtbl.mem
defs s
724 then Parser_c.TIdent
(s, ii
)
725 else Parser_c.TypedefIdent
(s, ii
)
731 Parsing_hacks.fix_tokens_cpp ~macro_defs
:defs tokens
733 (* just calling apply_macro_defs and having a specialized version
734 * of the code in fix_tokens_cpp is not enough as some work such
735 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
736 * will not get the chance to be run on the new expanded tokens.
737 * Hence even if it's expensive, it's currently better to
738 * just call directly fix_tokens_cpp again here.
740 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
741 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
742 let paren_grouped = TV.mk_parenthised cleaner in
743 Cpp_token_c.apply_macro_defs
744 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
745 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
747 (* because the before field is used by apply_macro_defs *)
748 tokens2 := TV.rebuild_tokens_extented
!tokens2;
749 Parsing_hacks.insert_virtual_positions
750 (!tokens2 +> Common.acc_map
(fun x -> x.TV.tok))
752 let find_optional_macro_to_expand ~
defs a
=
753 Common.profile_code
"MACRO managment" (fun () ->
754 find_optional_macro_to_expand2 ~
defs a
)
760 (*****************************************************************************)
761 (* Main entry points *)
762 (*****************************************************************************)
764 let (_defs
: (string, Cpp_token_c.define_def
) Hashtbl.t
ref) =
765 ref (Hashtbl.create
101)
767 let (_defs_builtins
: (string, Cpp_token_c.define_def
) Hashtbl.t
ref) =
768 ref (Hashtbl.create
101)
771 (* can not be put in parsing_hack, cos then mutually recursive problem as
772 * we also want to parse the standard.h file.
774 let init_defs_macros std_h
=
775 if not
(Common.lfile_exists std_h
)
776 then pr2
("warning: Can't find default macro file: " ^ std_h
)
778 pr2
("init_defs: " ^ std_h
);
779 _defs
:= Common.hash_of_list
(extract_macros std_h
);
782 let init_defs_builtins file_h
=
783 if not
(Common.lfile_exists file_h
)
784 then pr2
("warning: Can't find macro file: " ^ file_h
)
786 pr2
("init_defs_builtins: " ^ file_h
);
788 Common.hash_of_list
(extract_macros file_h
);
793 type info_item
= string * Parser_c.token list
795 type program2
= toplevel2 list
796 and extended_program2
= toplevel2 list
*
797 (string, Lexer_parser.identkind
) Common.scoped_h_env
(* type defs *) *
798 (string, Cpp_token_c.define_def
) Hashtbl.t
(* macro defs *)
799 and toplevel2
= Ast_c.toplevel
* info_item
801 let program_of_program2 xs =
804 let with_program2 f program2
=
807 +> (fun (program
, infos
) ->
810 +> Common.uncurry
Common.zip
817 (* note: as now we go in 2 passes, there is first all the error message of
818 * the lexer, and then the error of the parser. It is not anymore
821 * !!!This function use refs, and is not reentrant !!! so take care.
822 * It use globals defined in Lexer_parser and also the _defs global
823 * in parsing_hack.ml.
825 * This function uses internally some semi globals in the
826 * tokens_stat record and parsing_stat record.
829 let parse_print_error_heuristic2 saved_typedefs saved_macros
file =
831 let filelines = Common.cat_array
file in
832 let stat = Parsing_stat.default_stat
file in
834 (* -------------------------------------------------- *)
835 (* call lexer and get all the tokens *)
836 (* -------------------------------------------------- *)
838 LP.lexer_reset_typedef saved_typedefs
;
839 Parsing_hacks.ifdef_paren_cnt
:= 0;
841 let toks_orig = tokens file in
842 let toks = Parsing_hacks.fix_tokens_define
toks_orig in
843 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs
:!_defs_builtins
toks in
845 (* expand macros on demand trick, preparation phase *)
847 Common.profile_code
"MACRO mgmt prep 1" (fun () ->
849 match saved_macros
with None
-> Hashtbl.copy
!_defs
| Some h
-> h
in
850 (* include also builtins as some macros may generate some builtins too
851 * like __decl_spec or __stdcall
853 !_defs_builtins
+> Hashtbl.iter
(fun s def
->
854 Hashtbl.replace
macros s def
;
859 Common.profile_code
"MACRO mgmt prep 2" (fun () ->
860 let local_macros = extract_macros file in
861 local_macros +> List.iter
(fun (s, def
) ->
862 Hashtbl.replace
macros s def
;
866 let tr = mk_tokens_state toks in
870 (* todo?: I am not sure that it represents current_line, cos maybe
871 * tr.current partipated in the previous parsing phase, so maybe tr.current
872 * is not the first token of the next parsing phase. Same with checkpoint2.
873 * It would be better to record when we have a } or ; in parser.mly,
874 * cos we know that they are the last symbols of external_declaration2.
876 * bugfix: may not be equal to 'file' as after macro expansions we can
877 * start to parse a new entity from the body of a macro, for instance
878 * when parsing a define_machine() body, cf standard.h
880 let checkpoint = TH.line_of_tok
tr.current in
881 let checkpoint_file = TH.file_of_tok
tr.current in
883 (* call the parser *)
886 Common.profile_code
"Parsing: 1st pass" (fun () ->
887 get_one_elem ~pass
:1 tr (file, filelines)
891 | Right
(info
,line_err
, passed
, passed_before_error, cur
, exn
) ->
892 if !Flag_parsing_c.disable_multi_pass
895 Common.profile_code
"Parsing: multi pass" (fun () ->
897 pr2_err "parsing pass2: try again";
898 let toks = List.rev passed
++ tr.rest
in
899 let new_tr = mk_tokens_state toks in
900 copy_tokens_state ~src
:new_tr ~dst
:tr;
901 let passx = get_one_elem ~pass
:2 tr (file, filelines) in
905 | Right
(info
,line_err
,passed
,passed_before_error,cur
,exn
) ->
907 candidate_macros_in_passed ~
defs:macros passed
911 if is_define_passed passed
|| null
candidates
914 (* todo factorize code *)
916 pr2_err "parsing pass3: try again";
917 let toks = List.rev passed
++ tr.rest
in
919 find_optional_macro_to_expand ~
defs:candidates toks in
920 let new_tr = mk_tokens_state toks'
in
921 copy_tokens_state ~src
:new_tr ~dst
:tr;
922 let passx = get_one_elem ~pass
:3 tr (file, filelines) in
926 | Right
(info
,line_err
,passed
,passed_before_error,cur
,exn
) ->
927 pr2_err "parsing pass4: try again";
930 candidate_macros_in_passed
934 let toks = List.rev passed
++ tr.rest
in
936 find_optional_macro_to_expand ~
defs:candidates toks in
937 let new_tr = mk_tokens_state toks'
in
938 copy_tokens_state ~src
:new_tr ~dst
:tr;
939 let passx = get_one_elem ~pass
:4 tr (file, filelines) in
949 (* again not sure if checkpoint2 corresponds to end of bad region *)
950 let checkpoint2 = TH.line_of_tok
tr.current in (* <> line_error *)
951 let checkpoint2_file = TH.file_of_tok
tr.current in
954 if (checkpoint_file =$
= checkpoint2_file) && (checkpoint_file =$
= file)
955 then (checkpoint2 - checkpoint)
957 (* TODO? so if error come in middle of something ? where the
958 * start token was from original file but synchro found in body
959 * of macro ? then can have wrong number of lines stat.
960 * Maybe simpler just to look at tr.passed and count
961 * the lines in the token from the correct file ?
964 let info = mk_info_item file (List.rev
tr.passed
) in
966 (* some stat updates *)
967 stat.Stat.commentized <-
968 stat.Stat.commentized + count_lines_commentized (snd
info);
973 stat.Stat.correct
<- stat.Stat.correct
+ diffline;
975 | Right
(info_of_bads, line_error, toks_of_bads
,
976 _passed_before_error
, cur
, exn
) ->
978 let was_define = is_define_passed tr.passed
in
980 if was_define && !Flag_parsing_c.filter_msg_define_error
986 | Parsing.Parse_error
987 | Semantic_c.Semantic _
-> ()
991 if !Flag_parsing_c.show_parsing_error
994 (* Lexical is not anymore launched I think *)
995 | Lexer_c.Lexical
s ->
996 pr2
("lexical error " ^
s^
"\n =" ^
error_msg_tok cur
)
997 | Parsing.Parse_error
->
998 pr2
("parse error \n = " ^
error_msg_tok cur
)
999 | Semantic_c.Semantic
(s, i
) ->
1000 pr2
("semantic error " ^
s^
"\n ="^
error_msg_tok cur
)
1001 | e
-> raise Impossible
1004 if (checkpoint_file =$
= checkpoint2_file) &&
1005 checkpoint_file =$
= file
1006 then print_bad line_error (checkpoint, checkpoint2) filelines
1007 else pr2
"PB: bad: but on tokens not from original file"
1013 +> Common.filter
(TH.is_same_line_or_close
line_error)
1014 +> Common.filter
TH.is_ident_like
1017 (pbline +> List.map
TH.str_of_tok
), line_error
1019 stat.Stat.problematic_lines
<-
1020 error_info::stat.Stat.problematic_lines
;
1024 if was_define && !Flag_parsing_c.filter_define_error
1025 then stat.Stat.correct
<- stat.Stat.correct
+ diffline
1026 else stat.Stat.bad
<- stat.Stat.bad
+ diffline;
1028 Ast_c.NotParsedCorrectly
info_of_bads
1032 | Ast_c.FinalDef
x -> [(Ast_c.FinalDef
x, info)]
1033 | xs -> (xs, info):: loop tr (* recurse *)
1037 let v = with_program2 Parsing_consistency_c.consistency_checking
v in
1039 let new_td = ref (Common.clone_scoped_h_env
!LP._typedef
) in
1040 Common.clean_scope_h
new_td;
1041 (v, !new_td, macros) in
1045 let time_total_parsing a b
=
1046 Common.profile_code
"TOTAL" (fun () -> parse_print_error_heuristic2 a b
)
1048 let parse_print_error_heuristic a b
=
1049 Common.profile_code
"C parsing" (fun () -> time_total_parsing a b
)
1053 let parse_c_and_cpp a
=
1054 let ((c
,_
,_
),stat) = parse_print_error_heuristic None None a
in (c
,stat)
1055 let parse_c_and_cpp_keep_typedefs td macs a
=
1056 parse_print_error_heuristic td macs a
1058 (*****************************************************************************)
1059 (* Same but faster cos memoize stuff *)
1060 (*****************************************************************************)
1061 let parse_cache file =
1062 if not
!Flag_parsing_c.use_cache
1063 then parse_print_error_heuristic None None
file
1065 let _ = pr2_once
"TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
1066 let need_no_changed_files =
1067 (* should use Sys.argv.(0), would be safer. *)
1071 Config.path ^ "/parsing_c/c_parser.cma";
1072 (* we may also depend now on the semantic patch because
1073 the SP may use macro and so we will disable some of the
1074 macro expansions from standard.h.
1079 let need_no_changed_variables =
1080 (* could add some of the flags of flag_parsing_c.ml *)
1082 Common.cache_computation_robust_in_dir
1083 !Flag_parsing_c.cache_prefix
file ".ast_raw"
1084 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1086 (* check whether to clear the cache *)
1087 (match (!Flag_parsing_c.cache_limit
,!Flag_parsing_c.cache_prefix
) with
1088 (None
,_) | (_,None
) -> ()
1089 | (Some limit
,Some prefix
) ->
1092 (Printf.sprintf
"test -e %s && find %s -name \"*_raw\" | wc -l"
1096 if int_of_string c
>= limit
1101 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1106 parse_print_error_heuristic None None
file)
1110 (*****************************************************************************)
1111 (* Some special cases *)
1112 (*****************************************************************************)
1114 let (cstatement_of_string
: string -> Ast_c.statement
) = fun s ->
1115 let tmpfile = Common.new_temp_file
"cocci_stmt_of_s" "c" in
1116 Common.write_file
tmpfile ("void main() { \n" ^
s ^
"\n}");
1117 let program = parse_c_and_cpp tmpfile +> fst
in
1118 program +> Common.find_some
(fun (e
,_) ->
1120 | Ast_c.Definition
({Ast_c.f_body
= [Ast_c.StmtElem st
]},_) -> Some st
1124 let (cexpression_of_string
: string -> Ast_c.expression
) = fun s ->
1125 let tmpfile = Common.new_temp_file
"cocci_expr_of_s" "c" in
1126 Common.write_file
tmpfile ("void main() { \n" ^
s ^
";\n}");
1127 let program = parse_c_and_cpp tmpfile +> fst
in
1128 program +> Common.find_some
(fun (e
,_) ->
1130 | Ast_c.Definition
({Ast_c.f_body
= compound
},_) ->
1131 (match compound
with
1132 | [Ast_c.StmtElem st
] ->
1133 (match Ast_c.unwrap_st st
with
1134 | Ast_c.ExprStatement
(Some e
) -> Some e