3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
(* Short aliases for modules used heavily throughout this file. *)
module TH = Token_helpers
module LP = Lexer_parser
module Stat = Parsing_stat
(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)

(* Printers obtained from [Common.mk_pr2_wrappers], gated by the
   [Flag_parsing_c.verbose_parsing] flag. *)
let pr2_err, pr2_once =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* Pair the current lexeme of [lexbuf] with its starting character
   offset (as reported by [Lexing.lexeme_start]). *)
let lexbuf_to_strpos lexbuf =
  (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
(* Pair a token's text with its character position, via the
   Token_helpers accessors. *)
let token_to_strpos tok =
  (TH.str_of_tok tok, TH.pos_of_tok tok)
(* Build the "info item" text for [filename]: append into a buffer the
   string of every token of [toks] whose pinfo is an OriginTok.
   NOTE(review): the AbstractLineTok arm's body and the function's
   return value are elided in this view -- confirm against the full
   file. *)
39 let mk_info_item2 filename toks
=
40 let buf = Buffer.create
100 in
42 (* old: get_slice_file filename (line1, line2) *)
44 toks
+> List.iter
(fun tok
->
45 match TH.pinfo_of_tok tok
with
46 | Ast_c.OriginTok _
->
47 Buffer.add_string
buf (TH.str_of_tok tok
)
48 | Ast_c.AbstractLineTok _
->
(* Profiled wrapper around [mk_info_item2]. *)
let mk_info_item a b =
  Common.profile_code "C parsing.mk_info_item"
    (fun () -> mk_info_item2 a b)
(* Keep only the infos of [xs] that sit on source line [line]. *)
let info_same_line line xs =
  List.filter (fun info -> Ast_c.line_of_info info =|= line) xs
66 (* move in cpp_token_c ? *)
(* Heuristic used after error recovery: do the non-comment tokens of
   [passed] (accumulated most-recent-first, hence the List.rev) start
   with a TDefine and end with a TDefEOL, i.e. did recovery skip exactly
   a #define?  NOTE(review): the then/else branch bodies are partially
   elided in this view. *)
67 let is_define_passed passed
=
68 let xs = passed
+> List.rev
+> List.filter
TH.is_not_comment
in
69 if List.length
xs >= 2
71 (match Common.head_middle_tail
xs with
72 | Parser_c.TDefine _
, _
, Parser_c.TDefEOL _
->
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
(*****************************************************************************)
(* Error diagnostic *)
(*****************************************************************************)

(* Locate [tok] for error reporting.  The detailed form (file, lexeme,
   offset via Common.error_message) is only computed when verbose
   parsing is enabled; otherwise a terse one-line hint is returned. *)
let error_msg_tok tok =
  let file = TH.file_of_tok tok in
  if !Flag_parsing_c.verbose_parsing
  then Common.error_message file (token_to_strpos tok)
  else ("error in " ^ file ^ "; set verbose_parsing for more info")
(* Dump the offending region [start_line..end_line] of [filelines],
   flagging the faulty line more loudly.
   NOTE(review): the test selecting the "BAD:!!!!!" branch (presumably
   i =|= line_error) is elided in this view. *)
93 let print_bad line_error
(start_line
, end_line
) filelines
=
95 pr2
("badcount: " ^ i_to_s
(end_line
- start_line
));
97 for i
= start_line
to end_line
do
98 let line = filelines
.(i
) in
101 then pr2
("BAD:!!!!!" ^
" " ^
line)
102 else pr2
("bad:" ^
" " ^
line)
107 (*****************************************************************************)
108 (* Stats on what was passed/commentized *)
109 (*****************************************************************************)
(* From token list [xs], keep the position info of tokens that were
   "commentized" (turned into Parser_c.TCommentCpp during cpp handling),
   filtered according to !Flag_parsing_c.filter_passed_level, plus
   TCommentMisc / TAction tokens.  KERN_* and __* strings are always
   dropped.  NOTE(review): several match arms and level cases are
   elided in this view. *)
111 let commentized xs = xs +> Common.tail_map_filter
(function
112 | Parser_c.TCommentCpp
(cppkind
, ii
) ->
113 let s = Ast_c.str_of_info ii
in
115 match !Flag_parsing_c.filter_passed_level
with
118 List.mem cppkind
[Token_c.CppAttr
]
122 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
]
127 Token_c.CppAttr
| Token_c.CppPassingNormal
128 | Token_c.CppDirective
| Token_c.CppIfDirective _
-> true
134 [Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppMacro
]
141 Token_c.CppAttr
| Token_c.CppPassingNormal
142 | Token_c.CppDirective
| Token_c.CppIfDirective _
143 | Token_c.CppMacro
-> true
151 | _
-> failwith
"not valid level passing number"
153 if legal_passing then None
else Some
(ii
.Ast_c.pinfo
)
158 | s when s =~ "KERN_.*" -> None
159 | s when s =~ "__.*" -> None
161 Some (ii.Ast_c.pinfo)
166 | Parser_c.TCommentMisc ii
167 | Parser_c.TAction ii
169 Some
(ii
.Ast_c.pinfo
)
(* Count distinct source lines carrying commentized tokens of [xs];
   the [line] ref remembers the last line seen so a run of tokens on
   one line counts once.  NOTE(review): the iteration over
   [commentized xs] and the counting logic are elided in this view. *)
174 let count_lines_commentized xs =
175 let line = ref (-1) in
181 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
182 let newline = pinfo
.Common.line in
(* Debug printer: show the commentized tokens of [xs] grouped by source
   line, under a "passed:" heading, stripping embedded newlines from
   each token's text.  NOTE(review): parts of the printing logic are
   elided in this view. *)
194 let print_commentized xs =
195 let line = ref (-1) in
197 let ys = commentized xs in
201 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
202 let newline = pinfo
.Common.line in
203 let s = pinfo
.Common.str
in
204 let s = Str.global_substitute
205 (Str.regexp
"\n") (fun s -> "") s
208 then prerr_string
(s ^
" ")
211 then pr2_no_nl
"passed:"
212 else pr2_no_nl
"\npassed:";
217 if not
(null
ys) then pr2
"";
223 (*****************************************************************************)
225 (*****************************************************************************)
227 (* called by parse_print_error_heuristic *)
(* Tokenizer: lex the whole [file] with Lexer_c, completing each
   token's position info (line/column) from a precomputed character
   position table; lexical errors become Failure with a located
   message.  NOTE(review): the enclosing [let tokens2 file = ...]
   header and the EOF test of the loop are elided in this view. *)
229 let table = Common.full_charpos_to_pos_large
file in
231 Common.with_open_infile
file (fun chan
->
232 let lexbuf = Lexing.from_channel chan
in
234 let rec tokens_aux acc
=
235 let tok = Lexer_c.token
lexbuf in
236 (* fill in the line and col information *)
237 let tok = tok +> TH.visitor_info_of_tok
(fun ii
->
238 { ii
with Ast_c.pinfo
=
239 (* could assert pinfo.filename = file ? *)
240 match Ast_c.pinfo_of_info ii
with
241 Ast_c.OriginTok pi
->
242 Ast_c.OriginTok
(Common.complete_parse_info_large
file table pi
)
243 | Ast_c.ExpandedTok
(pi
,vpi
) ->
244 Ast_c.ExpandedTok
((Common.complete_parse_info_large
file table pi
),vpi
)
245 | Ast_c.FakeTok
(s,vpi
) -> Ast_c.FakeTok
(s,vpi
)
246 | Ast_c.AbstractLineTok pi
-> failwith
"should not occur"
251 then List.rev
(tok::acc
)
252 else tokens_aux (tok::acc
)
256 | Lexer_c.Lexical
s ->
257 failwith
("lexical error " ^
s ^
"\n =" ^
258 (Common.error_message
file (lexbuf_to_strpos lexbuf)))
(* Lex [a], by default under the exclusive "LEXING" profiling label;
   [?profile:false] skips the profiling wrapper.
   NOTE(review): the [if profile] test and the else branch are elided
   in this view. *)
262 let time_lexing ?
(profile
=true) a
=
264 then Common.profile_code_exclusif
"LEXING" (fun () -> tokens2 a
)
(* Public lexing entry point, profiled under "C parsing.tokens". *)
let tokens ?profile a =
  Common.profile_code "C parsing.tokens"
    (fun () -> time_lexing ?profile a)
(* Lex an in-memory [string] rather than a file; raises Failure on
   lexical errors.  NOTE(review): the EOF test of the loop and the
   final call are elided in this view. *)
270 let tokens_of_string string =
271 let lexbuf = Lexing.from_string
string in
273 let rec tokens_s_aux () =
274 let tok = Lexer_c.token
lexbuf in
277 else tok::(tokens_s_aux ())
281 | Lexer_c.Lexical
s -> failwith
("lexical error " ^
s ^
"\n =" )
285 (*****************************************************************************)
286 (* Parsing, but very basic, no more used *)
287 (*****************************************************************************)
290 * !!!Those function use refs, and are not reentrant !!! so take care.
291 * It use globals defined in Lexer_parser.
293 * update: because now lexer return comments tokens, those functions
294 * may not work anymore.
(* Basic one-shot parse of [file] with Parser_c.main, no error
   recovery.  NOTE(review): the enclosing [let parse file = ...]
   header is elided in this view. *)
298 let lexbuf = Lexing.from_channel
(open_in
file) in
299 let result = Parser_c.main
Lexer_c.token
lexbuf in
(* Like the above, but converts lexer / parser / semantic exceptions
   into Failure carrying a located error message. *)
303 let parse_print_error file =
304 let chan = (open_in
file) in
305 let lexbuf = Lexing.from_channel
chan in
307 let error_msg () = Common.error_message
file (lexbuf_to_strpos lexbuf) in
309 lexbuf +> Parser_c.main
Lexer_c.token
311 | Lexer_c.Lexical
s ->
312 failwith
("lexical error " ^
s^
"\n =" ^
error_msg ())
313 | Parsing.Parse_error
->
314 failwith
("parse error \n = " ^
error_msg ())
315 | Semantic_c.Semantic
(s, i
) ->
316 failwith
("semantic error " ^
s ^
"\n =" ^
error_msg ())
322 (*****************************************************************************)
323 (* Parsing subelements, useful to debug parser *)
324 (*****************************************************************************)
327 * !!!Those function use refs, and are not reentrant !!! so take care.
328 * It use globals defined in Lexer_parser.
333 * let parse_gen parsefunc s =
334 * let lexbuf = Lexing.from_string s in
335 * let result = parsefunc Lexer_c.token lexbuf in
(* Parse string [s] with entry point [parsefunc], feeding the parser
   from a pre-lexed token list (comments dropped) through a hand-rolled
   lexer function over a fake lexbuf.  Needed because the real lexer
   now yields comment tokens.  NOTE(review): parts of the driver loop
   are elided in this view. *)
339 let parse_gen parsefunc
s =
340 let toks = tokens_of_string s +> List.filter
TH.is_not_comment
in
343 (* Why use this lexing scheme ? Why not classically give lexer func
344 * to parser ? Because I now keep comments in lexer. Could
345 * just do a simple wrapper that when comment ask again for a token,
346 * but maybe simpler to use cur_tok technique.
348 let all_tokens = ref toks in
349 let cur_tok = ref (List.hd
!all_tokens) in
353 if TH.is_eof
!cur_tok
354 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
356 let v = Common.pop2
all_tokens in
361 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise
(Impossible
80)) in
362 let result = parsefunc
lexer_function lexbuf_fake in
(* Convenience entry points built on [parse_gen]. *)
366 let type_of_string = parse_gen Parser_c.type_name
367 let statement_of_string = parse_gen Parser_c.statement
368 let expression_of_string = parse_gen Parser_c.expr
370 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
376 (*****************************************************************************)
377 (* Parsing default define macros, usually in a standard.h file *)
378 (*****************************************************************************)
(* Extract macro definitions from [file]: lex it with verbose lexing
   temporarily silenced (Common.save_excursion) and profiling off,
   normalize #define tokens, then hand the tokens to
   Cpp_token_c.extract_macros.  NOTE(review): the closing paren of the
   save_excursion thunk is elided in this view. *)
380 let extract_macros2 file =
381 Common.save_excursion
Flag_parsing_c.verbose_lexing
(fun () ->
382 Flag_parsing_c.verbose_lexing
:= false;
383 let toks = tokens ~profile
:false file in
384 let toks = Parsing_hacks.fix_tokens_define
toks in
385 Cpp_token_c.extract_macros
toks
(* Profiled wrapper around [extract_macros2]. *)
let extract_macros a =
  Common.profile_code_exclusif "HACK"
    (fun () -> extract_macros2 a)
392 (*****************************************************************************)
393 (* Helper for main entry point *)
394 (*****************************************************************************)
397 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
398 * possible error recovery. Indeed, they allow to skip some tokens and
399 * still be able to call again the ocamlyacc parser. It is ugly code
400 * because we cant modify ocamllex and ocamlyacc. As we want some
401 * extended lexing tricks, we have to use such refs.
403 * Those refs are now also used for my lalr(k) technique. Indeed They
404 * store the futur and previous tokens that were parsed, and so
405 * provide enough context information for powerful lex trick.
407 * - passed_tokens_last_ckp stores the passed tokens since last
408 * checkpoint. Used for NotParsedCorrectly and also to build the
409 * info_item attached to each program_element.
410 * - passed_tokens_clean is used for lookahead, in fact for lookback.
411 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
412 * contain some comments and so would make pattern matching difficult
413 * in lookahead. Hence this variable. We would like also to get rid
414 * of cpp instruction because sometimes a cpp instruction is between
415 * two tokens and makes a pattern matching fail. But lookahead also
416 * transform some cpp instruction (in comment) so can't remove them.
418 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
419 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
423 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
424 * after the call to pop2.
425 * toks = (reverse passed_tok) ++ remaining_tokens
426 * at the and of the lexer_function call.
427 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
428 * At the end of lexer_function call, cur_tok overlap with passed_tok.
430 * convention: I use "tr" for "tokens refs"
432 * I now also need this lexing trick because the lexer return comment
(* Mutable cursor over the token stream used by the recovering parser:
   [rest]/[rest_clean] are tokens still to consume (with / without
   comments), [current] the token under the cursor, and
   [passed]/[passed_clean] the tokens consumed since the last
   checkpoint.  NOTE(review): the record's closing brace is elided in
   this view. *)
436 type tokens_state
= {
437 mutable rest
: Parser_c.token list
;
438 mutable rest_clean
: Parser_c.token list
;
439 mutable current
: Parser_c.token
;
440 (* it's passed since last "checkpoint", not passed from the beginning *)
441 mutable passed
: Parser_c.token list
;
442 mutable passed_clean
: Parser_c.token list
;
(* Build the initial tokens_state for [toks]; the clean fields drop
   comment tokens.  NOTE(review): the rest/passed field initializers
   and the closing brace are elided in this view. *)
445 let mk_tokens_state toks =
448 rest_clean
= (toks +> List.filter
TH.is_not_comment
);
449 current
= (List.hd
toks);
(* Shallow copy of a tokens_state (the field lists are shared, not
   duplicated).  NOTE(review): the rest/passed fields of the copy are
   elided in this view. *)
456 let clone_tokens_state tr
=
458 rest_clean
= tr
.rest_clean
;
459 current
= tr
.current
;
461 passed_clean
= tr
.passed_clean
;
(* [copy_tokens_state ~src ~dst] overwrites, in place, every mutable
   field of [dst] with the corresponding field of [src].  The lists
   themselves are shared, not duplicated. *)
let copy_tokens_state ~src ~dst =
  dst.rest <- src.rest;
  dst.rest_clean <- src.rest_clean;
  dst.current <- src.current;
  dst.passed <- src.passed;
  dst.passed_clean <- src.passed_clean;
  (* terminal unit: the sequence above must end in an expression (the
     visible text ended in a dangling ";", which is not valid OCaml) *)
  ()
471 (* todo? agglomerate the x##b ? *)
(* Drop "noise" tokens (e.g. TMacroAttr) from the first [n] positions
   of [xs], keeping the remainder untouched; used to build a cleaner
   prefix for the lookahead.  NOTE(review): the base cases of the
   recursion are elided in this view. *)
472 let rec filter_noise n
xs =
478 | Parser_c.TMacroAttr _
->
479 filter_noise (n
-1) xs
481 x
::filter_noise (n
-1) xs
(* Clean the first tokens of the lookahead window via [filter_noise].
   NOTE(review): the empty-list case and the destructuring of [xs] are
   elided in this view. *)
484 let clean_for_lookahead xs =
489 x
::filter_noise 10 xs
493 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
494 * tr means token refs.
(* The hacked lexer handed to the ocamlyacc parser.  Pops the next
   token from [tr], maintains the passed/clean mirrors, and applies the
   cpp "passing" tricks: inside a function (and when directive passing
   or pass >= 2), #define / #undef / #include are rewritten into
   TCommentCpp tokens; TIdent may be promoted to TypedefIdent; finally
   Parsing_hacks.lookahead may further rewrite the token.
   NOTE(review): many connective lines (match headers, then/else
   branches, returns) are elided in this view. *)
496 let rec lexer_function ~pass tr
= fun lexbuf ->
498 | [] -> pr2_err "ALREADY AT END"; tr
.current
503 if !Flag_parsing_c.debug_lexer
then Common.pr2_gen
v;
507 tr
.passed
<- v::tr
.passed
;
508 lexer_function ~pass tr
lexbuf
511 let x = List.hd tr
.rest_clean
in
512 tr
.rest_clean
<- List.tl tr
.rest_clean
;
519 * Why not in parsing_hacks lookahead and do passing like
520 * I do for some ifdef directives ? Because here I also need to
521 * generate some tokens sometimes and so I need access to the
522 * tr.passed, tr.rest, etc.
(* --- #define handling --- *)
524 | Parser_c.TDefine
(tok) ->
525 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
526 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
528 incr
Stat.nDefinePassing
;
529 pr2_once
("CPP-DEFINE: inside function, I treat it as comment");
531 Parser_c.TCommentCpp
(Token_c.CppDirective
,TH.info_of_tok
v)
533 tr
.passed
<- v'
::tr
.passed
;
534 tr
.rest
<- Parsing_hacks.comment_until_defeol tr
.rest
;
535 tr
.rest_clean
<- Parsing_hacks.drop_until_defeol tr
.rest_clean
;
536 lexer_function ~pass tr
lexbuf
539 tr
.passed
<- v::tr
.passed
;
540 tr
.passed_clean
<- v::tr
.passed_clean
;
(* --- #undef handling (same scheme as #define) --- *)
544 | Parser_c.TUndef
(tok) ->
545 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
546 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
548 incr
Stat.nUndefPassing
;
549 pr2_once
("CPP-UNDEF: inside function, I treat it as comment");
551 Parser_c.TCommentCpp
(Token_c.CppDirective
,TH.info_of_tok
v)
553 tr
.passed
<- v'
::tr
.passed
;
554 tr
.rest
<- Parsing_hacks.comment_until_defeol tr
.rest
;
555 tr
.rest_clean
<- Parsing_hacks.drop_until_defeol tr
.rest_clean
;
556 lexer_function ~pass tr
lexbuf
559 tr
.passed
<- v::tr
.passed
;
560 tr
.passed_clean
<- v::tr
.passed_clean
;
(* --- #include handling: either commentize it, or splice the tokens
       produced by Parsing_hacks.tokens_include into the stream --- *)
564 | Parser_c.TInclude
(includes
, filename
, inifdef
, info
) ->
565 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
566 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
568 incr
Stat.nIncludePassing
;
569 pr2_once
("CPP-INCLUDE: inside function, I treat it as comment");
570 let v = Parser_c.TCommentCpp
(Token_c.CppDirective
, info
) in
571 tr
.passed
<- v::tr
.passed
;
572 lexer_function ~pass tr
lexbuf
576 Parsing_hacks.tokens_include
(info
, includes
, filename
, inifdef
) in
577 let new_tokens_clean =
578 new_tokens
+> List.filter
TH.is_not_comment
in
580 tr
.passed
<- v::tr
.passed
;
581 tr
.passed_clean
<- v::tr
.passed_clean
;
582 tr
.rest
<- new_tokens
++ tr
.rest
;
583 tr
.rest_clean
<- new_tokens_clean ++ tr
.rest_clean
;
(* --- identifier: possible typedef promotion, then lookahead --- *)
591 | Parser_c.TIdent
(s, ii
) ->
594 not
(!Flag_parsing_c.disable_add_typedef
) &&
596 then Parser_c.TypedefIdent
(s, ii
)
597 else Parser_c.TIdent
(s, ii
)
601 let v = Parsing_hacks.lookahead ~pass
602 (clean_for_lookahead (v::tr
.rest_clean
))
605 tr
.passed
<- v::tr
.passed
;
607 (* the lookahead may have changed the status of the token and
608 * consider it as a comment, for instance some #include are
609 * turned into comments, hence this code. *)
611 | Parser_c.TCommentCpp _
-> lexer_function ~pass tr
lexbuf
613 tr
.passed_clean
<- v::tr
.passed_clean
;
(* Parse one toplevel element from [tr] using Parser_c.celem at the
   given [pass]: save/restore the typedef state, run the parser under
   the exclusive "YACC" profiling label, and on failure perform error
   recovery (Parsing_recovery_c.find_next_synchro), returning Right
   with diagnostic data instead of Left with the parsed element.
   NOTE(review): the try/with structure and several connective lines
   are elided in this view. *)
621 let get_one_elem ~pass tr
(file, filelines
) =
623 if not
(LP.is_enabled_typedef
()) && !Flag_parsing_c.debug_typedef
624 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
626 (* normally have to do that only when come from an exception in which
627 * case the dt() may not have been done
628 * TODO but if was in scoped scope ? have to let only the last scope
629 * so need do a LP.lexer_reset_typedef ();
632 LP._lexer_hint
:= (LP.default_hint
());
633 LP.save_typedef_state
();
637 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise
(Impossible
81)) in
640 (* -------------------------------------------------- *)
642 (* -------------------------------------------------- *)
643 Common.profile_code_exclusif
"YACC" (fun () ->
644 Left
(Parser_c.celem
(lexer_function ~pass tr
) lexbuf_fake)
647 LP.restore_typedef_state
();
649 (* must keep here, before the code that adjusts the tr fields *)
650 let line_error = TH.line_of_tok tr
.current
in
652 let passed_before_error = tr
.passed
in
653 let current = tr
.current in
654 (* error recovery, go to next synchro point *)
655 let (passed'
, rest'
) =
656 Parsing_recovery_c.find_next_synchro tr
.rest tr
.passed
in
658 tr
.passed
<- passed'
;
660 tr
.current <- List.hd passed'
;
661 tr
.passed_clean
<- []; (* enough ? *)
662 (* with error recovery, rest and rest_clean may not be in sync *)
663 tr
.rest_clean
<- (tr
.rest
+> List.filter
TH.is_not_comment
);
666 let info_of_bads = Common.map_eff_rev
TH.info_of_tok tr
.passed
in
667 Right
(info_of_bads, line_error,
668 tr
.passed
, passed_before_error,
674 (* Macro problem recovery *)
675 (* used by the multi-pass error recovery expand-on-demand *)
677 val candidate_macros_in_passed:
678 defs: (string, define_def) Hashtbl.t ->
679 Parser_c.token list -> (string * define_def) list
(* Scan the identifier-like tokens of [passed] for names that are
   defined in [defs]; those matching Parsing_hacks.regexp_macro go in
   one accumulator ([res]), the others in a fallback one ([res2]).
   NOTE(review): the accumulator declarations, remaining match arms
   and the returned value are elided in this view. *)
682 let candidate_macros_in_passed2 ~defs passed
=
686 passed
+> List.iter
(function
687 | Parser_c.TIdent
(s,_
)
688 (* bugfix: may have to undo some infered things *)
689 | Parser_c.TMacroIterator
(s,_
)
690 | Parser_c.TypedefIdent
(s,_
)
692 (match Common.hfind_option
s defs
with
694 if s ==~
Parsing_hacks.regexp_macro
696 (* pr2 (spf "candidate: %s" s); *)
697 Common.push2
(s, def
) res
699 Common.push2
(s, def
) res2
(* Profiled wrapper around [candidate_macros_in_passed2]. *)
let candidate_macros_in_passed ~defs b =
  Common.profile_code "MACRO managment"
    (fun () -> candidate_macros_in_passed2 ~defs b)
(* Expand-on-demand: undo TMacroIterator / TypedefIdent inferences for
   names present in [defs], then re-run Cpp_token_c.apply_macro_defs
   and Parsing_hacks.fix_tokens_cpp over the token stream so the
   selected macros get expanded.  NOTE(review): several connective
   lines of the pipeline are elided in this view. *)
717 let find_optional_macro_to_expand2 ~defs
toks =
719 let defs = Common.hash_of_list
defs in
721 let toks = toks +> Common.tail_map
(function
723 (* special cases to undo *)
724 | Parser_c.TMacroIterator
(s, ii
) ->
725 if Hashtbl.mem
defs s
726 then Parser_c.TIdent
(s, ii
)
727 else Parser_c.TMacroIterator
(s, ii
)
729 | Parser_c.TypedefIdent
(s, ii
) ->
730 if Hashtbl.mem
defs s
731 then Parser_c.TIdent
(s, ii
)
732 else Parser_c.TypedefIdent
(s, ii
)
738 Parsing_hacks.fix_tokens_cpp ~macro_defs
:defs tokens
740 (* just calling apply_macro_defs and having a specialized version
741 * of the code in fix_tokens_cpp is not enough as some work such
742 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
743 * will not get the chance to be run on the new expanded tokens.
744 * Hence even if it's expensive, it's currently better to
745 * just call directly fix_tokens_cpp again here.
747 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
748 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
749 let paren_grouped = TV.mk_parenthised cleaner in
750 Cpp_token_c.apply_macro_defs
751 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
752 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
754 (* because the before field is used by apply_macro_defs *)
755 tokens2 := TV.rebuild_tokens_extented
!tokens2;
756 Parsing_hacks.insert_virtual_positions
757 (!tokens2 +> Common.acc_map
(fun x -> x.TV.tok))
(* Profiled wrapper around [find_optional_macro_to_expand2]. *)
let find_optional_macro_to_expand ~defs a =
  Common.profile_code "MACRO managment"
    (fun () -> find_optional_macro_to_expand2 ~defs a)
(*****************************************************************************)
(* Main entry points *)
(*****************************************************************************)

(* Global table of cpp macro definitions loaded from standard.h; filled
   in by [init_defs_macros]. *)
let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
  ref (Hashtbl.create 101)

(* Global table of builtin macro definitions; filled in by
   [init_defs_builtins]. *)
let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
  ref (Hashtbl.create 101)
778 (* can not be put in parsing_hack, cos then mutually recursive problem as
779 * we also want to parse the standard.h file.
(* Load the default macro definitions from [std_h] into the global
   [_defs] table, warning instead if the file does not exist.
   NOTE(review): the else/begin structure is partially elided here. *)
781 let init_defs_macros std_h
=
782 if not
(Common.lfile_exists std_h
)
783 then pr2
("warning: Can't find default macro file: " ^ std_h
)
785 pr2
("init_defs: " ^ std_h
);
786 _defs
:= Common.hash_of_list
(extract_macros std_h
);
(* Same scheme for builtin macros from [file_h]; the assignment target
   (presumably _defs_builtins) is elided in this view -- confirm. *)
789 let init_defs_builtins file_h
=
790 if not
(Common.lfile_exists file_h
)
791 then pr2
("warning: Can't find macro file: " ^ file_h
)
793 pr2
("init_defs_builtins: " ^ file_h
);
795 Common.hash_of_list
(extract_macros file_h
);
(* An info_item pairs the original text of a toplevel element with the
   tokens it was built from. *)
800 type info_item
= string * Parser_c.token list
802 type program2
= toplevel2 list
(* An extended program additionally carries the typedef scope and the
   macro table produced by the parse, so they can be reused on
   subsequent files. *)
803 and extended_program2
= toplevel2 list
*
804 (string, Lexer_parser.identkind
) Common.scoped_h_env
(* type defs *) *
805 (string, Cpp_token_c.define_def
) Hashtbl.t
(* macro defs *)
806 and toplevel2
= Ast_c.toplevel
* info_item
(* [program_of_program2] projects a program2 to its Ast_c.toplevel
   list; [with_program2 f] applies [f] to the AST part and zips the
   result back with the info items.  NOTE(review): both bodies are
   partially elided in this view. *)
808 let program_of_program2 xs =
811 let with_program2 f program2
=
814 +> (fun (program
, infos
) ->
817 +> Common.uncurry
Common.zip
824 (* note: as now we go in 2 passes, there is first all the error message of
825 * the lexer, and then the error of the parser. It is not anymore
828 * !!!This function use refs, and is not reentrant !!! so take care.
829 * It use globals defined in Lexer_parser and also the _defs global
830 * in parsing_hack.ml.
832 * This function uses internally some semi globals in the
833 * tokens_stat record and parsing_stat record.
(* Main heuristic parser: lex [file], prepare the macro environment,
   then repeatedly call get_one_elem with up to four recovery passes
   (plain retry, expand candidate macros, expand all passed macros),
   recording statistics and producing NotParsedCorrectly elements for
   regions that never parse.  Returns the program together with the
   typedef scope and macro table.  NOTE(review): many connective lines
   (match headers, else branches, the loop driver) are elided in this
   view -- the transcription below is annotated but incomplete. *)
836 let parse_print_error_heuristic2 saved_typedefs saved_macros
file =
838 let filelines = Common.cat_array
file in
839 let stat = Parsing_stat.default_stat
file in
841 (* -------------------------------------------------- *)
842 (* call lexer and get all the tokens *)
843 (* -------------------------------------------------- *)
845 LP.lexer_reset_typedef saved_typedefs
;
846 Parsing_hacks.ifdef_paren_cnt
:= 0;
848 let toks_orig = tokens file in
849 let toks = Parsing_hacks.fix_tokens_define
toks_orig in
850 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs
:!_defs_builtins
toks in
852 (* expand macros on demand trick, preparation phase *)
854 Common.profile_code
"MACRO mgmt prep 1" (fun () ->
856 match saved_macros
with None
-> Hashtbl.copy
!_defs
| Some h
-> h
in
857 (* include also builtins as some macros may generate some builtins too
858 * like __decl_spec or __stdcall
860 !_defs_builtins
+> Hashtbl.iter
(fun s def
->
861 Hashtbl.replace
macros s def
;
866 Common.profile_code
"MACRO mgmt prep 2" (fun () ->
867 let local_macros = extract_macros file in
868 local_macros +> List.iter
(fun (s, def
) ->
869 Hashtbl.replace
macros s def
;
873 let tr = mk_tokens_state toks in
877 (* todo?: I am not sure that it represents current_line, cos maybe
878 * tr.current partipated in the previous parsing phase, so maybe tr.current
879 * is not the first token of the next parsing phase. Same with checkpoint2.
880 * It would be better to record when we have a } or ; in parser.mly,
881 * cos we know that they are the last symbols of external_declaration2.
883 * bugfix: may not be equal to 'file' as after macro expansions we can
884 * start to parse a new entity from the body of a macro, for instance
885 * when parsing a define_machine() body, cf standard.h
887 let checkpoint = TH.line_of_tok
tr.current in
888 let checkpoint_file = TH.file_of_tok
tr.current in
890 (* call the parser *)
893 Common.profile_code
"Parsing: 1st pass" (fun () ->
894 get_one_elem ~pass
:1 tr (file, filelines)
(* --- recovery: pass 2, retry from the same tokens --- *)
898 | Right
(info
,line_err
, passed
, passed_before_error, cur
, exn
) ->
899 if !Flag_parsing_c.disable_multi_pass
902 Common.profile_code
"Parsing: multi pass" (fun () ->
904 pr2_err "parsing pass2: try again";
905 let toks = List.rev passed
++ tr.rest
in
906 let new_tr = mk_tokens_state toks in
907 copy_tokens_state ~src
:new_tr ~dst
:tr;
908 let passx = get_one_elem ~pass
:2 tr (file, filelines) in
(* --- recovery: pass 3, expand candidate macros and retry --- *)
912 | Right
(info
,line_err
,passed
,passed_before_error,cur
,exn
) ->
914 candidate_macros_in_passed ~
defs:macros passed
918 if is_define_passed passed
|| null
candidates
921 (* todo factorize code *)
923 pr2_err "parsing pass3: try again";
924 let toks = List.rev passed
++ tr.rest
in
926 find_optional_macro_to_expand ~
defs:candidates toks in
927 let new_tr = mk_tokens_state toks'
in
928 copy_tokens_state ~src
:new_tr ~dst
:tr;
929 let passx = get_one_elem ~pass
:3 tr (file, filelines) in
(* --- recovery: pass 4 --- *)
933 | Right
(info
,line_err
,passed
,passed_before_error,cur
,exn
) ->
934 pr2_err "parsing pass4: try again";
937 candidate_macros_in_passed
941 let toks = List.rev passed
++ tr.rest
in
943 find_optional_macro_to_expand ~
defs:candidates toks in
944 let new_tr = mk_tokens_state toks'
in
945 copy_tokens_state ~src
:new_tr ~dst
:tr;
946 let passx = get_one_elem ~pass
:4 tr (file, filelines) in
(* --- bookkeeping: line counting and stats --- *)
956 (* again not sure if checkpoint2 corresponds to end of bad region *)
957 let checkpoint2 = TH.line_of_tok
tr.current in (* <> line_error *)
958 let checkpoint2_file = TH.file_of_tok
tr.current in
961 if (checkpoint_file =$
= checkpoint2_file) && (checkpoint_file =$
= file)
962 then (checkpoint2 - checkpoint)
964 (* TODO? so if error come in middle of something ? where the
965 * start token was from original file but synchro found in body
966 * of macro ? then can have wrong number of lines stat.
967 * Maybe simpler just to look at tr.passed and count
968 * the lines in the token from the correct file ?
971 let info = mk_info_item file (List.rev
tr.passed
) in
973 (* some stat updates *)
974 stat.Stat.commentized <-
975 stat.Stat.commentized + count_lines_commentized (snd
info);
980 stat.Stat.correct
<- stat.Stat.correct
+ diffline;
(* --- unrecoverable region: report and build NotParsedCorrectly --- *)
982 | Right
(info_of_bads, line_error, toks_of_bads
,
983 _passed_before_error
, cur
, exn
) ->
985 let was_define = is_define_passed tr.passed
in
987 if was_define && !Flag_parsing_c.filter_msg_define_error
993 | Parsing.Parse_error
994 | Semantic_c.Semantic _
-> ()
998 if !Flag_parsing_c.show_parsing_error
1001 (* Lexical is not anymore launched I think *)
1002 | Lexer_c.Lexical
s ->
1003 pr2
("lexical error " ^
s^
"\n =" ^
error_msg_tok cur
)
1004 | Parsing.Parse_error
->
1005 pr2
("parse error \n = " ^
error_msg_tok cur
)
1006 | Semantic_c.Semantic
(s, i
) ->
1007 pr2
("semantic error " ^
s^
"\n ="^
error_msg_tok cur
)
1008 | e
-> raise
(Impossible
82)
1011 if (checkpoint_file =$
= checkpoint2_file) &&
1012 checkpoint_file =$
= file
1013 then print_bad line_error (checkpoint, checkpoint2) filelines
1014 else pr2
"PB: bad: but on tokens not from original file"
1020 +> Common.filter
(TH.is_same_line_or_close
line_error)
1021 +> Common.filter
TH.is_ident_like
1024 (pbline +> List.map
TH.str_of_tok
), line_error
1026 stat.Stat.problematic_lines
<-
1027 error_info::stat.Stat.problematic_lines
;
1031 if was_define && !Flag_parsing_c.filter_define_error
1032 then stat.Stat.correct
<- stat.Stat.correct
+ diffline
1033 else stat.Stat.bad
<- stat.Stat.bad
+ diffline;
1035 Ast_c.NotParsedCorrectly
info_of_bads
(* --- loop termination and final result assembly --- *)
1039 | Ast_c.FinalDef
x -> [(Ast_c.FinalDef
x, info)]
1040 | xs -> (xs, info):: loop tr (* recurse *)
1044 let v = with_program2 Parsing_consistency_c.consistency_checking
v in
1046 let new_td = ref (Common.clone_scoped_h_env
!LP._typedef
) in
1047 Common.clean_scope_h
new_td;
1048 (v, !new_td, macros) in
(* Run a complete parse under the "TOTAL" profiling label. *)
let time_total_parsing a b =
  Common.profile_code "TOTAL"
    (fun () -> parse_print_error_heuristic2 a b)
(* Public profiled entry point over [time_total_parsing]. *)
let parse_print_error_heuristic a b =
  Common.profile_code "C parsing"
    (fun () -> time_total_parsing a b)
(* Parse [a] with no saved typedefs or macros, keeping only the program
   and the parsing statistics from the extended result. *)
let parse_c_and_cpp a =
  let (c, _, _), stat = parse_print_error_heuristic None None a in
  (c, stat)
(* Like [parse_c_and_cpp], but reuses saved typedef and macro state and
   returns the full extended result. *)
let parse_c_and_cpp_keep_typedefs td macs a =
  parse_print_error_heuristic td macs a
1066 (* Same but faster cos memoize stuff *)
1067 (*****************************************************************************)
1068 let parse_cache file =
1069 if not
!Flag_parsing_c.use_cache
1070 then parse_print_error_heuristic None None
file
1072 let _ = pr2_once
"TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
1073 let need_no_changed_files =
1074 (* should use Sys.argv.(0), would be safer. *)
1078 Config.path ^ "/parsing_c/c_parser.cma";
1079 (* we may also depend now on the semantic patch because
1080 the SP may use macro and so we will disable some of the
1081 macro expansions from standard.h.
1086 let need_no_changed_variables =
1087 (* could add some of the flags of flag_parsing_c.ml *)
1089 Common.cache_computation_robust_in_dir
1090 !Flag_parsing_c.cache_prefix
file ".ast_raw"
1091 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1093 (* check whether to clear the cache *)
1094 (match (!Flag_parsing_c.cache_limit
,!Flag_parsing_c.cache_prefix
) with
1095 (None
,_) | (_,None
) -> ()
1096 | (Some limit
,Some prefix
) ->
1099 (Printf.sprintf
"test -e %s && find %s -name \"*_raw\" | wc -l"
1103 if int_of_string c
>= limit
1108 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1113 parse_print_error_heuristic None None
file)
1117 (*****************************************************************************)
1118 (* Some special cases *)
1119 (*****************************************************************************)
(* Parse a lone C statement: wrap [s] in a dummy "void main() { ... }"
   written to a temp file, parse it, and extract the single statement
   of the resulting function body.
   NOTE(review): the non-matching fallback arm of the find_some
   function is elided in this view. *)
1121 let (cstatement_of_string
: string -> Ast_c.statement
) = fun s ->
1122 let tmpfile = Common.new_temp_file
"cocci_stmt_of_s" "c" in
1123 Common.write_file
tmpfile ("void main() { \n" ^
s ^
"\n}");
1124 let program = parse_c_and_cpp tmpfile +> fst
in
1125 program +> Common.find_some
(fun (e
,_) ->
1127 | Ast_c.Definition
({Ast_c.f_body
= [Ast_c.StmtElem st
]},_) -> Some st
1131 let (cexpression_of_string
: string -> Ast_c.expression
) = fun s ->
1132 let tmpfile = Common.new_temp_file
"cocci_expr_of_s" "c" in
1133 Common.write_file
tmpfile ("void main() { \n" ^
s ^
";\n}");
1134 let program = parse_c_and_cpp tmpfile +> fst
in
1135 program +> Common.find_some
(fun (e
,_) ->
1137 | Ast_c.Definition
({Ast_c.f_body
= compound
},_) ->
1138 (match compound
with
1139 | [Ast_c.StmtElem st
] ->
1140 (match Ast_c.unwrap_st st
with
1141 | Ast_c.ExprStatement
(Some e
) -> Some e