3 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
17 module TH
= Token_helpers
18 module LP
= Lexer_parser
20 module Stat
= Parsing_stat
22 (*****************************************************************************)
24 (*****************************************************************************)
25 let pr2_err, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_parsing
27 (*****************************************************************************)
29 (*****************************************************************************)
31 let lexbuf_to_strpos lexbuf
=
32 (Lexing.lexeme lexbuf
, Lexing.lexeme_start lexbuf
)
34 let token_to_strpos tok
=
35 (TH.str_of_tok tok
, TH.pos_of_tok tok
)
38 let mk_info_item2 filename toks
=
39 let buf = Buffer.create
100 in
41 (* old: get_slice_file filename (line1, line2) *)
43 toks
+> List.iter
(fun tok
->
44 match TH.pinfo_of_tok tok
with
45 | Ast_c.OriginTok _
->
46 Buffer.add_string
buf (TH.str_of_tok tok
)
47 | Ast_c.AbstractLineTok _
->
56 let mk_info_item a b
=
57 Common.profile_code
"C parsing.mk_info_item"
58 (fun () -> mk_info_item2 a b
)
61 let info_same_line line xs
=
62 xs
+> List.filter
(fun info
-> Ast_c.line_of_info info
=|= line
)
65 (* move in cpp_token_c ? *)
66 let is_define_passed passed
=
67 let xs = passed
+> List.rev
+> List.filter
TH.is_not_comment
in
68 if List.length
xs >= 2
70 (match Common.head_middle_tail
xs with
71 | Parser_c.TDefine _
, _
, Parser_c.TDefEOL _
->
76 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
81 (*****************************************************************************)
82 (* Error diagnostic *)
83 (*****************************************************************************)
85 let error_msg_tok tok
=
86 let file = TH.file_of_tok tok
in
87 if !Flag_parsing_c.verbose_parsing
88 then Common.error_message
file (token_to_strpos tok
)
89 else ("error in " ^
file ^
"; set verbose_parsing for more info")
92 let print_bad line_error
(start_line
, end_line
) filelines
=
94 pr2
("badcount: " ^ i_to_s
(end_line
- start_line
));
96 for i
= start_line
to end_line
do
97 let line = filelines
.(i
) in
100 then pr2
("BAD:!!!!!" ^
" " ^
line)
101 else pr2
("bad:" ^
" " ^
line)
106 (*****************************************************************************)
107 (* Stats on what was passed/commentized *)
108 (*****************************************************************************)
110 let commentized xs = xs +> Common.map_filter
(function
111 | Parser_c.TCommentCpp
(cppkind
, ii
) ->
112 let s = Ast_c.str_of_info ii
in
114 match !Flag_parsing_c.filter_passed_level
with
117 List.mem cppkind
[Token_c.CppAttr
]
121 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
]
125 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppDirective
]
129 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppMacro
]
135 List.mem cppkind
[Token_c.CppAttr
;Token_c.CppPassingNormal
;Token_c.CppDirective
;Token_c.CppMacro
]
142 | _
-> failwith
"not valid level passing number"
144 if legal_passing then None
else Some
(ii
.Ast_c.pinfo
)
149 | s when s =~ "KERN_.*" -> None
150 | s when s =~ "__.*" -> None
152 Some (ii.Ast_c.pinfo)
157 | Parser_c.TCommentMisc ii
158 | Parser_c.TAction ii
160 Some
(ii
.Ast_c.pinfo
)
165 let count_lines_commentized xs =
166 let line = ref (-1) in
172 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
173 let newline = pinfo
.Common.line in
185 let print_commentized xs =
186 let line = ref (-1) in
188 let ys = commentized xs in
192 Ast_c.OriginTok pinfo
| Ast_c.ExpandedTok
(_
,(pinfo
,_
)) ->
193 let newline = pinfo
.Common.line in
194 let s = pinfo
.Common.str
in
195 let s = Str.global_substitute
196 (Str.regexp
"\n") (fun s -> "") s
199 then prerr_string
(s ^
" ")
202 then pr2_no_nl
"passed:"
203 else pr2_no_nl
"\npassed:";
208 if not
(null
ys) then pr2
"";
214 (*****************************************************************************)
216 (*****************************************************************************)
218 (* called by parse_print_error_heuristic *)
220 let table = Common.full_charpos_to_pos_large
file in
222 Common.with_open_infile
file (fun chan
->
223 let lexbuf = Lexing.from_channel chan
in
225 let rec tokens_aux acc
=
226 let tok = Lexer_c.token
lexbuf in
227 (* fill in the line and col information *)
228 let tok = tok +> TH.visitor_info_of_tok
(fun ii
->
229 { ii
with Ast_c.pinfo
=
230 (* could assert pinfo.filename = file ? *)
231 match Ast_c.pinfo_of_info ii
with
232 Ast_c.OriginTok pi
->
233 Ast_c.OriginTok
(Common.complete_parse_info_large
file table pi
)
234 | Ast_c.ExpandedTok
(pi
,vpi
) ->
235 Ast_c.ExpandedTok
((Common.complete_parse_info_large
file table pi
),vpi
)
236 | Ast_c.FakeTok
(s,vpi
) -> Ast_c.FakeTok
(s,vpi
)
237 | Ast_c.AbstractLineTok pi
-> failwith
"should not occur"
242 then List.rev
(tok::acc
)
243 else tokens_aux (tok::acc
)
247 | Lexer_c.Lexical
s ->
248 failwith
("lexical error " ^
s ^
"\n =" ^
249 (Common.error_message
file (lexbuf_to_strpos lexbuf)))
253 let time_lexing ?
(profile
=true) a
=
255 then Common.profile_code_exclusif
"LEXING" (fun () -> tokens2 a
)
257 let tokens ?profile a
=
258 Common.profile_code
"C parsing.tokens" (fun () -> time_lexing ?profile a
)
261 let tokens_of_string string =
262 let lexbuf = Lexing.from_string
string in
264 let rec tokens_s_aux () =
265 let tok = Lexer_c.token
lexbuf in
268 else tok::(tokens_s_aux ())
272 | Lexer_c.Lexical
s -> failwith
("lexical error " ^
s ^
"\n =" )
276 (*****************************************************************************)
277 (* Parsing, but very basic, no more used *)
278 (*****************************************************************************)
281 * !!!Those function use refs, and are not reentrant !!! so take care.
282 * It use globals defined in Lexer_parser.
284 * update: because now lexer return comments tokens, those functions
285 * may not work anymore.
289 let lexbuf = Lexing.from_channel
(open_in
file) in
290 let result = Parser_c.main
Lexer_c.token
lexbuf in
294 let parse_print_error file =
295 let chan = (open_in
file) in
296 let lexbuf = Lexing.from_channel
chan in
298 let error_msg () = Common.error_message
file (lexbuf_to_strpos lexbuf) in
300 lexbuf +> Parser_c.main
Lexer_c.token
302 | Lexer_c.Lexical
s ->
303 failwith
("lexical error " ^
s^
"\n =" ^
error_msg ())
304 | Parsing.Parse_error
->
305 failwith
("parse error \n = " ^
error_msg ())
306 | Semantic_c.Semantic
(s, i
) ->
307 failwith
("semantic error " ^
s ^
"\n =" ^
error_msg ())
313 (*****************************************************************************)
314 (* Parsing subelements, useful to debug parser *)
315 (*****************************************************************************)
318 * !!!Those function use refs, and are not reentrant !!! so take care.
319 * It use globals defined in Lexer_parser.
324 * let parse_gen parsefunc s =
325 * let lexbuf = Lexing.from_string s in
326 * let result = parsefunc Lexer_c.token lexbuf in
330 let parse_gen parsefunc
s =
331 let toks = tokens_of_string s +> List.filter
TH.is_not_comment
in
334 (* Why use this lexing scheme ? Why not classically give lexer func
335 * to parser ? Because I now keep comments in lexer. Could
336 * just do a simple wrapper that when comment ask again for a token,
337 * but maybe simpler to use cur_tok technique.
339 let all_tokens = ref toks in
340 let cur_tok = ref (List.hd
!all_tokens) in
344 if TH.is_eof
!cur_tok
345 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
347 let v = Common.pop2
all_tokens in
352 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
353 let result = parsefunc
lexer_function lexbuf_fake in
357 let type_of_string = parse_gen Parser_c.type_name
358 let statement_of_string = parse_gen Parser_c.statement
359 let expression_of_string = parse_gen Parser_c.expr
361 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
367 (*****************************************************************************)
368 (* Parsing default define macros, usually in a standard.h file *)
369 (*****************************************************************************)
371 let extract_macros2 file =
372 Common.save_excursion
Flag_parsing_c.verbose_lexing
(fun () ->
373 Flag_parsing_c.verbose_lexing
:= false;
374 let toks = tokens ~profile
:false file in
375 let toks = Parsing_hacks.fix_tokens_define
toks in
376 Cpp_token_c.extract_macros
toks
379 let extract_macros a
=
380 Common.profile_code_exclusif
"HACK" (fun () -> extract_macros2 a
)
383 (*****************************************************************************)
384 (* Helper for main entry point *)
385 (*****************************************************************************)
388 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
389 * possible error recovery. Indeed, they allow to skip some tokens and
390 * still be able to call again the ocamlyacc parser. It is ugly code
391 * because we cant modify ocamllex and ocamlyacc. As we want some
392 * extended lexing tricks, we have to use such refs.
394 * Those refs are now also used for my lalr(k) technique. Indeed They
395 * store the futur and previous tokens that were parsed, and so
396 * provide enough context information for powerful lex trick.
398 * - passed_tokens_last_ckp stores the passed tokens since last
399 * checkpoint. Used for NotParsedCorrectly and also to build the
400 * info_item attached to each program_element.
401 * - passed_tokens_clean is used for lookahead, in fact for lookback.
402 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
403 * contain some comments and so would make pattern matching difficult
404 * in lookahead. Hence this variable. We would like also to get rid
405 * of cpp instruction because sometimes a cpp instruction is between
406 * two tokens and makes a pattern matching fail. But lookahead also
407 * transform some cpp instruction (in comment) so can't remove them.
409 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
410 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
414 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
415 * after the call to pop2.
416 * toks = (reverse passed_tok) ++ remaining_tokens
417 * at the and of the lexer_function call.
418 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
419 * At the end of lexer_function call, cur_tok overlap with passed_tok.
421 * convention: I use "tr" for "tokens refs"
423 * I now also need this lexing trick because the lexer return comment
427 type tokens_state
= {
428 mutable rest
: Parser_c.token list
;
429 mutable rest_clean
: Parser_c.token list
;
430 mutable current
: Parser_c.token
;
431 (* it's passed since last "checkpoint", not passed from the beginning *)
432 mutable passed
: Parser_c.token list
;
433 mutable passed_clean
: Parser_c.token list
;
436 let mk_tokens_state toks =
439 rest_clean
= (toks +> List.filter
TH.is_not_comment
);
440 current
= (List.hd
toks);
447 let clone_tokens_state tr
=
449 rest_clean
= tr
.rest_clean
;
450 current
= tr
.current
;
452 passed_clean
= tr
.passed_clean
;
454 let copy_tokens_state ~src ~dst
=
455 dst
.rest
<- src
.rest
;
456 dst
.rest_clean
<- src
.rest_clean
;
457 dst
.current
<- src
.current
;
458 dst
.passed
<- src
.passed
;
459 dst
.passed_clean
<- src
.passed_clean
;
462 (* todo? agglomerate the x##b ? *)
463 let rec filter_noise n
xs =
469 | Parser_c.TMacroAttr _
->
470 filter_noise (n
-1) xs
472 x
::filter_noise (n
-1) xs
475 let clean_for_lookahead xs =
480 x
::filter_noise 10 xs
484 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
485 * tr means token refs.
487 let rec lexer_function ~pass tr
= fun lexbuf ->
489 | [] -> pr2_err "ALREADY AT END"; tr
.current
494 if !Flag_parsing_c.debug_lexer
then Common.pr2_gen
v;
498 tr
.passed
<- v::tr
.passed
;
499 lexer_function ~pass tr
lexbuf
502 let x = List.hd tr
.rest_clean
in
503 tr
.rest_clean
<- List.tl tr
.rest_clean
;
510 * Why not in parsing_hacks lookahead and do passing like
511 * I do for some ifdef directives ? Because here I also need to
512 * generate some tokens sometimes and so I need access to the
513 * tr.passed, tr.rest, etc.
515 | Parser_c.TDefine
(tok) ->
516 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
517 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
519 incr
Stat.nDefinePassing
;
520 pr2_once
("CPP-DEFINE: inside function, I treat it as comment");
521 let v'
= Parser_c.TCommentCpp
(Token_c.CppDirective
,TH.info_of_tok
v)
523 tr
.passed
<- v'
::tr
.passed
;
524 tr
.rest
<- Parsing_hacks.comment_until_defeol tr
.rest
;
525 tr
.rest_clean
<- Parsing_hacks.drop_until_defeol tr
.rest_clean
;
526 lexer_function ~pass tr
lexbuf
529 tr
.passed
<- v::tr
.passed
;
530 tr
.passed_clean
<- v::tr
.passed_clean
;
534 | Parser_c.TInclude
(includes
, filename
, inifdef
, info
) ->
535 if not
(LP.current_context
() =*= LP.InTopLevel
) &&
536 (!Flag_parsing_c.cpp_directive_passing
|| (pass
>= 2))
538 incr
Stat.nIncludePassing
;
539 pr2_once
("CPP-INCLUDE: inside function, I treat it as comment");
540 let v = Parser_c.TCommentCpp
(Token_c.CppDirective
, info
) in
541 tr
.passed
<- v::tr
.passed
;
542 lexer_function ~pass tr
lexbuf
546 Parsing_hacks.tokens_include
(info
, includes
, filename
, inifdef
) in
547 let new_tokens_clean =
548 new_tokens
+> List.filter
TH.is_not_comment
in
550 tr
.passed
<- v::tr
.passed
;
551 tr
.passed_clean
<- v::tr
.passed_clean
;
552 tr
.rest
<- new_tokens
++ tr
.rest
;
553 tr
.rest_clean
<- new_tokens_clean ++ tr
.rest_clean
;
561 | Parser_c.TIdent
(s, ii
) ->
564 not
(!Flag_parsing_c.disable_add_typedef
) &&
566 then Parser_c.TypedefIdent
(s, ii
)
567 else Parser_c.TIdent
(s, ii
)
571 let v = Parsing_hacks.lookahead ~pass
572 (clean_for_lookahead (v::tr
.rest_clean
))
575 tr
.passed
<- v::tr
.passed
;
577 (* the lookahead may have changed the status of the token and
578 * consider it as a comment, for instance some #include are
579 * turned into comments, hence this code. *)
581 | Parser_c.TCommentCpp _
-> lexer_function ~pass tr
lexbuf
583 tr
.passed_clean
<- v::tr
.passed_clean
;
592 let get_one_elem ~pass tr
(file, filelines
) =
594 if not
(LP.is_enabled_typedef
()) && !Flag_parsing_c.debug_typedef
595 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
597 (* normally have to do that only when come from an exception in which
598 * case the dt() may not have been done
599 * TODO but if was in scoped scope ? have to let only the last scope
600 * so need do a LP.lexer_reset_typedef ();
603 LP._lexer_hint
:= (LP.default_hint
());
604 LP.save_typedef_state
();
608 let lexbuf_fake = Lexing.from_function
(fun buf n
-> raise Impossible
) in
611 (* -------------------------------------------------- *)
613 (* -------------------------------------------------- *)
614 Common.profile_code_exclusif
"YACC" (fun () ->
615 Left
(Parser_c.celem
(lexer_function ~pass tr
) lexbuf_fake)
618 LP.restore_typedef_state
();
620 (* must keep here, before the code that adjusts the tr fields *)
621 let line_error = TH.line_of_tok tr
.current
in
623 let passed_before_error = tr
.passed
in
624 let current = tr
.current in
626 (* error recovery, go to next synchro point *)
627 let (passed'
, rest'
) =
628 Parsing_recovery_c.find_next_synchro tr
.rest tr
.passed
in
630 tr
.passed
<- passed'
;
632 tr
.current <- List.hd passed'
;
633 tr
.passed_clean
<- []; (* enough ? *)
634 (* with error recovery, rest and rest_clean may not be in sync *)
635 tr
.rest_clean
<- (tr
.rest
+> List.filter
TH.is_not_comment
);
638 let info_of_bads = Common.map_eff_rev
TH.info_of_tok tr
.passed
in
639 Right
(info_of_bads, line_error,
640 tr
.passed
, passed_before_error,
646 (* Macro problem recovery *)
647 (* used by the multi-pass error recovery expand-on-demand *)
649 val candidate_macros_in_passed:
650 defs: (string, define_def) Hashtbl.t ->
651 Parser_c.token list -> (string * define_def) list
654 let candidate_macros_in_passed2 ~defs passed
=
658 passed
+> List.iter
(function
659 | Parser_c.TIdent
(s,_
)
660 (* bugfix: may have to undo some infered things *)
661 | Parser_c.TMacroIterator
(s,_
)
662 | Parser_c.TypedefIdent
(s,_
)
664 (match Common.hfind_option
s defs
with
666 if s ==~
Parsing_hacks.regexp_macro
668 (* pr2 (spf "candidate: %s" s); *)
669 Common.push2
(s, def
) res
671 Common.push2
(s, def
) res2
681 let candidate_macros_in_passed ~defs b
=
682 Common.profile_code
"MACRO managment" (fun () ->
683 candidate_macros_in_passed2 ~defs b
)
689 let find_optional_macro_to_expand2 ~defs
toks =
691 let defs = Common.hash_of_list
defs in
693 let toks = toks +> Common.map
(function
695 (* special cases to undo *)
696 | Parser_c.TMacroIterator
(s, ii
) ->
697 if Hashtbl.mem
defs s
698 then Parser_c.TIdent
(s, ii
)
699 else Parser_c.TMacroIterator
(s, ii
)
701 | Parser_c.TypedefIdent
(s, ii
) ->
702 if Hashtbl.mem
defs s
703 then Parser_c.TIdent
(s, ii
)
704 else Parser_c.TypedefIdent
(s, ii
)
710 Parsing_hacks.fix_tokens_cpp ~macro_defs
:defs tokens
712 (* just calling apply_macro_defs and having a specialized version
713 * of the code in fix_tokens_cpp is not enough as some work such
714 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
715 * will not get the chance to be run on the new expanded tokens.
716 * Hence even if it's expensive, it's currently better to
717 * just call directly fix_tokens_cpp again here.
719 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
720 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
721 let paren_grouped = TV.mk_parenthised cleaner in
722 Cpp_token_c.apply_macro_defs
723 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
724 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
726 (* because the before field is used by apply_macro_defs *)
727 tokens2 := TV.rebuild_tokens_extented
!tokens2;
728 Parsing_hacks.insert_virtual_positions
729 (!tokens2 +> Common.acc_map
(fun x -> x.TV.tok))
731 let find_optional_macro_to_expand ~
defs a
=
732 Common.profile_code
"MACRO managment" (fun () ->
733 find_optional_macro_to_expand2 ~
defs a
)
739 (*****************************************************************************)
740 (* Main entry points *)
741 (*****************************************************************************)
743 let (_defs
: (string, Cpp_token_c.define_def
) Hashtbl.t
ref) =
744 ref (Hashtbl.create
101)
746 let (_defs_builtins
: (string, Cpp_token_c.define_def
) Hashtbl.t
ref) =
747 ref (Hashtbl.create
101)
750 (* can not be put in parsing_hack, cos then mutually recursive problem as
751 * we also want to parse the standard.h file.
753 let init_defs_macros std_h
=
754 if not
(Common.lfile_exists std_h
)
755 then pr2
("warning: Can't find default macro file: " ^ std_h
)
757 pr2
("init_defs: " ^ std_h
);
758 _defs
:= Common.hash_of_list
(extract_macros std_h
);
761 let init_defs_builtins file_h
=
762 if not
(Common.lfile_exists file_h
)
763 then pr2
("warning: Can't find macro file: " ^ file_h
)
765 pr2
("init_defs_builtins: " ^ file_h
);
767 Common.hash_of_list
(extract_macros file_h
);
772 type info_item
= string * Parser_c.token list
774 type program2
= toplevel2 list
775 and toplevel2
= Ast_c.toplevel
* info_item
777 let program_of_program2 xs =
780 let with_program2 f program2
=
783 +> (fun (program
, infos
) ->
786 +> Common.uncurry
Common.zip
793 (* note: as now we go in 2 passes, there is first all the error message of
794 * the lexer, and then the error of the parser. It is not anymore
797 * !!!This function use refs, and is not reentrant !!! so take care.
798 * It use globals defined in Lexer_parser and also the _defs global
799 * in parsing_hack.ml.
801 * This function uses internally some semi globals in the
802 * tokens_stat record and parsing_stat record.
805 let parse_print_error_heuristic2 file =
807 let filelines = Common.cat_array
file in
808 let stat = Parsing_stat.default_stat
file in
810 (* -------------------------------------------------- *)
811 (* call lexer and get all the tokens *)
812 (* -------------------------------------------------- *)
813 LP.lexer_reset_typedef
();
814 Parsing_hacks.ifdef_paren_cnt
:= 0;
816 let toks_orig = tokens file in
817 let toks = Parsing_hacks.fix_tokens_define
toks_orig in
818 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs
:!_defs_builtins
toks in
820 (* expand macros on demand trick, preparation phase *)
822 Common.profile_code
"MACRO mgmt prep 1" (fun () ->
823 let macros = Hashtbl.copy
!_defs
in
824 (* include also builtins as some macros may generate some builtins too
825 * like __decl_spec or __stdcall
827 !_defs_builtins
+> Hashtbl.iter
(fun s def
->
828 Hashtbl.replace
macros s def
;
833 Common.profile_code
"MACRO mgmt prep 2" (fun () ->
834 let local_macros = extract_macros file in
835 local_macros +> List.iter
(fun (s, def
) ->
836 Hashtbl.replace
macros s def
;
840 let tr = mk_tokens_state toks in
844 (* todo?: I am not sure that it represents current_line, cos maybe
845 * tr.current partipated in the previous parsing phase, so maybe tr.current
846 * is not the first token of the next parsing phase. Same with checkpoint2.
847 * It would be better to record when we have a } or ; in parser.mly,
848 * cos we know that they are the last symbols of external_declaration2.
850 * bugfix: may not be equal to 'file' as after macro expansions we can
851 * start to parse a new entity from the body of a macro, for instance
852 * when parsing a define_machine() body, cf standard.h
854 let checkpoint = TH.line_of_tok
tr.current in
855 let checkpoint_file = TH.file_of_tok
tr.current in
857 (* call the parser *)
860 Common.profile_code
"Parsing: 1st pass" (fun () ->
861 get_one_elem ~pass
:1 tr (file, filelines)
865 | Right
(info
,line_err
, passed
, passed_before_error, cur
, exn
) ->
866 if !Flag_parsing_c.disable_multi_pass
869 Common.profile_code
"Parsing: multi pass" (fun () ->
871 pr2_err "parsing pass2: try again";
872 let toks = List.rev passed
++ tr.rest
in
873 let new_tr = mk_tokens_state toks in
874 copy_tokens_state ~src
:new_tr ~dst
:tr;
875 let passx = get_one_elem ~pass
:2 tr (file, filelines) in
879 | Right
(info
,line_err
,passed
,passed_before_error,cur
,exn
) ->
881 candidate_macros_in_passed ~
defs:macros passed
885 if is_define_passed passed
|| null
candidates
888 (* todo factorize code *)
890 pr2_err "parsing pass3: try again";
891 let toks = List.rev passed
++ tr.rest
in
893 find_optional_macro_to_expand ~
defs:candidates toks in
894 let new_tr = mk_tokens_state toks'
in
895 copy_tokens_state ~src
:new_tr ~dst
:tr;
896 let passx = get_one_elem ~pass
:3 tr (file, filelines) in
900 | Right
(info
,line_err
,passed
,passed_before_error,cur
,exn
) ->
901 pr2_err "parsing pass4: try again";
904 candidate_macros_in_passed
908 let toks = List.rev passed
++ tr.rest
in
910 find_optional_macro_to_expand ~
defs:candidates toks in
911 let new_tr = mk_tokens_state toks'
in
912 copy_tokens_state ~src
:new_tr ~dst
:tr;
913 let passx = get_one_elem ~pass
:4 tr (file, filelines) in
923 (* again not sure if checkpoint2 corresponds to end of bad region *)
924 let checkpoint2 = TH.line_of_tok
tr.current in (* <> line_error *)
925 let checkpoint2_file = TH.file_of_tok
tr.current in
928 if (checkpoint_file =$
= checkpoint2_file) && (checkpoint_file =$
= file)
929 then (checkpoint2 - checkpoint)
931 (* TODO? so if error come in middle of something ? where the
932 * start token was from original file but synchro found in body
933 * of macro ? then can have wrong number of lines stat.
934 * Maybe simpler just to look at tr.passed and count
935 * the lines in the token from the correct file ?
938 let info = mk_info_item file (List.rev
tr.passed
) in
940 (* some stat updates *)
941 stat.Stat.commentized <-
942 stat.Stat.commentized + count_lines_commentized (snd
info);
947 stat.Stat.correct
<- stat.Stat.correct
+ diffline;
949 | Right
(info_of_bads, line_error, toks_of_bads
,
950 _passed_before_error
, cur
, exn
) ->
952 let was_define = is_define_passed tr.passed
in
954 if was_define && !Flag_parsing_c.filter_msg_define_error
960 | Parsing.Parse_error
961 | Semantic_c.Semantic _
-> ()
965 if !Flag_parsing_c.show_parsing_error
968 (* Lexical is not anymore launched I think *)
969 | Lexer_c.Lexical
s ->
970 pr2
("lexical error " ^
s^
"\n =" ^
error_msg_tok cur
)
971 | Parsing.Parse_error
->
972 pr2
("parse error \n = " ^
error_msg_tok cur
)
973 | Semantic_c.Semantic
(s, i
) ->
974 pr2
("semantic error " ^
s^
"\n ="^
error_msg_tok cur
)
975 | e
-> raise Impossible
978 if (checkpoint_file =$
= checkpoint2_file) &&
979 checkpoint_file =$
= file
980 then print_bad line_error (checkpoint, checkpoint2) filelines
981 else pr2
"PB: bad: but on tokens not from original file"
987 +> Common.filter
(TH.is_same_line_or_close
line_error)
988 +> Common.filter
TH.is_ident_like
991 (pbline +> List.map
TH.str_of_tok
), line_error
993 stat.Stat.problematic_lines
<-
994 error_info::stat.Stat.problematic_lines
;
998 if was_define && !Flag_parsing_c.filter_define_error
999 then stat.Stat.correct
<- stat.Stat.correct
+ diffline
1000 else stat.Stat.bad
<- stat.Stat.bad
+ diffline;
1002 Ast_c.NotParsedCorrectly
info_of_bads
1006 | Ast_c.FinalDef
x -> [(Ast_c.FinalDef
x, info)]
1007 | xs -> (xs, info):: loop tr (* recurse *)
1011 let v = with_program2 Parsing_consistency_c.consistency_checking
v in
1015 let time_total_parsing a
=
1016 Common.profile_code
"TOTAL" (fun () -> parse_print_error_heuristic2 a
)
1018 let parse_print_error_heuristic a
=
1019 Common.profile_code
"C parsing" (fun () -> time_total_parsing a
)
1023 let parse_c_and_cpp a
= parse_print_error_heuristic a
1025 (*****************************************************************************)
1026 (* Same but faster cos memoize stuff *)
1027 (*****************************************************************************)
1028 let parse_cache file =
1029 if not
!Flag_parsing_c.use_cache
then parse_print_error_heuristic file
1031 let _ = pr2
"TOFIX" in
1032 let need_no_changed_files =
1033 (* should use Sys.argv.(0), would be safer. *)
1037 Config.path ^ "/parsing_c/c_parser.cma";
1038 (* we may also depend now on the semantic patch because
1039 the SP may use macro and so we will disable some of the
1040 macro expansions from standard.h.
1046 let need_no_changed_variables =
1047 (* could add some of the flags of flag_parsing_c.ml *)
1050 Common.cache_computation_robust
1052 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1053 (fun () -> parse_print_error_heuristic file)
1057 (*****************************************************************************)
1058 (* Some special cases *)
1059 (*****************************************************************************)
1061 let (cstatement_of_string
: string -> Ast_c.statement
) = fun s ->
1062 let tmpfile = Common.new_temp_file
"cocci_stmt_of_s" "c" in
1063 Common.write_file
tmpfile ("void main() { \n" ^
s ^
"\n}");
1064 let program = parse_c_and_cpp tmpfile +> fst
in
1065 program +> Common.find_some
(fun (e
,_) ->
1067 | Ast_c.Definition
({Ast_c.f_body
= [Ast_c.StmtElem st
]},_) -> Some st
1071 let (cexpression_of_string
: string -> Ast_c.expression
) = fun s ->
1072 let tmpfile = Common.new_temp_file
"cocci_expr_of_s" "c" in
1073 Common.write_file
tmpfile ("void main() { \n" ^
s ^
";\n}");
1074 let program = parse_c_and_cpp tmpfile +> fst
in
1075 program +> Common.find_some
(fun (e
,_) ->
1077 | Ast_c.Definition
({Ast_c.f_body
= compound
},_) ->
1078 (match compound
with
1079 | [Ast_c.StmtElem st
] ->
1080 (match Ast_c.unwrap_st st
with
1081 | Ast_c.ExprStatement
(Some e
) -> Some e