Release coccinelle-0.2.0
[bpt/coccinelle.git] / parsing_c / parse_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14
15 open Common
16
17 module TH = Token_helpers
18 module LP = Lexer_parser
19
20 module Stat = Parsing_stat
21
22 (*****************************************************************************)
23 (* Wrappers *)
24 (*****************************************************************************)
25 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
26
27 (*****************************************************************************)
28 (* Helpers *)
29 (*****************************************************************************)
30
31 let lexbuf_to_strpos lexbuf =
32 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
33
34 let token_to_strpos tok =
35 (TH.str_of_tok tok, TH.pos_of_tok tok)
36
37
38 let mk_info_item2 filename toks =
39 let buf = Buffer.create 100 in
40 let s =
41 (* old: get_slice_file filename (line1, line2) *)
42 begin
43 toks +> List.iter (fun tok ->
44 match TH.pinfo_of_tok tok with
45 | Ast_c.OriginTok _ ->
46 Buffer.add_string buf (TH.str_of_tok tok)
47 | Ast_c.AbstractLineTok _ ->
48 raise Impossible
49 | _ -> ()
50 );
51 Buffer.contents buf
52 end
53 in
54 (s, toks)
55
56 let mk_info_item a b =
57 Common.profile_code "C parsing.mk_info_item"
58 (fun () -> mk_info_item2 a b)
59
60
61 let info_same_line line xs =
62 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
63
64
65 (* move in cpp_token_c ? *)
66 let is_define_passed passed =
67 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
68 if List.length xs >= 2
69 then
70 (match Common.head_middle_tail xs with
71 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
72 true
73 | _ -> false
74 )
75 else begin
76 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
77 false
78 end
79
80
81 (*****************************************************************************)
82 (* Error diagnostic *)
83 (*****************************************************************************)
84
85 let error_msg_tok tok =
86 let file = TH.file_of_tok tok in
87 if !Flag_parsing_c.verbose_parsing
88 then Common.error_message file (token_to_strpos tok)
89 else ("error in " ^ file ^ "; set verbose_parsing for more info")
90
91
92 let print_bad line_error (start_line, end_line) filelines =
93 begin
94 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
95
96 for i = start_line to end_line do
97 let line = filelines.(i) in
98
99 if i =|= line_error
100 then pr2 ("BAD:!!!!!" ^ " " ^ line)
101 else pr2 ("bad:" ^ " " ^ line)
102 done
103 end
104
105
106 (*****************************************************************************)
107 (* Stats on what was passed/commentized *)
108 (*****************************************************************************)
109
110 let commentized xs = xs +> Common.map_filter (function
111 | Parser_c.TCommentCpp (cppkind, ii) ->
112 let s = Ast_c.str_of_info ii in
113 let legal_passing =
114 match !Flag_parsing_c.filter_passed_level with
115 | 0 -> false
116 | 1 ->
117 List.mem cppkind [Token_c.CppAttr]
118 ||
119 (s =~ "__.*")
120 | 2 ->
121 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
122 ||
123 (s =~ "__.*")
124 | 3 ->
125 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
126 ||
127 (s =~ "__.*")
128 | 4 ->
129 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
130 ||
131 (s =~ "__.*")
132
133
134 | 5 ->
135 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
136 ||
137 (s =~ "__.*")
138
139
140
141
142 | _ -> failwith "not valid level passing number"
143 in
144 if legal_passing then None else Some (ii.Ast_c.pinfo)
145
146 (*
147 | Ast_c.CppOther ->
148 (match s with
149 | s when s =~ "KERN_.*" -> None
150 | s when s =~ "__.*" -> None
151 | _ ->
152 Some (ii.Ast_c.pinfo)
153 )
154 *)
155
156
157 | Parser_c.TCommentMisc ii
158 | Parser_c.TAction ii
159 ->
160 Some (ii.Ast_c.pinfo)
161 | _ ->
162 None
163 )
164
165 let count_lines_commentized xs =
166 let line = ref (-1) in
167 let count = ref 0 in
168 begin
169 commentized xs +>
170 List.iter
171 (function
172 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
173 let newline = pinfo.Common.line in
174 if newline <> !line
175 then begin
176 line := newline;
177 incr count
178 end
179 | _ -> ());
180 !count
181 end
182
183
184
185 let print_commentized xs =
186 let line = ref (-1) in
187 begin
188 let ys = commentized xs in
189 ys +>
190 List.iter
191 (function
192 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
193 let newline = pinfo.Common.line in
194 let s = pinfo.Common.str in
195 let s = Str.global_substitute
196 (Str.regexp "\n") (fun s -> "") s
197 in
198 if newline =|= !line
199 then prerr_string (s ^ " ")
200 else begin
201 if !line =|= -1
202 then pr2_no_nl "passed:"
203 else pr2_no_nl "\npassed:";
204 line := newline;
205 pr2_no_nl (s ^ " ");
206 end
207 | _ -> ());
208 if not (null ys) then pr2 "";
209 end
210
211
212
213
214 (*****************************************************************************)
215 (* Lexing only *)
216 (*****************************************************************************)
217
218 (* called by parse_print_error_heuristic *)
219 let tokens2 file =
220 let table = Common.full_charpos_to_pos_large file in
221
222 Common.with_open_infile file (fun chan ->
223 let lexbuf = Lexing.from_channel chan in
224 try
225 let rec tokens_aux acc =
226 let tok = Lexer_c.token lexbuf in
227 (* fill in the line and col information *)
228 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
229 { ii with Ast_c.pinfo=
230 (* could assert pinfo.filename = file ? *)
231 match Ast_c.pinfo_of_info ii with
232 Ast_c.OriginTok pi ->
233 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
234 | Ast_c.ExpandedTok (pi,vpi) ->
235 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
236 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
237 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
238 })
239 in
240
241 if TH.is_eof tok
242 then List.rev (tok::acc)
243 else tokens_aux (tok::acc)
244 in
245 tokens_aux []
246 with
247 | Lexer_c.Lexical s ->
248 failwith ("lexical error " ^ s ^ "\n =" ^
249 (Common.error_message file (lexbuf_to_strpos lexbuf)))
250 | e -> raise e
251 )
252
253 let time_lexing ?(profile=true) a =
254 if profile
255 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
256 else tokens2 a
257 let tokens ?profile a =
258 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
259
260
261 let tokens_of_string string =
262 let lexbuf = Lexing.from_string string in
263 try
264 let rec tokens_s_aux () =
265 let tok = Lexer_c.token lexbuf in
266 if TH.is_eof tok
267 then [tok]
268 else tok::(tokens_s_aux ())
269 in
270 tokens_s_aux ()
271 with
272 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
273 | e -> raise e
274
275
276 (*****************************************************************************)
277 (* Parsing, but very basic, no more used *)
278 (*****************************************************************************)
279
280 (*
281 * !!!Those function use refs, and are not reentrant !!! so take care.
282 * It use globals defined in Lexer_parser.
283 *
284 * update: because now lexer return comments tokens, those functions
285 * may not work anymore.
286 *)
287
288 let parse file =
289 let lexbuf = Lexing.from_channel (open_in file) in
290 let result = Parser_c.main Lexer_c.token lexbuf in
291 result
292
293
294 let parse_print_error file =
295 let chan = (open_in file) in
296 let lexbuf = Lexing.from_channel chan in
297
298 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
299 try
300 lexbuf +> Parser_c.main Lexer_c.token
301 with
302 | Lexer_c.Lexical s ->
303 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
304 | Parsing.Parse_error ->
305 failwith ("parse error \n = " ^ error_msg ())
306 | Semantic_c.Semantic (s, i) ->
307 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
308 | e -> raise e
309
310
311
312
313 (*****************************************************************************)
314 (* Parsing subelements, useful to debug parser *)
315 (*****************************************************************************)
316
317 (*
318 * !!!Those function use refs, and are not reentrant !!! so take care.
319 * It use globals defined in Lexer_parser.
320 *)
321
322
323 (* old:
324 * let parse_gen parsefunc s =
325 * let lexbuf = Lexing.from_string s in
326 * let result = parsefunc Lexer_c.token lexbuf in
327 * result
328 *)
329
330 let parse_gen parsefunc s =
331 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
332
333
334 (* Why use this lexing scheme ? Why not classically give lexer func
335 * to parser ? Because I now keep comments in lexer. Could
336 * just do a simple wrapper that when comment ask again for a token,
337 * but maybe simpler to use cur_tok technique.
338 *)
339 let all_tokens = ref toks in
340 let cur_tok = ref (List.hd !all_tokens) in
341
342 let lexer_function =
343 (fun _ ->
344 if TH.is_eof !cur_tok
345 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
346 else
347 let v = Common.pop2 all_tokens in
348 cur_tok := v;
349 !cur_tok
350 )
351 in
352 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
353 let result = parsefunc lexer_function lexbuf_fake in
354 result
355
356
357 let type_of_string = parse_gen Parser_c.type_name
358 let statement_of_string = parse_gen Parser_c.statement
359 let expression_of_string = parse_gen Parser_c.expr
360
361 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
362
363
364
365
366
367 (*****************************************************************************)
368 (* Parsing default define macros, usually in a standard.h file *)
369 (*****************************************************************************)
370
371 let extract_macros2 file =
372 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
373 Flag_parsing_c.verbose_lexing := false;
374 let toks = tokens ~profile:false file in
375 let toks = Parsing_hacks.fix_tokens_define toks in
376 Cpp_token_c.extract_macros toks
377 )
378
379 let extract_macros a =
380 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
381
382
383 (*****************************************************************************)
384 (* Helper for main entry point *)
385 (*****************************************************************************)
386
387
388 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
389 * possible error recovery. Indeed, they allow to skip some tokens and
390 * still be able to call again the ocamlyacc parser. It is ugly code
391 * because we cant modify ocamllex and ocamlyacc. As we want some
392 * extended lexing tricks, we have to use such refs.
393 *
394 * Those refs are now also used for my lalr(k) technique. Indeed They
395 * store the futur and previous tokens that were parsed, and so
396 * provide enough context information for powerful lex trick.
397 *
398 * - passed_tokens_last_ckp stores the passed tokens since last
399 * checkpoint. Used for NotParsedCorrectly and also to build the
400 * info_item attached to each program_element.
401 * - passed_tokens_clean is used for lookahead, in fact for lookback.
402 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
403 * contain some comments and so would make pattern matching difficult
404 * in lookahead. Hence this variable. We would like also to get rid
405 * of cpp instruction because sometimes a cpp instruction is between
406 * two tokens and makes a pattern matching fail. But lookahead also
407 * transform some cpp instruction (in comment) so can't remove them.
408 *
409 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
410 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
411 * comment-tokens.
412 *
413 * Normally we have:
414 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
415 * after the call to pop2.
416 * toks = (reverse passed_tok) ++ remaining_tokens
417 * at the and of the lexer_function call.
418 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
419 * At the end of lexer_function call, cur_tok overlap with passed_tok.
420 *
421 * convention: I use "tr" for "tokens refs"
422 *
423 * I now also need this lexing trick because the lexer return comment
424 * tokens.
425 *)
426
427 type tokens_state = {
428 mutable rest : Parser_c.token list;
429 mutable rest_clean : Parser_c.token list;
430 mutable current : Parser_c.token;
431 (* it's passed since last "checkpoint", not passed from the beginning *)
432 mutable passed : Parser_c.token list;
433 mutable passed_clean : Parser_c.token list;
434 }
435
436 let mk_tokens_state toks =
437 {
438 rest = toks;
439 rest_clean = (toks +> List.filter TH.is_not_comment);
440 current = (List.hd toks);
441 passed = [];
442 passed_clean = [];
443 }
444
445
446
447 let clone_tokens_state tr =
448 { rest = tr.rest;
449 rest_clean = tr.rest_clean;
450 current = tr.current;
451 passed = tr.passed;
452 passed_clean = tr.passed_clean;
453 }
454 let copy_tokens_state ~src ~dst =
455 dst.rest <- src.rest;
456 dst.rest_clean <- src.rest_clean;
457 dst.current <- src.current;
458 dst.passed <- src.passed;
459 dst.passed_clean <- src.passed_clean;
460 ()
461
462 (* todo? agglomerate the x##b ? *)
463 let rec filter_noise n xs =
464 match n, xs with
465 | _, [] -> []
466 | 0, xs -> xs
467 | n, x::xs ->
468 (match x with
469 | Parser_c.TMacroAttr _ ->
470 filter_noise (n-1) xs
471 | _ ->
472 x::filter_noise (n-1) xs
473 )
474
475 let clean_for_lookahead xs =
476 match xs with
477 | [] -> []
478 | [x] -> [x]
479 | x::xs ->
480 x::filter_noise 10 xs
481
482
483
484 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
485 * tr means token refs.
486 *)
487 let rec lexer_function ~pass tr = fun lexbuf ->
488 match tr.rest with
489 | [] -> pr2_err "ALREADY AT END"; tr.current
490 | v::xs ->
491 tr.rest <- xs;
492 tr.current <- v;
493
494 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
495
496 if TH.is_comment v
497 then begin
498 tr.passed <- v::tr.passed;
499 lexer_function ~pass tr lexbuf
500 end
501 else begin
502 let x = List.hd tr.rest_clean in
503 tr.rest_clean <- List.tl tr.rest_clean;
504 assert (x =*= v);
505
506 (match v with
507
508 (* fix_define1.
509 *
510 * Why not in parsing_hacks lookahead and do passing like
511 * I do for some ifdef directives ? Because here I also need to
512 * generate some tokens sometimes and so I need access to the
513 * tr.passed, tr.rest, etc.
514 *)
515 | Parser_c.TDefine (tok) ->
516 if not (LP.current_context () =*= LP.InTopLevel) &&
517 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
518 then begin
519 incr Stat.nDefinePassing;
520 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
521 let v' = Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
522 in
523 tr.passed <- v'::tr.passed;
524 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
525 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
526 lexer_function ~pass tr lexbuf
527 end
528 else begin
529 tr.passed <- v::tr.passed;
530 tr.passed_clean <- v::tr.passed_clean;
531 v
532 end
533
534 | Parser_c.TInclude (includes, filename, inifdef, info) ->
535 if not (LP.current_context () =*= LP.InTopLevel) &&
536 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
537 then begin
538 incr Stat.nIncludePassing;
539 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
540 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
541 tr.passed <- v::tr.passed;
542 lexer_function ~pass tr lexbuf
543 end
544 else begin
545 let (v,new_tokens) =
546 Parsing_hacks.tokens_include (info, includes, filename, inifdef) in
547 let new_tokens_clean =
548 new_tokens +> List.filter TH.is_not_comment in
549
550 tr.passed <- v::tr.passed;
551 tr.passed_clean <- v::tr.passed_clean;
552 tr.rest <- new_tokens ++ tr.rest;
553 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
554 v
555 end
556
557 | _ ->
558
559 (* typedef_fix1 *)
560 let v = match v with
561 | Parser_c.TIdent (s, ii) ->
562 if
563 LP.is_typedef s &&
564 not (!Flag_parsing_c.disable_add_typedef) &&
565 pass =|= 1
566 then Parser_c.TypedefIdent (s, ii)
567 else Parser_c.TIdent (s, ii)
568 | x -> x
569 in
570
571 let v = Parsing_hacks.lookahead ~pass
572 (clean_for_lookahead (v::tr.rest_clean))
573 tr.passed_clean in
574
575 tr.passed <- v::tr.passed;
576
577 (* the lookahead may have changed the status of the token and
578 * consider it as a comment, for instance some #include are
579 * turned into comments, hence this code. *)
580 match v with
581 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
582 | v ->
583 tr.passed_clean <- v::tr.passed_clean;
584 v
585 )
586 end
587
588
589 let max_pass = 4
590
591
592 let get_one_elem ~pass tr (file, filelines) =
593
594 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
595 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
596
597 (* normally have to do that only when come from an exception in which
598 * case the dt() may not have been done
599 * TODO but if was in scoped scope ? have to let only the last scope
600 * so need do a LP.lexer_reset_typedef ();
601 *)
602 LP.enable_typedef();
603 LP._lexer_hint := (LP.default_hint ());
604 LP.save_typedef_state();
605
606 tr.passed <- [];
607
608 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
609
610 (try
611 (* -------------------------------------------------- *)
612 (* Call parser *)
613 (* -------------------------------------------------- *)
614 Common.profile_code_exclusif "YACC" (fun () ->
615 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
616 )
617 with e ->
618 LP.restore_typedef_state();
619
620 (* must keep here, before the code that adjusts the tr fields *)
621 let line_error = TH.line_of_tok tr.current in
622
623 let passed_before_error = tr.passed in
624 let current = tr.current in
625
626 (* error recovery, go to next synchro point *)
627 let (passed', rest') =
628 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
629 tr.rest <- rest';
630 tr.passed <- passed';
631
632 tr.current <- List.hd passed';
633 tr.passed_clean <- []; (* enough ? *)
634 (* with error recovery, rest and rest_clean may not be in sync *)
635 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
636
637
638 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
639 Right (info_of_bads, line_error,
640 tr.passed, passed_before_error,
641 current, e)
642 )
643
644
645
646 (* Macro problem recovery *)
647 (* used by the multi-pass error recovery expand-on-demand *)
648 (*
649 val candidate_macros_in_passed:
650 defs: (string, define_def) Hashtbl.t ->
651 Parser_c.token list -> (string * define_def) list
652 *)
653
654 let candidate_macros_in_passed2 ~defs passed =
655 let res = ref [] in
656 let res2 = ref [] in
657
658 passed +> List.iter (function
659 | Parser_c.TIdent (s,_)
660 (* bugfix: may have to undo some infered things *)
661 | Parser_c.TMacroIterator (s,_)
662 | Parser_c.TypedefIdent (s,_)
663 ->
664 (match Common.hfind_option s defs with
665 | Some def ->
666 if s ==~ Parsing_hacks.regexp_macro
667 then
668 (* pr2 (spf "candidate: %s" s); *)
669 Common.push2 (s, def) res
670 else
671 Common.push2 (s, def) res2
672 | None -> ()
673 )
674
675 | _ -> ()
676 );
677 if null !res
678 then !res2
679 else !res
680
681 let candidate_macros_in_passed ~defs b =
682 Common.profile_code "MACRO managment" (fun () ->
683 candidate_macros_in_passed2 ~defs b)
684
685
686
687
688
689 let find_optional_macro_to_expand2 ~defs toks =
690
691 let defs = Common.hash_of_list defs in
692
693 let toks = toks +> Common.map (function
694
695 (* special cases to undo *)
696 | Parser_c.TMacroIterator (s, ii) ->
697 if Hashtbl.mem defs s
698 then Parser_c.TIdent (s, ii)
699 else Parser_c.TMacroIterator (s, ii)
700
701 | Parser_c.TypedefIdent (s, ii) ->
702 if Hashtbl.mem defs s
703 then Parser_c.TIdent (s, ii)
704 else Parser_c.TypedefIdent (s, ii)
705
706 | x -> x
707 ) in
708
709 let tokens = toks in
710 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
711
712 (* just calling apply_macro_defs and having a specialized version
713 * of the code in fix_tokens_cpp is not enough as some work such
714 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
715 * will not get the chance to be run on the new expanded tokens.
716 * Hence even if it's expensive, it's currently better to
717 * just call directly fix_tokens_cpp again here.
718
719 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
720 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
721 let paren_grouped = TV.mk_parenthised cleaner in
722 Cpp_token_c.apply_macro_defs
723 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
724 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
725 defs paren_grouped;
726 (* because the before field is used by apply_macro_defs *)
727 tokens2 := TV.rebuild_tokens_extented !tokens2;
728 Parsing_hacks.insert_virtual_positions
729 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
730 *)
731 let find_optional_macro_to_expand ~defs a =
732 Common.profile_code "MACRO managment" (fun () ->
733 find_optional_macro_to_expand2 ~defs a)
734
735
736
737
738
739 (*****************************************************************************)
740 (* Main entry points *)
741 (*****************************************************************************)
742
743 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
744 ref (Hashtbl.create 101)
745
746 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
747 ref (Hashtbl.create 101)
748
749
750 (* can not be put in parsing_hack, cos then mutually recursive problem as
751 * we also want to parse the standard.h file.
752 *)
753 let init_defs_macros std_h =
754 if not (Common.lfile_exists std_h)
755 then pr2 ("warning: Can't find default macro file: " ^ std_h)
756 else begin
757 pr2 ("init_defs: " ^ std_h);
758 _defs := Common.hash_of_list (extract_macros std_h);
759 end
760
761 let init_defs_builtins file_h =
762 if not (Common.lfile_exists file_h)
763 then pr2 ("warning: Can't find macro file: " ^ file_h)
764 else begin
765 pr2 ("init_defs_builtins: " ^ file_h);
766 _defs_builtins :=
767 Common.hash_of_list (extract_macros file_h);
768 end
769
770
771
772 type info_item = string * Parser_c.token list
773
774 type program2 = toplevel2 list
775 and toplevel2 = Ast_c.toplevel * info_item
776
777 let program_of_program2 xs =
778 xs +> List.map fst
779
780 let with_program2 f program2 =
781 program2
782 +> Common.unzip
783 +> (fun (program, infos) ->
784 f program, infos
785 )
786 +> Common.uncurry Common.zip
787
788
789
790
791
792
793 (* note: as now we go in 2 passes, there is first all the error message of
794 * the lexer, and then the error of the parser. It is not anymore
795 * interwinded.
796 *
797 * !!!This function use refs, and is not reentrant !!! so take care.
798 * It use globals defined in Lexer_parser and also the _defs global
799 * in parsing_hack.ml.
800 *
801 * This function uses internally some semi globals in the
802 * tokens_stat record and parsing_stat record.
803 *)
804
805 let parse_print_error_heuristic2 file =
806
807 let filelines = Common.cat_array file in
808 let stat = Parsing_stat.default_stat file in
809
810 (* -------------------------------------------------- *)
811 (* call lexer and get all the tokens *)
812 (* -------------------------------------------------- *)
813 LP.lexer_reset_typedef();
814 Parsing_hacks.ifdef_paren_cnt := 0;
815
816 let toks_orig = tokens file in
817 let toks = Parsing_hacks.fix_tokens_define toks_orig in
818 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
819
820 (* expand macros on demand trick, preparation phase *)
821 let macros =
822 Common.profile_code "MACRO mgmt prep 1" (fun () ->
823 let macros = Hashtbl.copy !_defs in
824 (* include also builtins as some macros may generate some builtins too
825 * like __decl_spec or __stdcall
826 *)
827 !_defs_builtins +> Hashtbl.iter (fun s def ->
828 Hashtbl.replace macros s def;
829 );
830 macros
831 )
832 in
833 Common.profile_code "MACRO mgmt prep 2" (fun () ->
834 let local_macros = extract_macros file in
835 local_macros +> List.iter (fun (s, def) ->
836 Hashtbl.replace macros s def;
837 );
838 );
839
840 let tr = mk_tokens_state toks in
841
842 let rec loop tr =
843
844 (* todo?: I am not sure that it represents current_line, cos maybe
845 * tr.current partipated in the previous parsing phase, so maybe tr.current
846 * is not the first token of the next parsing phase. Same with checkpoint2.
847 * It would be better to record when we have a } or ; in parser.mly,
848 * cos we know that they are the last symbols of external_declaration2.
849 *
850 * bugfix: may not be equal to 'file' as after macro expansions we can
851 * start to parse a new entity from the body of a macro, for instance
852 * when parsing a define_machine() body, cf standard.h
853 *)
854 let checkpoint = TH.line_of_tok tr.current in
855 let checkpoint_file = TH.file_of_tok tr.current in
856
857 (* call the parser *)
858 let elem =
859 let pass1 =
860 Common.profile_code "Parsing: 1st pass" (fun () ->
861 get_one_elem ~pass:1 tr (file, filelines)
862 ) in
863 match pass1 with
864 | Left e -> Left e
865 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
866 if !Flag_parsing_c.disable_multi_pass
867 then pass1
868 else begin
869 Common.profile_code "Parsing: multi pass" (fun () ->
870
871 pr2_err "parsing pass2: try again";
872 let toks = List.rev passed ++ tr.rest in
873 let new_tr = mk_tokens_state toks in
874 copy_tokens_state ~src:new_tr ~dst:tr;
875 let passx = get_one_elem ~pass:2 tr (file, filelines) in
876
877 (match passx with
878 | Left e -> passx
879 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
880 let candidates =
881 candidate_macros_in_passed ~defs:macros passed
882 in
883
884
885 if is_define_passed passed || null candidates
886 then passx
887 else begin
888 (* todo factorize code *)
889
890 pr2_err "parsing pass3: try again";
891 let toks = List.rev passed ++ tr.rest in
892 let toks' =
893 find_optional_macro_to_expand ~defs:candidates toks in
894 let new_tr = mk_tokens_state toks' in
895 copy_tokens_state ~src:new_tr ~dst:tr;
896 let passx = get_one_elem ~pass:3 tr (file, filelines) in
897
898 (match passx with
899 | Left e -> passx
900 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
901 pr2_err "parsing pass4: try again";
902
903 let candidates =
904 candidate_macros_in_passed
905 ~defs:macros passed
906 in
907
908 let toks = List.rev passed ++ tr.rest in
909 let toks' =
910 find_optional_macro_to_expand ~defs:candidates toks in
911 let new_tr = mk_tokens_state toks' in
912 copy_tokens_state ~src:new_tr ~dst:tr;
913 let passx = get_one_elem ~pass:4 tr (file, filelines) in
914 passx
915 )
916 end
917 )
918 )
919 end
920 in
921
922
923 (* again not sure if checkpoint2 corresponds to end of bad region *)
924 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
925 let checkpoint2_file = TH.file_of_tok tr.current in
926
927 let diffline =
928 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
929 then (checkpoint2 - checkpoint)
930 else 0
931 (* TODO? so if error come in middle of something ? where the
932 * start token was from original file but synchro found in body
933 * of macro ? then can have wrong number of lines stat.
934 * Maybe simpler just to look at tr.passed and count
935 * the lines in the token from the correct file ?
936 *)
937 in
938 let info = mk_info_item file (List.rev tr.passed) in
939
940 (* some stat updates *)
941 stat.Stat.commentized <-
942 stat.Stat.commentized + count_lines_commentized (snd info);
943
944 let elem =
945 match elem with
946 | Left e ->
947 stat.Stat.correct <- stat.Stat.correct + diffline;
948 e
949 | Right (info_of_bads, line_error, toks_of_bads,
950 _passed_before_error, cur, exn) ->
951
952 let was_define = is_define_passed tr.passed in
953
954 if was_define && !Flag_parsing_c.filter_msg_define_error
955 then ()
956 else begin
957
958 (match exn with
959 | Lexer_c.Lexical _
960 | Parsing.Parse_error
961 | Semantic_c.Semantic _ -> ()
962 | e -> raise e
963 );
964
965 if !Flag_parsing_c.show_parsing_error
966 then begin
967 (match exn with
968 (* Lexical is not anymore launched I think *)
969 | Lexer_c.Lexical s ->
970 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
971 | Parsing.Parse_error ->
972 pr2 ("parse error \n = " ^ error_msg_tok cur)
973 | Semantic_c.Semantic (s, i) ->
974 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
975 | e -> raise Impossible
976 );
977 (* bugfix: *)
978 if (checkpoint_file =$= checkpoint2_file) &&
979 checkpoint_file =$= file
980 then print_bad line_error (checkpoint, checkpoint2) filelines
981 else pr2 "PB: bad: but on tokens not from original file"
982 end;
983
984
985 let pbline =
986 toks_of_bads
987 +> Common.filter (TH.is_same_line_or_close line_error)
988 +> Common.filter TH.is_ident_like
989 in
990 let error_info =
991 (pbline +> List.map TH.str_of_tok), line_error
992 in
993 stat.Stat.problematic_lines <-
994 error_info::stat.Stat.problematic_lines;
995
996 end;
997
998 if was_define && !Flag_parsing_c.filter_define_error
999 then stat.Stat.correct <- stat.Stat.correct + diffline
1000 else stat.Stat.bad <- stat.Stat.bad + diffline;
1001
1002 Ast_c.NotParsedCorrectly info_of_bads
1003 in
1004
1005 (match elem with
1006 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1007 | xs -> (xs, info):: loop tr (* recurse *)
1008 )
1009 in
1010 let v = loop tr in
1011 let v = with_program2 Parsing_consistency_c.consistency_checking v in
1012 (v, stat)
1013
1014
1015 let time_total_parsing a =
1016 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a)
1017
1018 let parse_print_error_heuristic a =
1019 Common.profile_code "C parsing" (fun () -> time_total_parsing a)
1020
1021
1022 (* alias *)
1023 let parse_c_and_cpp a = parse_print_error_heuristic a
1024
1025 (*****************************************************************************)
1026 (* Same but faster cos memoize stuff *)
1027 (*****************************************************************************)
1028 let parse_cache file =
1029 if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file
1030 else
1031 let _ = pr2 "TOFIX" in
1032 let need_no_changed_files =
1033 (* should use Sys.argv.(0), would be safer. *)
1034
1035 [
1036 (* TOFIX
1037 Config.path ^ "/parsing_c/c_parser.cma";
1038 (* we may also depend now on the semantic patch because
1039 the SP may use macro and so we will disable some of the
1040 macro expansions from standard.h.
1041 *)
1042 !Config.std_h;
1043 *)
1044 ]
1045 in
1046 let need_no_changed_variables =
1047 (* could add some of the flags of flag_parsing_c.ml *)
1048 []
1049 in
1050 Common.cache_computation_robust
1051 file ".ast_raw"
1052 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1053 (fun () -> parse_print_error_heuristic file)
1054
1055
1056
1057 (*****************************************************************************)
1058 (* Some special cases *)
1059 (*****************************************************************************)
1060
1061 let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
1062 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1063 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1064 let program = parse_c_and_cpp tmpfile +> fst in
1065 program +> Common.find_some (fun (e,_) ->
1066 match e with
1067 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1068 | _ -> None
1069 )
1070
1071 let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
1072 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1073 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1074 let program = parse_c_and_cpp tmpfile +> fst in
1075 program +> Common.find_some (fun (e,_) ->
1076 match e with
1077 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
1078 (match compound with
1079 | [Ast_c.StmtElem st] ->
1080 (match Ast_c.unwrap_st st with
1081 | Ast_c.ExprStatement (Some e) -> Some e
1082 | _ -> None
1083 )
1084 | _ -> None
1085 )
1086 | _ -> None
1087 )