1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 module TH = Token_helpers
19 module LP = Lexer_parser
20
21 module Stat = Parsing_stat
22
23 (*****************************************************************************)
24 (* Wrappers *)
25 (*****************************************************************************)
26 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
28 (*****************************************************************************)
29 (* Helpers *)
30 (*****************************************************************************)
31
32 let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34
35 let token_to_strpos tok =
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
39 let mk_info_item2 filename toks =
40 let buf = Buffer.create 100 in
41 let s =
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
44 toks +> List.iter (fun tok ->
45 match TH.pinfo_of_tok tok with
46 | Ast_c.OriginTok _ ->
47 Buffer.add_string buf (TH.str_of_tok tok)
48 | Ast_c.AbstractLineTok _ ->
49 raise Impossible
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
55 (s, toks)
56
57 let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
59 (fun () -> mk_info_item2 a b)
60
61
62 let info_same_line line xs =
63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
64
65
66 (* move in cpp_token_c ? *)
67 let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
69 if List.length xs >= 2
70 then
71 (match Common.head_middle_tail xs with
72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
78 false
79 end
80
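(* Example (a sketch, grounded in its use further below): is_define_passed
 * decides whether the tokens recovered after a parse error were really a
 * #define body, so that failed macro definitions are not reported as plain
 * parse errors:
 *
 *   let was_define = is_define_passed tr.passed in
 *   if was_define && !Flag_parsing_c.filter_msg_define_error
 *   then () (* stay silent *)
 *   else ...
 *)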
81
82 (*****************************************************************************)
83 (* Error diagnostic *)
84 (*****************************************************************************)
85
86 let error_msg_tok tok =
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
89 then Common.error_message file (token_to_strpos tok)
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
93 let print_bad line_error (start_line, end_line) filelines =
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
97 for i = start_line to end_line do
98 let line = filelines.(i) in
99
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
103 done
104 end
105
106
107 (*****************************************************************************)
108 (* Stats on what was passed/commentized *)
109 (*****************************************************************************)
110
111 let commentized xs = xs +> Common.tail_map_filter (function
112 | Parser_c.TCommentCpp (cppkind, ii) ->
113 let s = Ast_c.str_of_info ii in
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
116 | 0 -> false
117 | 1 ->
118 List.mem cppkind [Token_c.CppAttr]
119 ||
120 (s =~ "__.*")
121 | 2 ->
122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
123 ||
124 (s =~ "__.*")
125 | 3 ->
126 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
127 ||
128 (s =~ "__.*")
129 | 4 ->
130 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
131 ||
132 (s =~ "__.*")
133
134
135 | 5 ->
136 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
137 ||
138 (s =~ "__.*")
139
140
141
142
143 | _ -> failwith "not valid level passing number"
144 in
145 if legal_passing then None else Some (ii.Ast_c.pinfo)
146
147 (*
148 | Ast_c.CppOther ->
149 (match s with
150 | s when s =~ "KERN_.*" -> None
151 | s when s =~ "__.*" -> None
152 | _ ->
153 Some (ii.Ast_c.pinfo)
154 )
155 *)
156
157
158 | Parser_c.TCommentMisc ii
159 | Parser_c.TAction ii
160 ->
161 Some (ii.Ast_c.pinfo)
162 | _ ->
163 None
164 )
165
166 let count_lines_commentized xs =
167 let line = ref (-1) in
168 let count = ref 0 in
169 begin
170 commentized xs +>
171 List.iter
172 (function
173 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
174 let newline = pinfo.Common.line in
175 if newline <> !line
176 then begin
177 line := newline;
178 incr count
179 end
180 | _ -> ());
181 !count
182 end
183
184
185
186 let print_commentized xs =
187 let line = ref (-1) in
188 begin
189 let ys = commentized xs in
190 ys +>
191 List.iter
192 (function
193 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
194 let newline = pinfo.Common.line in
195 let s = pinfo.Common.str in
196 let s = Str.global_substitute
197 (Str.regexp "\n") (fun s -> "") s
198 in
199 if newline =|= !line
200 then prerr_string (s ^ " ")
201 else begin
202 if !line =|= -1
203 then pr2_no_nl "passed:"
204 else pr2_no_nl "\npassed:";
205 line := newline;
206 pr2_no_nl (s ^ " ");
207 end
208 | _ -> ());
209 if not (null ys) then pr2 "";
210 end
211
212
213
214
215 (*****************************************************************************)
216 (* Lexing only *)
217 (*****************************************************************************)
218
219 (* called by parse_print_error_heuristic *)
220 let tokens2 file =
221 let table = Common.full_charpos_to_pos_large file in
222
223 Common.with_open_infile file (fun chan ->
224 let lexbuf = Lexing.from_channel chan in
225 try
226 let rec tokens_aux acc =
227 let tok = Lexer_c.token lexbuf in
228 (* fill in the line and col information *)
229 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
230 { ii with Ast_c.pinfo=
231 (* could assert pinfo.filename = file ? *)
232 match Ast_c.pinfo_of_info ii with
233 Ast_c.OriginTok pi ->
234 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
235 | Ast_c.ExpandedTok (pi,vpi) ->
236 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
237 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
238 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
239 })
240 in
241
242 if TH.is_eof tok
243 then List.rev (tok::acc)
244 else tokens_aux (tok::acc)
245 in
246 tokens_aux []
247 with
248 | Lexer_c.Lexical s ->
249 failwith ("lexical error " ^ s ^ "\n =" ^
250 (Common.error_message file (lexbuf_to_strpos lexbuf)))
251 | e -> raise e
252 )
253
254 let time_lexing ?(profile=true) a =
255 if profile
256 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
257 else tokens2 a
258 let tokens ?profile a =
259 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
260
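(* Example (a rough sketch, not in the original code): lexing a file and
 * reporting what would be passed/commentized. The path "/tmp/foo.c" is
 * purely illustrative.
 *
 *   let show_passed () =
 *     let toks = tokens "/tmp/foo.c" in
 *     pr2 (spf "%d tokens" (List.length toks));
 *     pr2 (spf "%d commentized lines" (count_lines_commentized toks));
 *     print_commentized toks
 *)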
261
262 let tokens_of_string string =
263 let lexbuf = Lexing.from_string string in
264 try
265 let rec tokens_s_aux () =
266 let tok = Lexer_c.token lexbuf in
267 if TH.is_eof tok
268 then [tok]
269 else tok::(tokens_s_aux ())
270 in
271 tokens_s_aux ()
272 with
273 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
274 | e -> raise e
275
276
277 (*****************************************************************************)
(* Parsing, but very basic; no longer used *)
279 (*****************************************************************************)
280
(*
 * !!! These functions use refs and are not reentrant !!! so take care.
 * They use globals defined in Lexer_parser.
 *
 * update: because the lexer now returns comment tokens, these functions
 * may not work anymore.
 *)
288
289 let parse file =
290 let lexbuf = Lexing.from_channel (open_in file) in
291 let result = Parser_c.main Lexer_c.token lexbuf in
292 result
293
294
295 let parse_print_error file =
296 let chan = (open_in file) in
297 let lexbuf = Lexing.from_channel chan in
298
299 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
300 try
301 lexbuf +> Parser_c.main Lexer_c.token
302 with
303 | Lexer_c.Lexical s ->
304 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
305 | Parsing.Parse_error ->
306 failwith ("parse error \n = " ^ error_msg ())
307 | Semantic_c.Semantic (s, i) ->
308 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
309 | e -> raise e
310
311
312
313
314 (*****************************************************************************)
315 (* Parsing subelements, useful to debug parser *)
316 (*****************************************************************************)
317
(*
 * !!! These functions use refs and are not reentrant !!! so take care.
 * They use globals defined in Lexer_parser.
 *)
322
323
324 (* old:
325 * let parse_gen parsefunc s =
326 * let lexbuf = Lexing.from_string s in
327 * let result = parsefunc Lexer_c.token lexbuf in
328 * result
329 *)
330
331 let parse_gen parsefunc s =
332 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
333
334
  (* Why use this lexing scheme? Why not classically pass the lexer function
   * to the parser? Because the lexer now keeps comments. We could just
   * write a small wrapper that asks for another token whenever it sees a
   * comment, but it is maybe simpler to use the cur_tok technique.
   *)
340 let all_tokens = ref toks in
341 let cur_tok = ref (List.hd !all_tokens) in
342
343 let lexer_function =
344 (fun _ ->
345 if TH.is_eof !cur_tok
346 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
347 else
348 let v = Common.pop2 all_tokens in
349 cur_tok := v;
350 !cur_tok
351 )
352 in
353 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
354 let result = parsefunc lexer_function lexbuf_fake in
355 result
356
357
358 let type_of_string = parse_gen Parser_c.type_name
359 let statement_of_string = parse_gen Parser_c.statement
360 let expression_of_string = parse_gen Parser_c.expr
361
362 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
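(* More examples (sketches; mind the reentrancy caveat above, since these
 * helpers share the Lexer_parser globals):
 *
 *   let t = type_of_string "struct us_data *"
 *   let e = expression_of_string "a + b * 3"
 *)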
363
364
365
366
367
368 (*****************************************************************************)
369 (* Parsing default define macros, usually in a standard.h file *)
370 (*****************************************************************************)
371
372 let extract_macros2 file =
373 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
374 Flag_parsing_c.verbose_lexing := false;
375 let toks = tokens ~profile:false file in
376 let toks = Parsing_hacks.fix_tokens_define toks in
377 Cpp_token_c.extract_macros toks
378 )
379
380 let extract_macros a =
381 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
382
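(* Example (sketch): collecting the #defines of a header into a hashtable,
 * much like init_defs_macros below does. The file name is illustrative.
 *
 *   let defs_of_header () =
 *     Common.hash_of_list (extract_macros "standard.h")
 *)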
383
384 (*****************************************************************************)
385 (* Helper for main entry point *)
386 (*****************************************************************************)
387
388
(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
 * error recovery possible. Indeed, they allow us to skip some tokens and
 * still be able to call the ocamlyacc parser again. It is ugly code
 * because we can't modify ocamllex and ocamlyacc. As we want some
 * extended lexing tricks, we have to use such refs.
 *
 * Those refs are now also used for my lalr(k) technique. Indeed they
 * store the future and previous tokens that were parsed, and so
 * provide enough context information for powerful lexing tricks.
 *
 * - passed_tokens_last_ckp stores the tokens passed since the last
 *   checkpoint. Used for NotParsedCorrectly and also to build the
 *   info_item attached to each program_element.
 * - passed_tokens_clean is used for lookahead, in fact for lookback.
 * - remaining_tokens_clean is used for lookahead. remaining_tokens now
 *   contains some comments, which would make pattern matching difficult
 *   in lookahead. Hence this variable. We would also like to get rid
 *   of cpp instructions, because sometimes a cpp instruction sits between
 *   two tokens and makes a pattern matching fail. But lookahead also
 *   transforms some cpp instructions (into comments), so we can't remove
 *   them.
 *
 * So remaining_tokens and passed_tokens_last_ckp contain comment tokens,
 * whereas passed_tokens_clean and remaining_tokens_clean do not contain
 * comment tokens.
 *
 * Normally we have:
 *   toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
 * after the call to pop2, and
 *   toks = (reverse passed_tok) ++ remaining_tokens
 * at the end of the lexer_function call.
 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
 * At the end of the lexer_function call, cur_tok overlaps with passed_tok.
 *
 * convention: I use "tr" for "tokens refs".
 *
 * I also need this lexing trick now because the lexer returns comment
 * tokens.
 *)
427
428 type tokens_state = {
429 mutable rest : Parser_c.token list;
430 mutable rest_clean : Parser_c.token list;
431 mutable current : Parser_c.token;
432 (* it's passed since last "checkpoint", not passed from the beginning *)
433 mutable passed : Parser_c.token list;
434 mutable passed_clean : Parser_c.token list;
435 }
436
437 let mk_tokens_state toks =
438 {
439 rest = toks;
440 rest_clean = (toks +> List.filter TH.is_not_comment);
441 current = (List.hd toks);
442 passed = [];
443 passed_clean = [];
444 }
445
446
447
448 let clone_tokens_state tr =
449 { rest = tr.rest;
450 rest_clean = tr.rest_clean;
451 current = tr.current;
452 passed = tr.passed;
453 passed_clean = tr.passed_clean;
454 }
455 let copy_tokens_state ~src ~dst =
456 dst.rest <- src.rest;
457 dst.rest_clean <- src.rest_clean;
458 dst.current <- src.current;
459 dst.passed <- src.passed;
460 dst.passed_clean <- src.passed_clean;
461 ()
462
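(* Example (a hypothetical helper, not used below): the clone/copy pair allows
 * snapshotting the token stream, attempting something, and restoring it
 * afterwards; the multi-pass recovery below uses copy_tokens_state to install
 * a freshly built state into the shared tr.
 *
 *   let with_snapshot tr f =
 *     let saved = clone_tokens_state tr in
 *     let res = f tr in
 *     copy_tokens_state ~src:saved ~dst:tr;
 *     res
 *)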
463 (* todo? agglomerate the x##b ? *)
464 let rec filter_noise n xs =
465 match n, xs with
466 | _, [] -> []
467 | 0, xs -> xs
468 | n, x::xs ->
469 (match x with
470 | Parser_c.TMacroAttr _ ->
471 filter_noise (n-1) xs
472 | _ ->
473 x::filter_noise (n-1) xs
474 )
475
476 let clean_for_lookahead xs =
477 match xs with
478 | [] -> []
479 | [x] -> [x]
480 | x::xs ->
481 x::filter_noise 10 xs
482
483
484
(* Hacked lex. This function uses the refs passed by parse_print_error_heuristic.
 * tr means "token refs".
 *)
488 let rec lexer_function ~pass tr = fun lexbuf ->
489 match tr.rest with
490 | [] -> pr2_err "ALREADY AT END"; tr.current
491 | v::xs ->
492 tr.rest <- xs;
493 tr.current <- v;
494
495 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
496
497 if TH.is_comment v
498 then begin
499 tr.passed <- v::tr.passed;
500 lexer_function ~pass tr lexbuf
501 end
502 else begin
503 let x = List.hd tr.rest_clean in
504 tr.rest_clean <- List.tl tr.rest_clean;
505 assert (x =*= v);
506
507 (match v with
508
      (* fix_define1.
       *
       * Why not handle this in parsing_hacks' lookahead and do the passing
       * like I do for some ifdef directives? Because here I also sometimes
       * need to generate some tokens, and so I need access to
       * tr.passed, tr.rest, etc.
       *)
516 | Parser_c.TDefine (tok) ->
517 if not (LP.current_context () =*= LP.InTopLevel) &&
518 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
519 then begin
520 incr Stat.nDefinePassing;
521 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
522 let v' =
523 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
524 in
525 tr.passed <- v'::tr.passed;
526 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
527 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
528 lexer_function ~pass tr lexbuf
529 end
530 else begin
531 tr.passed <- v::tr.passed;
532 tr.passed_clean <- v::tr.passed_clean;
533 v
534 end
535
536 | Parser_c.TUndef (tok) ->
537 if not (LP.current_context () =*= LP.InTopLevel) &&
538 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
539 then begin
540 incr Stat.nUndefPassing;
541 pr2_once ("CPP-UNDEF: inside function, I treat it as comment");
542 let v' =
543 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
544 in
545 tr.passed <- v'::tr.passed;
546 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
547 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
548 lexer_function ~pass tr lexbuf
549 end
550 else begin
551 tr.passed <- v::tr.passed;
552 tr.passed_clean <- v::tr.passed_clean;
553 v
554 end
555
556 | Parser_c.TInclude (includes, filename, inifdef, info) ->
557 if not (LP.current_context () =*= LP.InTopLevel) &&
558 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
559 then begin
560 incr Stat.nIncludePassing;
561 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
562 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
563 tr.passed <- v::tr.passed;
564 lexer_function ~pass tr lexbuf
565 end
566 else begin
567 let (v,new_tokens) =
568 Parsing_hacks.tokens_include(info, includes, filename, inifdef) in
569 let new_tokens_clean =
570 new_tokens +> List.filter TH.is_not_comment in
571
572 tr.passed <- v::tr.passed;
573 tr.passed_clean <- v::tr.passed_clean;
574 tr.rest <- new_tokens ++ tr.rest;
575 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
576 v
577 end
578
579 | _ ->
580
581 (* typedef_fix1 *)
582 let v = match v with
583 | Parser_c.TIdent (s, ii) ->
584 if
585 LP.is_typedef s &&
586 not (!Flag_parsing_c.disable_add_typedef) &&
587 pass =|= 1
588 then Parser_c.TypedefIdent (s, ii)
589 else Parser_c.TIdent (s, ii)
590 | x -> x
591 in
592
593 let v = Parsing_hacks.lookahead ~pass
594 (clean_for_lookahead (v::tr.rest_clean))
595 tr.passed_clean in
596
597 tr.passed <- v::tr.passed;
598
          (* the lookahead may have changed the status of the token and
           * considered it as a comment; for instance some #includes are
           * turned into comments, hence this code. *)
602 match v with
603 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
604 | v ->
605 tr.passed_clean <- v::tr.passed_clean;
606 v
607 )
608 end
609
610
611 let max_pass = 4
612
613
614 let get_one_elem ~pass tr (file, filelines) =
615
616 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
617 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
618
  (* normally we have to do this only when we come from an exception, in
   * which case the dt() may not have been done.
   * TODO but what if we were in a nested scope? we should keep only the last
   * scope, so we would need to do a LP.lexer_reset_typedef ();
   *)
624 LP.enable_typedef();
625 LP._lexer_hint := (LP.default_hint ());
626 LP.save_typedef_state();
627
628 tr.passed <- [];
629
630 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
631
632 (try
633 (* -------------------------------------------------- *)
634 (* Call parser *)
635 (* -------------------------------------------------- *)
636 Common.profile_code_exclusif "YACC" (fun () ->
637 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
638 )
639 with e ->
640 LP.restore_typedef_state();
641
642 (* must keep here, before the code that adjusts the tr fields *)
643 let line_error = TH.line_of_tok tr.current in
644
645 let passed_before_error = tr.passed in
646 let current = tr.current in
647 (* error recovery, go to next synchro point *)
648 let (passed', rest') =
649 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
650 tr.rest <- rest';
651 tr.passed <- passed';
652
653 tr.current <- List.hd passed';
654 tr.passed_clean <- []; (* enough ? *)
655 (* with error recovery, rest and rest_clean may not be in sync *)
656 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
657
658
659 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
660 Right (info_of_bads, line_error,
661 tr.passed, passed_before_error,
662 current, e)
663 )
664
665
666
667 (* Macro problem recovery *)
668 (* used by the multi-pass error recovery expand-on-demand *)
669 (*
670 val candidate_macros_in_passed:
671 defs: (string, define_def) Hashtbl.t ->
672 Parser_c.token list -> (string * define_def) list
673 *)
674
675 let candidate_macros_in_passed2 ~defs passed =
676 let res = ref [] in
677 let res2 = ref [] in
678
679 passed +> List.iter (function
680 | Parser_c.TIdent (s,_)
    (* bugfix: may have to undo some inferred things *)
682 | Parser_c.TMacroIterator (s,_)
683 | Parser_c.TypedefIdent (s,_)
684 ->
685 (match Common.hfind_option s defs with
686 | Some def ->
687 if s ==~ Parsing_hacks.regexp_macro
688 then
689 (* pr2 (spf "candidate: %s" s); *)
690 Common.push2 (s, def) res
691 else
692 Common.push2 (s, def) res2
693 | None -> ()
694 )
695
696 | _ -> ()
697 );
698 if null !res
699 then !res2
700 else !res
701
702 let candidate_macros_in_passed ~defs b =
703 Common.profile_code "MACRO managment" (fun () ->
704 candidate_macros_in_passed2 ~defs b)
705
706
707
708
709
710 let find_optional_macro_to_expand2 ~defs toks =
711
712 let defs = Common.hash_of_list defs in
713
714 let toks = toks +> Common.tail_map (function
715
716 (* special cases to undo *)
717 | Parser_c.TMacroIterator (s, ii) ->
718 if Hashtbl.mem defs s
719 then Parser_c.TIdent (s, ii)
720 else Parser_c.TMacroIterator (s, ii)
721
722 | Parser_c.TypedefIdent (s, ii) ->
723 if Hashtbl.mem defs s
724 then Parser_c.TIdent (s, ii)
725 else Parser_c.TypedefIdent (s, ii)
726
727 | x -> x
728 ) in
729
730 let tokens = toks in
731 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
732
(* Just calling apply_macro_defs and having a specialized version
 * of the code in fix_tokens_cpp is not enough, as some work, such as
 * the passing of attribute bodies in Parsing_hacks.find_macro_paren,
 * would not get the chance to be run on the newly expanded tokens.
 * Hence, even if it is expensive, it is currently better to
 * just call fix_tokens_cpp directly again here.
739
740 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
741 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
742 let paren_grouped = TV.mk_parenthised cleaner in
743 Cpp_token_c.apply_macro_defs
744 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
745 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
746 defs paren_grouped;
747 (* because the before field is used by apply_macro_defs *)
748 tokens2 := TV.rebuild_tokens_extented !tokens2;
749 Parsing_hacks.insert_virtual_positions
750 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
751 *)
752 let find_optional_macro_to_expand ~defs a =
753 Common.profile_code "MACRO managment" (fun () ->
754 find_optional_macro_to_expand2 ~defs a)
755
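(* Example (sketch of the expand-on-demand recovery step, as performed in
 * parse_print_error_heuristic2 below): given the macro table and the tokens
 * passed before a parse error, pick candidate macros and re-run the cpp
 * fixes with only those expanded.
 *
 *   let retry_with_macros macros passed rest =
 *     let candidates = candidate_macros_in_passed ~defs:macros passed in
 *     let toks = List.rev passed ++ rest in
 *     find_optional_macro_to_expand ~defs:candidates toks
 *)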
756
757
758
759
760 (*****************************************************************************)
761 (* Main entry points *)
762 (*****************************************************************************)
763
764 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
765 ref (Hashtbl.create 101)
766
767 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
768 ref (Hashtbl.create 101)
769
770
(* cannot be put in parsing_hacks, because then there would be a mutual
 * recursion problem, as we also want to parse the standard.h file.
 *)
774 let init_defs_macros std_h =
775 if not (Common.lfile_exists std_h)
776 then pr2 ("warning: Can't find default macro file: " ^ std_h)
777 else begin
778 pr2 ("init_defs: " ^ std_h);
779 _defs := Common.hash_of_list (extract_macros std_h);
780 end
781
782 let init_defs_builtins file_h =
783 if not (Common.lfile_exists file_h)
784 then pr2 ("warning: Can't find macro file: " ^ file_h)
785 else begin
786 pr2 ("init_defs_builtins: " ^ file_h);
787 _defs_builtins :=
788 Common.hash_of_list (extract_macros file_h);
789 end
790
791
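(* Example (sketch): typical initialization before parsing; the path is
 * hypothetical and depends on the installation.
 *
 *   let () = init_defs_macros "/usr/local/lib/coccinelle/standard.h"
 *)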
792
793 type info_item = string * Parser_c.token list
794
795 type program2 = toplevel2 list
796 and extended_program2 = toplevel2 list *
797 (string, Lexer_parser.identkind) Common.scoped_h_env (* type defs *) *
798 (string, Cpp_token_c.define_def) Hashtbl.t (* macro defs *)
799 and toplevel2 = Ast_c.toplevel * info_item
800
801 let program_of_program2 xs =
802 xs +> List.map fst
803
804 let with_program2 f program2 =
805 program2
806 +> Common.unzip
807 +> (fun (program, infos) ->
808 f program, infos
809 )
810 +> Common.uncurry Common.zip
811
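(* Example: with_program2 lets a pass rewrite the bare Ast_c.program while
 * keeping each toplevel's info_item attached, as done below:
 *
 *   let v = with_program2 Parsing_consistency_c.consistency_checking v
 *)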
812
813
814
815
816
(* note: as we now go in 2 passes, we first get all the error messages of
 * the lexer, and then the errors of the parser. They are no longer
 * intertwined.
 *
 * !!! This function uses refs and is not reentrant !!! so take care.
 * It uses globals defined in Lexer_parser, and also the _defs global
 * defined above in this file.
 *
 * This function internally uses some semi-globals in the
 * tokens_stat record and parsing_stat record.
 *)
828
829 let parse_print_error_heuristic2 saved_typedefs saved_macros file =
830
831 let filelines = Common.cat_array file in
832 let stat = Parsing_stat.default_stat file in
833
834 (* -------------------------------------------------- *)
835 (* call lexer and get all the tokens *)
836 (* -------------------------------------------------- *)
837
838 LP.lexer_reset_typedef saved_typedefs;
839 Parsing_hacks.ifdef_paren_cnt := 0;
840
841 let toks_orig = tokens file in
842 let toks = Parsing_hacks.fix_tokens_define toks_orig in
843 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
844
845 (* expand macros on demand trick, preparation phase *)
846 let macros =
847 Common.profile_code "MACRO mgmt prep 1" (fun () ->
848 let macros =
849 match saved_macros with None -> Hashtbl.copy !_defs | Some h -> h in
      (* also include builtins, as some macros may generate some builtins too,
       * like __decl_spec or __stdcall
       *)
853 !_defs_builtins +> Hashtbl.iter (fun s def ->
854 Hashtbl.replace macros s def;
855 );
856 macros
857 )
858 in
859 Common.profile_code "MACRO mgmt prep 2" (fun () ->
860 let local_macros = extract_macros file in
861 local_macros +> List.iter (fun (s, def) ->
862 Hashtbl.replace macros s def;
863 );
864 );
865
866 let tr = mk_tokens_state toks in
867
868 let rec loop tr =
869
    (* todo?: I am not sure that it represents current_line, because maybe
     * tr.current participated in the previous parsing phase, so maybe
     * tr.current is not the first token of the next parsing phase. Same with
     * checkpoint2. It would be better to record when we have a } or ; in
     * parser.mly, because we know that they are the last symbols of
     * external_declaration2.
     *
     * bugfix: may not be equal to 'file', as after macro expansions we can
     * start to parse a new entity from the body of a macro, for instance
     * when parsing a define_machine() body, cf standard.h
     *)
880 let checkpoint = TH.line_of_tok tr.current in
881 let checkpoint_file = TH.file_of_tok tr.current in
882
883 (* call the parser *)
884 let elem =
885 let pass1 =
886 Common.profile_code "Parsing: 1st pass" (fun () ->
887 get_one_elem ~pass:1 tr (file, filelines)
888 ) in
889 match pass1 with
890 | Left e -> Left e
891 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
892 if !Flag_parsing_c.disable_multi_pass
893 then pass1
894 else begin
895 Common.profile_code "Parsing: multi pass" (fun () ->
896
897 pr2_err "parsing pass2: try again";
898 let toks = List.rev passed ++ tr.rest in
899 let new_tr = mk_tokens_state toks in
900 copy_tokens_state ~src:new_tr ~dst:tr;
901 let passx = get_one_elem ~pass:2 tr (file, filelines) in
902
903 (match passx with
904 | Left e -> passx
905 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
906 let candidates =
907 candidate_macros_in_passed ~defs:macros passed
908 in
909
910
911 if is_define_passed passed || null candidates
912 then passx
913 else begin
914 (* todo factorize code *)
915
916 pr2_err "parsing pass3: try again";
917 let toks = List.rev passed ++ tr.rest in
918 let toks' =
919 find_optional_macro_to_expand ~defs:candidates toks in
920 let new_tr = mk_tokens_state toks' in
921 copy_tokens_state ~src:new_tr ~dst:tr;
922 let passx = get_one_elem ~pass:3 tr (file, filelines) in
923
924 (match passx with
925 | Left e -> passx
926 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
927 pr2_err "parsing pass4: try again";
928
929 let candidates =
930 candidate_macros_in_passed
931 ~defs:macros passed
932 in
933
934 let toks = List.rev passed ++ tr.rest in
935 let toks' =
936 find_optional_macro_to_expand ~defs:candidates toks in
937 let new_tr = mk_tokens_state toks' in
938 copy_tokens_state ~src:new_tr ~dst:tr;
939 let passx = get_one_elem ~pass:4 tr (file, filelines) in
940 passx
941 )
942 end
943 )
944 )
945 end
946 in
947
948
949 (* again not sure if checkpoint2 corresponds to end of bad region *)
950 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
951 let checkpoint2_file = TH.file_of_tok tr.current in
952
953 let diffline =
954 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
955 then (checkpoint2 - checkpoint)
956 else 0
        (* TODO? what if the error comes in the middle of something, where the
         * start token was from the original file but the synchro point was
         * found in the body of a macro? Then the line-count stats can be
         * wrong. Maybe it is simpler to just look at tr.passed and count
         * the lines in the tokens from the correct file?
         *)
963 in
964 let info = mk_info_item file (List.rev tr.passed) in
965
966 (* some stat updates *)
967 stat.Stat.commentized <-
968 stat.Stat.commentized + count_lines_commentized (snd info);
969
970 let elem =
971 match elem with
972 | Left e ->
973 stat.Stat.correct <- stat.Stat.correct + diffline;
974 e
975 | Right (info_of_bads, line_error, toks_of_bads,
976 _passed_before_error, cur, exn) ->
977
978 let was_define = is_define_passed tr.passed in
979
980 if was_define && !Flag_parsing_c.filter_msg_define_error
981 then ()
982 else begin
983
984 (match exn with
985 | Lexer_c.Lexical _
986 | Parsing.Parse_error
987 | Semantic_c.Semantic _ -> ()
988 | e -> raise e
989 );
990
991 if !Flag_parsing_c.show_parsing_error
992 then begin
993 (match exn with
            (* Lexical is no longer raised, I think *)
995 | Lexer_c.Lexical s ->
996 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
997 | Parsing.Parse_error ->
998 pr2 ("parse error \n = " ^ error_msg_tok cur)
999 | Semantic_c.Semantic (s, i) ->
1000 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
1001 | e -> raise Impossible
1002 );
1003 (* bugfix: *)
1004 if (checkpoint_file =$= checkpoint2_file) &&
1005 checkpoint_file =$= file
1006 then print_bad line_error (checkpoint, checkpoint2) filelines
1007 else pr2 "PB: bad: but on tokens not from original file"
1008 end;
1009
1010
1011 let pbline =
1012 toks_of_bads
1013 +> Common.filter (TH.is_same_line_or_close line_error)
1014 +> Common.filter TH.is_ident_like
1015 in
1016 let error_info =
1017 (pbline +> List.map TH.str_of_tok), line_error
1018 in
1019 stat.Stat.problematic_lines <-
1020 error_info::stat.Stat.problematic_lines;
1021
1022 end;
1023
1024 if was_define && !Flag_parsing_c.filter_define_error
1025 then stat.Stat.correct <- stat.Stat.correct + diffline
1026 else stat.Stat.bad <- stat.Stat.bad + diffline;
1027
1028 Ast_c.NotParsedCorrectly info_of_bads
1029 in
1030
1031 (match elem with
1032 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1033 | xs -> (xs, info):: loop tr (* recurse *)
1034 )
1035 in
1036 let v = loop tr in
1037 let v = with_program2 Parsing_consistency_c.consistency_checking v in
1038 let v =
1039 let new_td = ref (Common.clone_scoped_h_env !LP._typedef) in
1040 Common.clean_scope_h new_td;
1041 (v, !new_td, macros) in
1042 (v, stat)
1043
1044
1045 let time_total_parsing a b =
1046 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a b)
1047
1048 let parse_print_error_heuristic a b =
1049 Common.profile_code "C parsing" (fun () -> time_total_parsing a b)
1050
1051
1052 (* alias *)
1053 let parse_c_and_cpp a =
1054 let ((c,_,_),stat) = parse_print_error_heuristic None None a in (c,stat)
1055 let parse_c_and_cpp_keep_typedefs td macs a =
1056 parse_print_error_heuristic td macs a
1057
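(* Example (a sketch, with an illustrative path): counting the function
 * definitions in a file and reporting how many lines failed to parse.
 *
 *   let count_defs () =
 *     let (program2, stat) = parse_c_and_cpp "/tmp/foo.c" in
 *     pr2 (spf "bad lines: %d" stat.Stat.bad);
 *     program2 +> program_of_program2
 *       +> List.filter (function Ast_c.Definition _ -> true | _ -> false)
 *       +> List.length
 *)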
1058 (*****************************************************************************)
(* Same, but faster because it memoizes results *)
1060 (*****************************************************************************)
1061 let parse_cache file =
1062 if not !Flag_parsing_c.use_cache
1063 then parse_print_error_heuristic None None file
1064 else
1065 let _ = pr2_once "TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
1066 let need_no_changed_files =
1067 (* should use Sys.argv.(0), would be safer. *)
1068
1069 [
1070 (* TOFIX
1071 Config.path ^ "/parsing_c/c_parser.cma";
1072 (* we may also depend now on the semantic patch because
1073 the SP may use macro and so we will disable some of the
1074 macro expansions from standard.h.
1075 *)
1076 !Config.std_h;
1077 *)
1078 ] in
1079 let need_no_changed_variables =
1080 (* could add some of the flags of flag_parsing_c.ml *)
1081 [] in
1082 Common.cache_computation_robust_in_dir
1083 !Flag_parsing_c.cache_prefix file ".ast_raw"
1084 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1085 (fun () ->
1086 (* check whether to clear the cache *)
1087 (match (!Flag_parsing_c.cache_limit,!Flag_parsing_c.cache_prefix) with
1088 (None,_) | (_,None) -> ()
1089 | (Some limit,Some prefix) ->
1090 let count =
1091 Common.cmd_to_list
1092 (Printf.sprintf "test -e %s && find %s -name \"*_raw\" | wc -l"
1093 prefix prefix) in
1094 match count with
1095 [c] ->
1096 if int_of_string c >= limit
1097 then
1098 let _ =
1099 Sys.command
1100 (Printf.sprintf
1101 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1102 prefix) in
1103 ()
1104 | _ -> ());
1105 (* recompute *)
1106 parse_print_error_heuristic None None file)
1107
1108
1109
1110 (*****************************************************************************)
1111 (* Some special cases *)
1112 (*****************************************************************************)
1113
1114 let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
1115 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1116 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1117 let program = parse_c_and_cpp tmpfile +> fst in
1118 program +> Common.find_some (fun (e,_) ->
1119 match e with
1120 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1121 | _ -> None
1122 )
1123
1124 let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
1125 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1126 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1127 let program = parse_c_and_cpp tmpfile +> fst in
1128 program +> Common.find_some (fun (e,_) ->
1129 match e with
1130 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
1131 (match compound with
1132 | [Ast_c.StmtElem st] ->
1133 (match Ast_c.unwrap_st st with
1134 | Ast_c.ExprStatement (Some e) -> Some e
1135 | _ -> None
1136 )
1137 | _ -> None
1138 )
1139 | _ -> None
1140 )
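(* Examples (sketches; both helpers go through a temporary file and the full
 * parser, so they inherit its non-reentrancy caveats):
 *
 *   let st = cstatement_of_string "if (x) return 1;"
 *   let e  = cexpression_of_string "a ? b : c"
 *)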