1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 module TH = Token_helpers
19 module LP = Lexer_parser
20
21 module Stat = Parsing_stat
22
23 (*****************************************************************************)
24 (* Wrappers *)
25 (*****************************************************************************)
26 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
28 (*****************************************************************************)
29 (* Helpers *)
30 (*****************************************************************************)
31
32 let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34
35 let token_to_strpos tok =
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
39 let mk_info_item2 filename toks =
40 let buf = Buffer.create 100 in
41 let s =
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
44 toks +> List.iter (fun tok ->
45 match TH.pinfo_of_tok tok with
46 | Ast_c.OriginTok _ ->
47 Buffer.add_string buf (TH.str_of_tok tok)
48 | Ast_c.AbstractLineTok _ ->
49 raise (Impossible 79)
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
55 (s, toks)
56
57 let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
59 (fun () -> mk_info_item2 a b)
60
61
62 let info_same_line line xs =
63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
64
65
66 (* move in cpp_token_c ? *)
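(* is_define_passed checks whether the tokens passed during error recovery
 * form a complete #define, i.e. a TDefine followed (ignoring comments) by a
 * TDefEOL. It is used below both to decide whether further recovery passes
 * are worth trying and to optionally downgrade errors that occur inside
 * macro definitions (see the filter_msg_define_error and
 * filter_define_error flags). *)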
67 let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
69 if List.length xs >= 2
70 then
71 (match Common.head_middle_tail xs with
72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
78 false
79 end
80
81
82 (*****************************************************************************)
83 (* Error diagnostic *)
84 (*****************************************************************************)
85
86 let error_msg_tok tok =
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
89 then Common.error_message file (token_to_strpos tok)
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
93 let print_bad line_error (start_line, end_line) filelines =
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
97 for i = start_line to end_line do
98 let line = filelines.(i) in
99
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
103 done
104 end
105
106
107 (*****************************************************************************)
108 (* Stats on what was passed/commentized *)
109 (*****************************************************************************)
110
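(* commentized returns the position info of the passed (commentized) tokens
 * that are not considered a legitimate passing, so that they can be counted
 * in the statistics. !Flag_parsing_c.filter_passed_level controls what
 * counts as legitimate:
 *   0: nothing
 *   1: attributes (CppAttr) and identifiers starting with __
 *   2: level 1 plus CppPassingNormal
 *   3: level 2 plus cpp directives (CppDirective, CppIfDirective)
 *   4: level 2 plus passed macros (CppMacro)
 *   5: level 3 plus passed macros (CppMacro)
 * Any other value raises a failure. TCommentMisc and TAction tokens are
 * always counted. *)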
111 let commentized xs = xs +> Common.tail_map_filter (function
112 | Parser_c.TCommentCpp (cppkind, ii) ->
113 let s = Ast_c.str_of_info ii in
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
116 | 0 -> false
117 | 1 ->
118 List.mem cppkind [Token_c.CppAttr]
119 ||
120 (s =~ "__.*")
121 | 2 ->
122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
123 ||
124 (s =~ "__.*")
125 | 3 ->
126 (match cppkind with
127 Token_c.CppAttr | Token_c.CppPassingNormal
128 | Token_c.CppDirective | Token_c.CppIfDirective _ -> true
129 | _ -> false)
130 ||
131 (s =~ "__.*")
132 | 4 ->
133 List.mem cppkind
134 [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
135 ||
136 (s =~ "__.*")
137
138
139 | 5 ->
140 (match cppkind with
141 Token_c.CppAttr | Token_c.CppPassingNormal
142 | Token_c.CppDirective | Token_c.CppIfDirective _
143 | Token_c.CppMacro -> true
144 | _ -> false)
145 ||
146 (s =~ "__.*")
147
148
149
150
151 | _ -> failwith "not valid level passing number"
152 in
153 if legal_passing then None else Some (ii.Ast_c.pinfo)
154
155 (*
156 | Ast_c.CppOther ->
157 (match s with
158 | s when s =~ "KERN_.*" -> None
159 | s when s =~ "__.*" -> None
160 | _ ->
161 Some (ii.Ast_c.pinfo)
162 )
163 *)
164
165
166 | Parser_c.TCommentMisc ii
167 | Parser_c.TAction ii
168 ->
169 Some (ii.Ast_c.pinfo)
170 | _ ->
171 None
172 )
173
174 let count_lines_commentized xs =
175 let line = ref (-1) in
176 let count = ref 0 in
177 begin
178 commentized xs +>
179 List.iter
180 (function
181 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
182 let newline = pinfo.Common.line in
183 if newline <> !line
184 then begin
185 line := newline;
186 incr count
187 end
188 | _ -> ());
189 !count
190 end
191
192
193
194 let print_commentized xs =
195 let line = ref (-1) in
196 begin
197 let ys = commentized xs in
198 ys +>
199 List.iter
200 (function
201 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
202 let newline = pinfo.Common.line in
203 let s = pinfo.Common.str in
204 let s = Str.global_substitute
205 (Str.regexp "\n") (fun s -> "") s
206 in
207 if newline =|= !line
208 then prerr_string (s ^ " ")
209 else begin
210 if !line =|= -1
211 then pr2_no_nl "passed:"
212 else pr2_no_nl "\npassed:";
213 line := newline;
214 pr2_no_nl (s ^ " ");
215 end
216 | _ -> ());
217 if not (null ys) then pr2 "";
218 end
219
220
221
222
223 (*****************************************************************************)
224 (* Lexing only *)
225 (*****************************************************************************)
226
227 (* called by parse_print_error_heuristic *)
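(* tokens2 lexes the whole file. Common.full_charpos_to_pos_large first builds
 * a table mapping absolute character positions in the file to line/column
 * information, which complete_parse_info_large then uses (below) to fill in
 * the position of every OriginTok and ExpandedTok returned by the lexer. *)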
228 let tokens2 file =
229 let table = Common.full_charpos_to_pos_large file in
230
231 Common.with_open_infile file (fun chan ->
232 let lexbuf = Lexing.from_channel chan in
233 try
234 let rec tokens_aux acc =
235 let tok = Lexer_c.token lexbuf in
236 (* fill in the line and col information *)
237 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
238 { ii with Ast_c.pinfo=
239 (* could assert pinfo.filename = file ? *)
240 match Ast_c.pinfo_of_info ii with
241 Ast_c.OriginTok pi ->
242 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
243 | Ast_c.ExpandedTok (pi,vpi) ->
244 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
245 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
246 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
247 })
248 in
249
250 if TH.is_eof tok
251 then List.rev (tok::acc)
252 else tokens_aux (tok::acc)
253 in
254 tokens_aux []
255 with
256 | Lexer_c.Lexical s ->
257 failwith ("lexical error " ^ s ^ "\n =" ^
258 (Common.error_message file (lexbuf_to_strpos lexbuf)))
259 | e -> raise e
260 )
261
262 let time_lexing ?(profile=true) a =
263 if profile
264 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
265 else tokens2 a
266 let tokens ?profile a =
267 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
268
269
270 let tokens_of_string string =
271 let lexbuf = Lexing.from_string string in
272 try
273 let rec tokens_s_aux () =
274 let tok = Lexer_c.token lexbuf in
275 if TH.is_eof tok
276 then [tok]
277 else tok::(tokens_s_aux ())
278 in
279 tokens_s_aux ()
280 with
281 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
282 | e -> raise e
283
284
285 (*****************************************************************************)
286 (* Parsing, but very basic; no longer used *)
287 (*****************************************************************************)
288
289 (*
290 * !!! These functions use refs and are not reentrant !!! so take care.
291 * They use globals defined in Lexer_parser.
292 *
293 * update: because the lexer now returns comment tokens, these functions
294 * may not work anymore.
295 *)
296
297 let parse file =
298 let lexbuf = Lexing.from_channel (open_in file) in
299 let result = Parser_c.main Lexer_c.token lexbuf in
300 result
301
302
303 let parse_print_error file =
304 let chan = (open_in file) in
305 let lexbuf = Lexing.from_channel chan in
306
307 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
308 try
309 lexbuf +> Parser_c.main Lexer_c.token
310 with
311 | Lexer_c.Lexical s ->
312 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
313 | Parsing.Parse_error ->
314 failwith ("parse error \n = " ^ error_msg ())
315 | Semantic_c.Semantic (s, i) ->
316 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
317 | e -> raise e
318
319
320
321
322 (*****************************************************************************)
323 (* Parsing subelements, useful to debug parser *)
324 (*****************************************************************************)
325
326 (*
327 * !!! These functions use refs and are not reentrant !!! so take care.
328 * They use globals defined in Lexer_parser.
329 *)
330
331
332 (* old:
333 * let parse_gen parsefunc s =
334 * let lexbuf = Lexing.from_string s in
335 * let result = parsefunc Lexer_c.token lexbuf in
336 * result
337 *)
338
339 let parse_gen parsefunc s =
340 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
341
342
343 (* Why use this lexing scheme? Why not classically give the lexer function
344 * to the parser? Because I now keep comments in the lexer. I could
345 * just write a simple wrapper that asks for another token when it sees a
346 * comment, but it is maybe simpler to use the cur_tok technique.
347 *)
348 let all_tokens = ref toks in
349 let cur_tok = ref (List.hd !all_tokens) in
350
351 let lexer_function =
352 (fun _ ->
353 if TH.is_eof !cur_tok
354 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
355 else
356 let v = Common.pop2 all_tokens in
357 cur_tok := v;
358 !cur_tok
359 )
360 in
361 let lexbuf_fake = Lexing.from_function (fun buf n -> raise (Impossible 80)) in
362 let result = parsefunc lexer_function lexbuf_fake in
363 result
364
365
366 let type_of_string = parse_gen Parser_c.type_name
367 let statement_of_string = parse_gen Parser_c.statement
368 let expression_of_string = parse_gen Parser_c.expr
369
370 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
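(* More usage sketches (untested, for illustration only; the input strings
 * below are made up):
 * ex: type_of_string "struct us_data *"
 * ex: expression_of_string "a == NULL ? b : c"
 * As stated above, these helpers rely on global state in Lexer_parser and
 * are mainly meant for debugging the parser. *)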
371
372
373
374
375
376 (*****************************************************************************)
377 (* Parsing default define macros, usually in a standard.h file *)
378 (*****************************************************************************)
379
380 let extract_macros2 file =
381 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
382 Flag_parsing_c.verbose_lexing := false;
383 let toks = tokens ~profile:false file in
384 let toks = Parsing_hacks.fix_tokens_define toks in
385 Cpp_token_c.extract_macros toks
386 )
387
388 let extract_macros a =
389 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
390
391
392 (*****************************************************************************)
393 (* Helper for main entry point *)
394 (*****************************************************************************)
395
396
397 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
398 * error recovery possible. Indeed, they allow us to skip some tokens and
399 * still be able to call the ocamlyacc parser again. It is ugly code
400 * because we can't modify ocamllex and ocamlyacc. As we want some
401 * extended lexing tricks, we have to use such refs.
402 *
403 * Those refs are now also used for my lalr(k) technique. Indeed, they
404 * store the future and previous tokens that were parsed, and so
405 * provide enough context information for powerful lexing tricks.
406 *
407 * - passed_tokens_last_ckp stores the tokens passed since the last
408 * checkpoint. Used for NotParsedCorrectly and also to build the
409 * info_item attached to each program_element.
410 * - passed_tokens_clean is used for lookahead, in fact for lookback.
411 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
412 * contains some comments which would make pattern matching difficult
413 * in lookahead. Hence this variable. We would also like to get rid
414 * of cpp instructions because sometimes a cpp instruction is between
415 * two tokens and makes a pattern match fail. But lookahead also
416 * transforms some cpp instructions (into comments) so we can't remove them.
417 *
418 * So remaining_tokens and passed_tokens_last_ckp contain comment-tokens,
419 * whereas passed_tokens_clean and remaining_tokens_clean do not contain
420 * comment-tokens.
421 *
422 * Normally we have:
423 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
424 * after the call to pop2.
425 * toks = (reverse passed_tok) ++ remaining_tokens
426 * at the end of the lexer_function call.
427 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
428 * At the end of a lexer_function call, cur_tok overlaps with passed_tok.
429 *
430 * convention: I use "tr" for "tokens refs"
431 *
432 * I now also need this lexing trick because the lexer returns comment
433 * tokens.
434 *)
435
436 type tokens_state = {
437 mutable rest : Parser_c.token list;
438 mutable rest_clean : Parser_c.token list;
439 mutable current : Parser_c.token;
440 (* the tokens passed since the last "checkpoint", not since the beginning *)
441 mutable passed : Parser_c.token list;
442 mutable passed_clean : Parser_c.token list;
443 }
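(* A rough sketch, kept as a comment and not used by the code, of the
 * invariant described above in terms of the tokens_state fields. It only
 * holds when lexer_function has not rewritten or injected tokens (e.g. for
 * #define passing or #include expansion); the helper name is made up:
 *
 * let tokens_state_invariant ~original_toks tr =
 *   List.rev tr.passed ++ tr.rest = original_toks
 *   && (match tr.passed with
 *       | [] -> true
 *       | x :: _ -> x = tr.current (* cur_tok overlaps with passed *))
 *)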
444
445 let mk_tokens_state toks =
446 {
447 rest = toks;
448 rest_clean = (toks +> List.filter TH.is_not_comment);
449 current = (List.hd toks);
450 passed = [];
451 passed_clean = [];
452 }
453
454
455
456 let clone_tokens_state tr =
457 { rest = tr.rest;
458 rest_clean = tr.rest_clean;
459 current = tr.current;
460 passed = tr.passed;
461 passed_clean = tr.passed_clean;
462 }
463 let copy_tokens_state ~src ~dst =
464 dst.rest <- src.rest;
465 dst.rest_clean <- src.rest_clean;
466 dst.current <- src.current;
467 dst.passed <- src.passed;
468 dst.passed_clean <- src.passed_clean;
469 ()
470
471 (* todo? agglomerate the x##b ? *)
472 let rec filter_noise n xs =
473 match n, xs with
474 | _, [] -> []
475 | 0, xs -> xs
476 | n, x::xs ->
477 (match x with
478 | Parser_c.TMacroAttr _ ->
479 filter_noise (n-1) xs
480 | _ ->
481 x::filter_noise (n-1) xs
482 )
483
484 let clean_for_lookahead xs =
485 match xs with
486 | [] -> []
487 | [x] -> [x]
488 | x::xs ->
489 x::filter_noise 10 xs
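(* For example, on a list [x; attr1; y; attr2; z] where attr1 and attr2 are
 * TMacroAttr tokens, clean_for_lookahead keeps the head unconditionally and
 * filters the attributes out of (at most) the next 10 tokens, yielding
 * [x; y; z], so that the lookahead pattern matching stays simple. *)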
490
491
492
493 (* Hacked lex. This function uses refs passed by parse_print_error_heuristic;
494 * tr means token refs.
495 *)
496 let rec lexer_function ~pass tr = fun lexbuf ->
497 match tr.rest with
498 | [] -> pr2_err "ALREADY AT END"; tr.current
499 | v::xs ->
500 tr.rest <- xs;
501 tr.current <- v;
502
503 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
504
505 if TH.is_comment v
506 then begin
507 tr.passed <- v::tr.passed;
508 lexer_function ~pass tr lexbuf
509 end
510 else begin
511 let x = List.hd tr.rest_clean in
512 tr.rest_clean <- List.tl tr.rest_clean;
513 assert (x =*= v);
514
515 (match v with
516
517 (* fix_define1.
518 *
519 * Why not do the passing in the parsing_hacks lookahead, like
520 * I do for some ifdef directives? Because here I also sometimes need
521 * to generate some tokens, and so I need access to
522 * tr.passed, tr.rest, etc.
523 *)
524 | Parser_c.TDefine (tok) ->
525 if not (LP.current_context () =*= LP.InTopLevel) &&
526 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
527 then begin
528 incr Stat.nDefinePassing;
529 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
530 let v' =
531 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
532 in
533 tr.passed <- v'::tr.passed;
534 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
535 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
536 lexer_function ~pass tr lexbuf
537 end
538 else begin
539 tr.passed <- v::tr.passed;
540 tr.passed_clean <- v::tr.passed_clean;
541 v
542 end
543
544 | Parser_c.TUndef (tok) ->
545 if not (LP.current_context () =*= LP.InTopLevel) &&
546 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
547 then begin
548 incr Stat.nUndefPassing;
549 pr2_once ("CPP-UNDEF: inside function, I treat it as comment");
550 let v' =
551 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
552 in
553 tr.passed <- v'::tr.passed;
554 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
555 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
556 lexer_function ~pass tr lexbuf
557 end
558 else begin
559 tr.passed <- v::tr.passed;
560 tr.passed_clean <- v::tr.passed_clean;
561 v
562 end
563
564 | Parser_c.TInclude (includes, filename, inifdef, info) ->
565 if not (LP.current_context () =*= LP.InTopLevel) &&
566 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
567 then begin
568 incr Stat.nIncludePassing;
569 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
570 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
571 tr.passed <- v::tr.passed;
572 lexer_function ~pass tr lexbuf
573 end
574 else begin
575 let (v,new_tokens) =
576 Parsing_hacks.tokens_include(info, includes, filename, inifdef) in
577 let new_tokens_clean =
578 new_tokens +> List.filter TH.is_not_comment in
579
580 tr.passed <- v::tr.passed;
581 tr.passed_clean <- v::tr.passed_clean;
582 tr.rest <- new_tokens ++ tr.rest;
583 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
584 v
585 end
586
587 | _ ->
588
589 (* typedef_fix1 *)
590 let v = match v with
591 | Parser_c.TIdent (s, ii) ->
592 if
593 LP.is_typedef s &&
594 not (!Flag_parsing_c.disable_add_typedef) &&
595 pass =|= 1
596 then Parser_c.TypedefIdent (s, ii)
597 else Parser_c.TIdent (s, ii)
598 | x -> x
599 in
600
601 let v = Parsing_hacks.lookahead ~pass
602 (clean_for_lookahead (v::tr.rest_clean))
603 tr.passed_clean in
604
605 tr.passed <- v::tr.passed;
606
607 (* the lookahead may have changed the status of the token and
608 * considered it as a comment; for instance some #includes are
609 * turned into comments, hence this code. *)
610 match v with
611 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
612 | v ->
613 tr.passed_clean <- v::tr.passed_clean;
614 v
615 )
616 end
617
618
619 let max_pass = 4
620
621 let get_one_elem ~pass tr (file, filelines) =
622
623 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
624 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
625
626 (* normally we have to do that only when we come from an exception, in
627 * which case the dt() may not have been done
628 * TODO but what if we were in a nested scope ? we should keep only the
629 * last scope, so we would need to do a LP.lexer_reset_typedef ();
630 *)
631 LP.enable_typedef();
632 LP._lexer_hint := (LP.default_hint ());
633 LP.save_typedef_state();
634
635 tr.passed <- [];
636
637 let lexbuf_fake = Lexing.from_function (fun buf n -> raise (Impossible 81)) in
638
639 (try
640 (* -------------------------------------------------- *)
641 (* Call parser *)
642 (* -------------------------------------------------- *)
643 Common.profile_code_exclusif "YACC" (fun () ->
644 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
645 )
646 with e ->
647 LP.restore_typedef_state();
648
649 (* must keep here, before the code that adjusts the tr fields *)
650 let line_error = TH.line_of_tok tr.current in
651
652 let passed_before_error = tr.passed in
653 let current = tr.current in
654 (* error recovery, go to next synchro point *)
655 let (passed', rest') =
656 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
657 tr.rest <- rest';
658 tr.passed <- passed';
659
660 tr.current <- List.hd passed';
661 tr.passed_clean <- []; (* enough ? *)
662 (* with error recovery, rest and rest_clean may not be in sync *)
663 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
664
665
666 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
667 Right (info_of_bads, line_error,
668 tr.passed, passed_before_error,
669 current, e)
670 )
671
672
673
674 (* Macro problem recovery *)
675 (* used by the multi-pass error recovery expand-on-demand *)
676 (*
677 val candidate_macros_in_passed:
678 defs: (string, define_def) Hashtbl.t ->
679 Parser_c.token list -> (string * define_def) list
680 *)
681
682 let candidate_macros_in_passed2 ~defs passed =
683 let res = ref [] in
684 let res2 = ref [] in
685
686 passed +> List.iter (function
687 | Parser_c.TIdent (s,_)
688 (* bugfix: may have to undo some inferred things *)
689 | Parser_c.TMacroIterator (s,_)
690 | Parser_c.TypedefIdent (s,_)
691 ->
692 (match Common.hfind_option s defs with
693 | Some def ->
694 if s ==~ Parsing_hacks.regexp_macro
695 then
696 (* pr2 (spf "candidate: %s" s); *)
697 Common.push2 (s, def) res
698 else
699 Common.push2 (s, def) res2
700 | None -> ()
701 )
702
703 | _ -> ()
704 );
705 if null !res
706 then !res2
707 else !res
708
709 let candidate_macros_in_passed ~defs b =
710 Common.profile_code "MACRO management" (fun () ->
711 candidate_macros_in_passed2 ~defs b)
712
713
714
715
716
717 let find_optional_macro_to_expand2 ~defs toks =
718
719 let defs = Common.hash_of_list defs in
720
721 let toks = toks +> Common.tail_map (function
722
723 (* special cases to undo *)
724 | Parser_c.TMacroIterator (s, ii) ->
725 if Hashtbl.mem defs s
726 then Parser_c.TIdent (s, ii)
727 else Parser_c.TMacroIterator (s, ii)
728
729 | Parser_c.TypedefIdent (s, ii) ->
730 if Hashtbl.mem defs s
731 then Parser_c.TIdent (s, ii)
732 else Parser_c.TypedefIdent (s, ii)
733
734 | x -> x
735 ) in
736
737 let tokens = toks in
738 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
739
740 (* just calling apply_macro_defs and having a specialized version
741 * of the code in fix_tokens_cpp is not enough, as some work, such
742 * as the passing of attribute bodies in Parsing_hacks.find_macro_paren,
743 * would not get the chance to be run on the newly expanded tokens.
744 * Hence, even though it is expensive, it is currently better to
745 * just call fix_tokens_cpp directly again here.
746
747 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
748 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
749 let paren_grouped = TV.mk_parenthised cleaner in
750 Cpp_token_c.apply_macro_defs
751 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
752 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
753 defs paren_grouped;
754 (* because the before field is used by apply_macro_defs *)
755 tokens2 := TV.rebuild_tokens_extented !tokens2;
756 Parsing_hacks.insert_virtual_positions
757 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
758 *)
759 let find_optional_macro_to_expand ~defs a =
760 Common.profile_code "MACRO management" (fun () ->
761 find_optional_macro_to_expand2 ~defs a)
762
763
764
765
766
767 (*****************************************************************************)
768 (* Main entry points *)
769 (*****************************************************************************)
770
771 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
772 ref (Hashtbl.create 101)
773
774 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
775 ref (Hashtbl.create 101)
776
777
778 (* cannot be put in parsing_hack, because that would create a mutual
779 * recursion problem, as we also want to parse the standard.h file.
780 *)
781 let init_defs_macros std_h =
782 if not (Common.lfile_exists std_h)
783 then pr2 ("warning: Can't find default macro file: " ^ std_h)
784 else begin
785 pr2 ("init_defs: " ^ std_h);
786 _defs := Common.hash_of_list (extract_macros std_h);
787 end
788
789 let init_defs_builtins file_h =
790 if not (Common.lfile_exists file_h)
791 then pr2 ("warning: Can't find macro file: " ^ file_h)
792 else begin
793 pr2 ("init_defs_builtins: " ^ file_h);
794 _defs_builtins :=
795 Common.hash_of_list (extract_macros file_h);
796 end
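(* Hedged usage sketch (illustration only; the path below is made up):
 *
 * let _ = init_defs_macros "/usr/local/share/coccinelle/standard.h"
 *
 * Both functions simply fill the _defs / _defs_builtins tables with the
 * macros extracted from the given file, and warn if the file does not
 * exist. *)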
797
798
799
800 type info_item = string * Parser_c.token list
801
802 type program2 = toplevel2 list
803 and extended_program2 = toplevel2 list *
804 (string, Lexer_parser.identkind) Common.scoped_h_env (* type defs *) *
805 (string, Cpp_token_c.define_def) Hashtbl.t (* macro defs *)
806 and toplevel2 = Ast_c.toplevel * info_item
807
808 let program_of_program2 xs =
809 xs +> List.map fst
810
811 let with_program2 f program2 =
812 program2
813 +> Common.unzip
814 +> (fun (program, infos) ->
815 f program, infos
816 )
817 +> Common.uncurry Common.zip
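(* with_program2 lifts a transformation on the list of toplevel elements to a
 * transformation on a program2, keeping each element paired with its original
 * info_item (the unzip/zip assumes f preserves the number and order of the
 * elements). It is used below with Parsing_consistency_c.consistency_checking. *)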
818
819
820
821
822
823
824 (* note: as we now go in 2 passes, first come all the error messages of
825 * the lexer, and then the errors of the parser. They are no longer
826 * intertwined.
827 *
828 * !!! This function uses refs and is not reentrant !!! so take care.
829 * It uses globals defined in Lexer_parser and also the _defs global
830 * in parsing_hack.ml.
831 *
832 * This function internally uses some semi-globals in the
833 * tokens_stat record and parsing_stat record.
834 *)
835
836 let parse_print_error_heuristic2 saved_typedefs saved_macros file =
837
838 let filelines = Common.cat_array file in
839 let stat = Parsing_stat.default_stat file in
840
841 (* -------------------------------------------------- *)
842 (* call lexer and get all the tokens *)
843 (* -------------------------------------------------- *)
844
845 LP.lexer_reset_typedef saved_typedefs;
846 Parsing_hacks.ifdef_paren_cnt := 0;
847
848 let toks_orig = tokens file in
849 let toks = Parsing_hacks.fix_tokens_define toks_orig in
850 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
851
852 (* expand macros on demand trick, preparation phase *)
853 let macros =
854 Common.profile_code "MACRO mgmt prep 1" (fun () ->
855 let macros =
856 match saved_macros with None -> Hashtbl.copy !_defs | Some h -> h in
857 (* include also builtins as some macros may generate some builtins too
858 * like __decl_spec or __stdcall
859 *)
860 !_defs_builtins +> Hashtbl.iter (fun s def ->
861 Hashtbl.replace macros s def;
862 );
863 macros
864 )
865 in
866 Common.profile_code "MACRO mgmt prep 2" (fun () ->
867 let local_macros = extract_macros file in
868 local_macros +> List.iter (fun (s, def) ->
869 Hashtbl.replace macros s def;
870 );
871 );
872
873 let tr = mk_tokens_state toks in
874
875 let rec loop tr =
876
877 (* todo?: I am not sure that it represents current_line, cos maybe
878 * tr.current participated in the previous parsing phase, so maybe tr.current
879 * is not the first token of the next parsing phase. Same with checkpoint2.
880 * It would be better to record when we have a } or ; in parser.mly,
881 * cos we know that they are the last symbols of external_declaration2.
882 *
883 * bugfix: may not be equal to 'file' as after macro expansions we can
884 * start to parse a new entity from the body of a macro, for instance
885 * when parsing a define_machine() body, cf standard.h
886 *)
887 let checkpoint = TH.line_of_tok tr.current in
888 let checkpoint_file = TH.file_of_tok tr.current in
889
890 (* call the parser *)
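(* Multi-pass strategy used below (unless disable_multi_pass is set):
 * - pass 1: plain parse of the next element;
 * - pass 2: on error, replay the same tokens, but let lexer_function pass
 *   cpp directives inside functions as comments;
 * - pass 3: on error again, and only if the failed region is not a #define
 *   and some candidate macros were seen among the passed tokens, expand
 *   those macros (find_optional_macro_to_expand) and retry;
 * - pass 4: one last attempt after recomputing the candidate macros. *)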
891 let elem =
892 let pass1 =
893 Common.profile_code "Parsing: 1st pass" (fun () ->
894 get_one_elem ~pass:1 tr (file, filelines)
895 ) in
896 match pass1 with
897 | Left e -> Left e
898 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
899 if !Flag_parsing_c.disable_multi_pass
900 then pass1
901 else begin
902 Common.profile_code "Parsing: multi pass" (fun () ->
903
904 pr2_err "parsing pass2: try again";
905 let toks = List.rev passed ++ tr.rest in
906 let new_tr = mk_tokens_state toks in
907 copy_tokens_state ~src:new_tr ~dst:tr;
908 let passx = get_one_elem ~pass:2 tr (file, filelines) in
909
910 (match passx with
911 | Left e -> passx
912 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
913 let candidates =
914 candidate_macros_in_passed ~defs:macros passed
915 in
916
917
918 if is_define_passed passed || null candidates
919 then passx
920 else begin
921 (* todo factorize code *)
922
923 pr2_err "parsing pass3: try again";
924 let toks = List.rev passed ++ tr.rest in
925 let toks' =
926 find_optional_macro_to_expand ~defs:candidates toks in
927 let new_tr = mk_tokens_state toks' in
928 copy_tokens_state ~src:new_tr ~dst:tr;
929 let passx = get_one_elem ~pass:3 tr (file, filelines) in
930
931 (match passx with
932 | Left e -> passx
933 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
934 pr2_err "parsing pass4: try again";
935
936 let candidates =
937 candidate_macros_in_passed
938 ~defs:macros passed
939 in
940
941 let toks = List.rev passed ++ tr.rest in
942 let toks' =
943 find_optional_macro_to_expand ~defs:candidates toks in
944 let new_tr = mk_tokens_state toks' in
945 copy_tokens_state ~src:new_tr ~dst:tr;
946 let passx = get_one_elem ~pass:4 tr (file, filelines) in
947 passx
948 )
949 end
950 )
951 )
952 end
953 in
954
955
956 (* again not sure if checkpoint2 corresponds to end of bad region *)
957 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
958 let checkpoint2_file = TH.file_of_tok tr.current in
959
960 let diffline =
961 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
962 then (checkpoint2 - checkpoint)
963 else 0
964 (* TODO? what if the error comes in the middle of something, where the
965 * start token was from the original file but the synchro point was found
966 * in the body of a macro ? then the number-of-lines stat can be wrong.
967 * Maybe it is simpler to just look at tr.passed and count
968 * the lines of the tokens from the correct file ?
969 *)
970 in
971 let info = mk_info_item file (List.rev tr.passed) in
972
973 (* some stat updates *)
974 stat.Stat.commentized <-
975 stat.Stat.commentized + count_lines_commentized (snd info);
976
977 let elem =
978 match elem with
979 | Left e ->
980 stat.Stat.correct <- stat.Stat.correct + diffline;
981 e
982 | Right (info_of_bads, line_error, toks_of_bads,
983 _passed_before_error, cur, exn) ->
984
985 let was_define = is_define_passed tr.passed in
986
987 if was_define && !Flag_parsing_c.filter_msg_define_error
988 then ()
989 else begin
990
991 (match exn with
992 | Lexer_c.Lexical _
993 | Parsing.Parse_error
994 | Semantic_c.Semantic _ -> ()
995 | e -> raise e
996 );
997
998 if !Flag_parsing_c.show_parsing_error
999 then begin
1000 (match exn with
1001 (* Lexical is no longer raised, I think *)
1002 | Lexer_c.Lexical s ->
1003 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
1004 | Parsing.Parse_error ->
1005 pr2 ("parse error \n = " ^ error_msg_tok cur)
1006 | Semantic_c.Semantic (s, i) ->
1007 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
1008 | e -> raise (Impossible 82)
1009 );
1010 (* bugfix: *)
1011 if (checkpoint_file =$= checkpoint2_file) &&
1012 checkpoint_file =$= file
1013 then print_bad line_error (checkpoint, checkpoint2) filelines
1014 else pr2 "PB: bad: but on tokens not from original file"
1015 end;
1016
1017
1018 let pbline =
1019 toks_of_bads
1020 +> Common.filter (TH.is_same_line_or_close line_error)
1021 +> Common.filter TH.is_ident_like
1022 in
1023 let error_info =
1024 (pbline +> List.map TH.str_of_tok), line_error
1025 in
1026 stat.Stat.problematic_lines <-
1027 error_info::stat.Stat.problematic_lines;
1028
1029 end;
1030
1031 if was_define && !Flag_parsing_c.filter_define_error
1032 then stat.Stat.correct <- stat.Stat.correct + diffline
1033 else stat.Stat.bad <- stat.Stat.bad + diffline;
1034
1035 Ast_c.NotParsedCorrectly info_of_bads
1036 in
1037
1038 (match elem with
1039 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1040 | xs -> (xs, info):: loop tr (* recurse *)
1041 )
1042 in
1043 let v = loop tr in
1044 let v = with_program2 Parsing_consistency_c.consistency_checking v in
1045 let v =
1046 let new_td = ref (Common.clone_scoped_h_env !LP._typedef) in
1047 Common.clean_scope_h new_td;
1048 (v, !new_td, macros) in
1049 (v, stat)
1050
1051
1052 let time_total_parsing a b =
1053 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a b)
1054
1055 let parse_print_error_heuristic a b =
1056 Common.profile_code "C parsing" (fun () -> time_total_parsing a b)
1057
1058
1059 (* alias *)
1060 let parse_c_and_cpp a =
1061 let ((c,_,_),stat) = parse_print_error_heuristic None None a in (c,stat)
1062 let parse_c_and_cpp_keep_typedefs td macs a =
1063 parse_print_error_heuristic td macs a
1064
1065 (*****************************************************************************)
1066 (* Same, but faster because it memoizes results *)
1067 (*****************************************************************************)
1068 let parse_cache file =
1069 if not !Flag_parsing_c.use_cache
1070 then parse_print_error_heuristic None None file
1071 else
1072 let _ = pr2_once "TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
1073 let need_no_changed_files =
1074 (* should use Sys.argv.(0), would be safer. *)
1075
1076 [
1077 (* TOFIX
1078 Config.path ^ "/parsing_c/c_parser.cma";
1079 (* we may also depend now on the semantic patch because
1080 the SP may use macro and so we will disable some of the
1081 macro expansions from standard.h.
1082 *)
1083 !Config.std_h;
1084 *)
1085 ] in
1086 let need_no_changed_variables =
1087 (* could add some of the flags of flag_parsing_c.ml *)
1088 [] in
1089 Common.cache_computation_robust_in_dir
1090 !Flag_parsing_c.cache_prefix file ".ast_raw"
1091 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1092 (fun () ->
1093 (* check whether to clear the cache *)
1094 (match (!Flag_parsing_c.cache_limit,!Flag_parsing_c.cache_prefix) with
1095 (None,_) | (_,None) -> ()
1096 | (Some limit,Some prefix) ->
1097 let count =
1098 Common.cmd_to_list
1099 (Printf.sprintf "test -e %s && find %s -name \"*_raw\" | wc -l"
1100 prefix prefix) in
1101 match count with
1102 [c] ->
1103 if int_of_string c >= limit
1104 then
1105 let _ =
1106 Sys.command
1107 (Printf.sprintf
1108 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1109 prefix) in
1110 ()
1111 | _ -> ());
1112 (* recompute *)
1113 parse_print_error_heuristic None None file)
1114
1115
1116
1117 (*****************************************************************************)
1118 (* Some special cases *)
1119 (*****************************************************************************)
1120
1121 let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
1122 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1123 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1124 let program = parse_c_and_cpp tmpfile +> fst in
1125 program +> Common.find_some (fun (e,_) ->
1126 match e with
1127 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1128 | _ -> None
1129 )
1130
1131 let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
1132 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1133 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1134 let program = parse_c_and_cpp tmpfile +> fst in
1135 program +> Common.find_some (fun (e,_) ->
1136 match e with
1137 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
1138 (match compound with
1139 | [Ast_c.StmtElem st] ->
1140 (match Ast_c.unwrap_st st with
1141 | Ast_c.ExprStatement (Some e) -> Some e
1142 | _ -> None
1143 )
1144 | _ -> None
1145 )
1146 | _ -> None
1147 )
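(* Hedged usage sketches (illustration only; the input strings are made up):
 * ex: cstatement_of_string "if (x) return;"
 * ex: cexpression_of_string "a + b * 2"
 * Both wrap the string in a dummy function written to a temporary file, run
 * the full parser on it, and extract the single statement or expression. *)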