permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / parse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
34e49164
C
19module LP = Lexer_parser
20
485bce71
C
21module Stat = Parsing_stat
22
34e49164
C
23(*****************************************************************************)
24(* Wrappers *)
25(*****************************************************************************)
ae4735db
C
26let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
34e49164
C
28(*****************************************************************************)
29(* Helpers *)
30(*****************************************************************************)
31
ae4735db
C
32let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34e49164 34
ae4735db 35let token_to_strpos tok =
34e49164
C
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
ae4735db 39let mk_info_item2 filename toks =
34e49164 40 let buf = Buffer.create 100 in
ae4735db 41 let s =
34e49164
C
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
ae4735db 44 toks +> List.iter (fun tok ->
34e49164 45 match TH.pinfo_of_tok tok with
ae4735db 46 | Ast_c.OriginTok _ ->
91eba41f 47 Buffer.add_string buf (TH.str_of_tok tok)
ae4735db 48 | Ast_c.AbstractLineTok _ ->
abad11c5 49 raise (Impossible 79)
34e49164
C
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
ae4735db 55 (s, toks)
34e49164 56
ae4735db
C
57let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
34e49164
C
59 (fun () -> mk_info_item2 a b)
60
61
ae4735db 62let info_same_line line xs =
b1b2de81 63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
34e49164 64
34e49164 65
978fd7e5
C
66(* move in cpp_token_c ? *)
67let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
ae4735db
C
69 if List.length xs >= 2
70 then
978fd7e5 71 (match Common.head_middle_tail xs with
ae4735db 72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
978fd7e5
C
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
ae4735db 78 false
978fd7e5
C
79 end
80
81
82(*****************************************************************************)
83(* Error diagnostic *)
84(*****************************************************************************)
85
ae4735db 86let error_msg_tok tok =
978fd7e5
C
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
ae4735db 89 then Common.error_message file (token_to_strpos tok)
978fd7e5
C
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
ae4735db 93let print_bad line_error (start_line, end_line) filelines =
978fd7e5
C
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
ae4735db
C
97 for i = start_line to end_line do
98 let line = filelines.(i) in
978fd7e5 99
ae4735db
C
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
978fd7e5
C
103 done
104 end
105
106
34e49164
C
107(*****************************************************************************)
108(* Stats on what was passed/commentized *)
109(*****************************************************************************)
110
c491d8ee 111let commentized xs = xs +> Common.tail_map_filter (function
ae4735db 112 | Parser_c.TCommentCpp (cppkind, ii) ->
485bce71 113 let s = Ast_c.str_of_info ii in
ae4735db
C
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
485bce71 116 | 0 -> false
ae4735db 117 | 1 ->
0708f913 118 List.mem cppkind [Token_c.CppAttr]
ae4735db 119 ||
485bce71 120 (s =~ "__.*")
ae4735db 121 | 2 ->
0708f913 122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
ae4735db 123 ||
485bce71 124 (s =~ "__.*")
ae4735db 125 | 3 ->
feec80c3
C
126 (match cppkind with
127 Token_c.CppAttr | Token_c.CppPassingNormal
128 | Token_c.CppDirective | Token_c.CppIfDirective _ -> true
129 | _ -> false)
ae4735db 130 ||
485bce71 131 (s =~ "__.*")
ae4735db 132 | 4 ->
feec80c3
C
133 List.mem cppkind
134 [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
ae4735db 135 ||
485bce71
C
136 (s =~ "__.*")
137
138
ae4735db 139 | 5 ->
feec80c3
C
140 (match cppkind with
141 Token_c.CppAttr | Token_c.CppPassingNormal
142 | Token_c.CppDirective | Token_c.CppIfDirective _
143 | Token_c.CppMacro -> true
144 | _ -> false)
ae4735db 145 ||
485bce71
C
146 (s =~ "__.*")
147
148
149
150
151 | _ -> failwith "not valid level passing number"
152 in
153 if legal_passing then None else Some (ii.Ast_c.pinfo)
154
155 (*
ae4735db 156 | Ast_c.CppOther ->
34e49164
C
157 (match s with
158 | s when s =~ "KERN_.*" -> None
159 | s when s =~ "__.*" -> None
ae4735db 160 | _ ->
485bce71 161 Some (ii.Ast_c.pinfo)
34e49164 162 )
485bce71
C
163 *)
164
ae4735db 165
34e49164 166 | Parser_c.TCommentMisc ii
ae4735db 167 | Parser_c.TAction ii
34e49164
C
168 ->
169 Some (ii.Ast_c.pinfo)
ae4735db 170 | _ ->
34e49164
C
171 None
172 )
ae4735db
C
173
174let count_lines_commentized xs =
34e49164
C
175 let line = ref (-1) in
176 let count = ref 0 in
177 begin
178 commentized xs +>
179 List.iter
180 (function
ae4735db 181 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
182 let newline = pinfo.Common.line in
183 if newline <> !line
184 then begin
185 line := newline;
186 incr count
187 end
188 | _ -> ());
189 !count
190 end
191
192
193
ae4735db 194let print_commentized xs =
34e49164
C
195 let line = ref (-1) in
196 begin
197 let ys = commentized xs in
198 ys +>
199 List.iter
200 (function
ae4735db 201 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
202 let newline = pinfo.Common.line in
203 let s = pinfo.Common.str in
ae4735db
C
204 let s = Str.global_substitute
205 (Str.regexp "\n") (fun s -> "") s
34e49164 206 in
b1b2de81 207 if newline =|= !line
34e49164
C
208 then prerr_string (s ^ " ")
209 else begin
ae4735db
C
210 if !line =|= -1
211 then pr2_no_nl "passed:"
34e49164
C
212 else pr2_no_nl "\npassed:";
213 line := newline;
214 pr2_no_nl (s ^ " ");
215 end
216 | _ -> ());
217 if not (null ys) then pr2 "";
218 end
ae4735db 219
34e49164
C
220
221
222
223(*****************************************************************************)
224(* Lexing only *)
225(*****************************************************************************)
226
227(* called by parse_print_error_heuristic *)
ae4735db 228let tokens2 file =
708f4980 229 let table = Common.full_charpos_to_pos_large file in
34e49164 230
ae4735db 231 Common.with_open_infile file (fun chan ->
34e49164 232 let lexbuf = Lexing.from_channel chan in
ae4735db
C
233 try
234 let rec tokens_aux acc =
34e49164
C
235 let tok = Lexer_c.token lexbuf in
236 (* fill in the line and col information *)
ae4735db 237 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
34e49164
C
238 { ii with Ast_c.pinfo=
239 (* could assert pinfo.filename = file ? *)
240 match Ast_c.pinfo_of_info ii with
241 Ast_c.OriginTok pi ->
708f4980 242 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
34e49164 243 | Ast_c.ExpandedTok (pi,vpi) ->
708f4980 244 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
34e49164
C
245 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
246 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
247 })
248 in
249
250 if TH.is_eof tok
251 then List.rev (tok::acc)
252 else tokens_aux (tok::acc)
253 in
254 tokens_aux []
255 with
ae4735db
C
256 | Lexer_c.Lexical s ->
257 failwith ("lexical error " ^ s ^ "\n =" ^
34e49164
C
258 (Common.error_message file (lexbuf_to_strpos lexbuf)))
259 | e -> raise e
260 )
261
ae4735db
C
262let time_lexing ?(profile=true) a =
263 if profile
485bce71 264 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
951c7801 265 else tokens2 a
ae4735db 266let tokens ?profile a =
485bce71 267 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
34e49164
C
268
269
ae4735db 270let tokens_of_string string =
34e49164 271 let lexbuf = Lexing.from_string string in
ae4735db
C
272 try
273 let rec tokens_s_aux () =
34e49164
C
274 let tok = Lexer_c.token lexbuf in
275 if TH.is_eof tok
276 then [tok]
277 else tok::(tokens_s_aux ())
278 in
279 tokens_s_aux ()
280 with
281 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
282 | e -> raise e
283
284
285(*****************************************************************************)
286(* Parsing, but very basic, no more used *)
287(*****************************************************************************)
288
289(*
290 * !!!Those function use refs, and are not reentrant !!! so take care.
291 * It use globals defined in Lexer_parser.
ae4735db 292 *
34e49164
C
293 * update: because now lexer return comments tokens, those functions
294 * may not work anymore.
295 *)
296
ae4735db 297let parse file =
34e49164
C
298 let lexbuf = Lexing.from_channel (open_in file) in
299 let result = Parser_c.main Lexer_c.token lexbuf in
300 result
301
302
ae4735db 303let parse_print_error file =
34e49164
C
304 let chan = (open_in file) in
305 let lexbuf = Lexing.from_channel chan in
306
307 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
ae4735db 308 try
34e49164 309 lexbuf +> Parser_c.main Lexer_c.token
ae4735db
C
310 with
311 | Lexer_c.Lexical s ->
34e49164 312 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
ae4735db 313 | Parsing.Parse_error ->
34e49164 314 failwith ("parse error \n = " ^ error_msg ())
ae4735db 315 | Semantic_c.Semantic (s, i) ->
34e49164
C
316 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
317 | e -> raise e
318
319
320
321
322(*****************************************************************************)
323(* Parsing subelements, useful to debug parser *)
324(*****************************************************************************)
325
326(*
327 * !!!Those function use refs, and are not reentrant !!! so take care.
328 * It use globals defined in Lexer_parser.
329 *)
330
331
ae4735db
C
332(* old:
333 * let parse_gen parsefunc s =
34e49164
C
334 * let lexbuf = Lexing.from_string s in
335 * let result = parsefunc Lexer_c.token lexbuf in
336 * result
337 *)
338
ae4735db 339let parse_gen parsefunc s =
485bce71 340 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
34e49164
C
341
342
343 (* Why use this lexing scheme ? Why not classically give lexer func
ae4735db 344 * to parser ? Because I now keep comments in lexer. Could
34e49164
C
345 * just do a simple wrapper that when comment ask again for a token,
346 * but maybe simpler to use cur_tok technique.
347 *)
348 let all_tokens = ref toks in
349 let cur_tok = ref (List.hd !all_tokens) in
350
ae4735db
C
351 let lexer_function =
352 (fun _ ->
34e49164 353 if TH.is_eof !cur_tok
708f4980 354 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
34e49164
C
355 else
356 let v = Common.pop2 all_tokens in
357 cur_tok := v;
358 !cur_tok
ae4735db 359 )
34e49164 360 in
abad11c5 361 let lexbuf_fake = Lexing.from_function (fun buf n -> raise (Impossible 80)) in
34e49164
C
362 let result = parsefunc lexer_function lexbuf_fake in
363 result
364
365
366let type_of_string = parse_gen Parser_c.type_name
367let statement_of_string = parse_gen Parser_c.statement
368let expression_of_string = parse_gen Parser_c.expr
369
370(* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
371
372
373
374
375
34e49164 376(*****************************************************************************)
485bce71 377(* Parsing default define macros, usually in a standard.h file *)
34e49164
C
378(*****************************************************************************)
379
ae4735db
C
380let extract_macros2 file =
381 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
708f4980
C
382 Flag_parsing_c.verbose_lexing := false;
383 let toks = tokens ~profile:false file in
978fd7e5
C
384 let toks = Parsing_hacks.fix_tokens_define toks in
385 Cpp_token_c.extract_macros toks
708f4980 386 )
34e49164 387
ae4735db 388let extract_macros a =
978fd7e5 389 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
485bce71 390
34e49164
C
391
392(*****************************************************************************)
978fd7e5 393(* Helper for main entry point *)
34e49164
C
394(*****************************************************************************)
395
34e49164
C
396
397(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
398 * possible error recovery. Indeed, they allow to skip some tokens and
399 * still be able to call again the ocamlyacc parser. It is ugly code
400 * because we cant modify ocamllex and ocamlyacc. As we want some
401 * extended lexing tricks, we have to use such refs.
ae4735db 402 *
34e49164
C
403 * Those refs are now also used for my lalr(k) technique. Indeed They
404 * store the futur and previous tokens that were parsed, and so
405 * provide enough context information for powerful lex trick.
ae4735db 406 *
34e49164 407 * - passed_tokens_last_ckp stores the passed tokens since last
485bce71 408 * checkpoint. Used for NotParsedCorrectly and also to build the
34e49164
C
409 * info_item attached to each program_element.
410 * - passed_tokens_clean is used for lookahead, in fact for lookback.
411 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
412 * contain some comments and so would make pattern matching difficult
ae4735db 413 * in lookahead. Hence this variable. We would like also to get rid
34e49164
C
414 * of cpp instruction because sometimes a cpp instruction is between
415 * two tokens and makes a pattern matching fail. But lookahead also
416 * transform some cpp instruction (in comment) so can't remove them.
ae4735db 417 *
34e49164
C
418 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
419 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
420 * comment-tokens.
ae4735db 421 *
34e49164 422 * Normally we have:
ae4735db 423 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
34e49164 424 * after the call to pop2.
ae4735db 425 * toks = (reverse passed_tok) ++ remaining_tokens
34e49164
C
426 * at the and of the lexer_function call.
427 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
428 * At the end of lexer_function call, cur_tok overlap with passed_tok.
ae4735db 429 *
34e49164 430 * convention: I use "tr" for "tokens refs"
ae4735db 431 *
485bce71
C
432 * I now also need this lexing trick because the lexer return comment
433 * tokens.
34e49164
C
434 *)
435
436type tokens_state = {
437 mutable rest : Parser_c.token list;
438 mutable rest_clean : Parser_c.token list;
439 mutable current : Parser_c.token;
440 (* it's passed since last "checkpoint", not passed from the beginning *)
441 mutable passed : Parser_c.token list;
442 mutable passed_clean : Parser_c.token list;
443}
708f4980 444
ae4735db
C
445let mk_tokens_state toks =
446 {
708f4980
C
447 rest = toks;
448 rest_clean = (toks +> List.filter TH.is_not_comment);
449 current = (List.hd toks);
ae4735db 450 passed = [];
708f4980
C
451 passed_clean = [];
452 }
453
454
455
ae4735db 456let clone_tokens_state tr =
485bce71
C
457 { rest = tr.rest;
458 rest_clean = tr.rest_clean;
459 current = tr.current;
460 passed = tr.passed;
461 passed_clean = tr.passed_clean;
462 }
ae4735db 463let copy_tokens_state ~src ~dst =
485bce71
C
464 dst.rest <- src.rest;
465 dst.rest_clean <- src.rest_clean;
466 dst.current <- src.current;
467 dst.passed <- src.passed;
468 dst.passed_clean <- src.passed_clean;
469 ()
470
708f4980 471(* todo? agglomerate the x##b ? *)
485bce71
C
472let rec filter_noise n xs =
473 match n, xs with
474 | _, [] -> []
475 | 0, xs -> xs
ae4735db 476 | n, x::xs ->
485bce71 477 (match x with
ae4735db 478 | Parser_c.TMacroAttr _ ->
485bce71 479 filter_noise (n-1) xs
ae4735db 480 | _ ->
485bce71
C
481 x::filter_noise (n-1) xs
482 )
483
ae4735db 484let clean_for_lookahead xs =
485bce71
C
485 match xs with
486 | [] -> []
487 | [x] -> [x]
ae4735db 488 | x::xs ->
485bce71
C
489 x::filter_noise 10 xs
490
34e49164 491
485bce71 492
ae4735db 493(* Hacked lex. This function use refs passed by parse_print_error_heuristic
485bce71
C
494 * tr means token refs.
495 *)
ae4735db 496let rec lexer_function ~pass tr = fun lexbuf ->
34e49164 497 match tr.rest with
708f4980 498 | [] -> pr2_err "ALREADY AT END"; tr.current
ae4735db 499 | v::xs ->
34e49164
C
500 tr.rest <- xs;
501 tr.current <- v;
502
503 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
504
505 if TH.is_comment v
506 then begin
507 tr.passed <- v::tr.passed;
485bce71 508 lexer_function ~pass tr lexbuf
34e49164
C
509 end
510 else begin
511 let x = List.hd tr.rest_clean in
512 tr.rest_clean <- List.tl tr.rest_clean;
b1b2de81 513 assert (x =*= v);
ae4735db 514
34e49164 515 (match v with
113803cf 516
ae4735db 517 (* fix_define1.
113803cf
C
518 *
519 * Why not in parsing_hacks lookahead and do passing like
ae4735db
C
520 * I do for some ifdef directives ? Because here I also need to
521 * generate some tokens sometimes and so I need access to the
113803cf 522 * tr.passed, tr.rest, etc.
485bce71 523 *)
9f8e26f4 524 | Parser_c.TDefine (tok) ->
ae4735db 525 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 526 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 527 then begin
485bce71 528 incr Stat.nDefinePassing;
34e49164 529 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
3a314143
C
530 let v' =
531 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
532 in
533 tr.passed <- v'::tr.passed;
534 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
535 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
536 lexer_function ~pass tr lexbuf
537 end
538 else begin
539 tr.passed <- v::tr.passed;
540 tr.passed_clean <- v::tr.passed_clean;
541 v
542 end
543
544 | Parser_c.TUndef (tok) ->
545 if not (LP.current_context () =*= LP.InTopLevel) &&
546 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
547 then begin
548 incr Stat.nUndefPassing;
549 pr2_once ("CPP-UNDEF: inside function, I treat it as comment");
550 let v' =
551 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
34e49164
C
552 in
553 tr.passed <- v'::tr.passed;
978fd7e5
C
554 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
555 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
485bce71 556 lexer_function ~pass tr lexbuf
34e49164
C
557 end
558 else begin
559 tr.passed <- v::tr.passed;
560 tr.passed_clean <- v::tr.passed_clean;
561 v
562 end
ae4735db
C
563
564 | Parser_c.TInclude (includes, filename, inifdef, info) ->
b1b2de81 565 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 566 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 567 then begin
485bce71 568 incr Stat.nIncludePassing;
34e49164 569 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
0708f913 570 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
34e49164 571 tr.passed <- v::tr.passed;
485bce71 572 lexer_function ~pass tr lexbuf
34e49164
C
573 end
574 else begin
ae4735db 575 let (v,new_tokens) =
3a314143 576 Parsing_hacks.tokens_include(info, includes, filename, inifdef) in
ae4735db 577 let new_tokens_clean =
34e49164
C
578 new_tokens +> List.filter TH.is_not_comment in
579
580 tr.passed <- v::tr.passed;
581 tr.passed_clean <- v::tr.passed_clean;
582 tr.rest <- new_tokens ++ tr.rest;
583 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
584 v
585 end
ae4735db
C
586
587 | _ ->
588
34e49164
C
589 (* typedef_fix1 *)
590 let v = match v with
ae4735db
C
591 | Parser_c.TIdent (s, ii) ->
592 if
593 LP.is_typedef s &&
485bce71 594 not (!Flag_parsing_c.disable_add_typedef) &&
b1b2de81 595 pass =|= 1
34e49164
C
596 then Parser_c.TypedefIdent (s, ii)
597 else Parser_c.TIdent (s, ii)
598 | x -> x
599 in
ae4735db 600
485bce71
C
601 let v = Parsing_hacks.lookahead ~pass
602 (clean_for_lookahead (v::tr.rest_clean))
603 tr.passed_clean in
34e49164
C
604
605 tr.passed <- v::tr.passed;
ae4735db 606
485bce71 607 (* the lookahead may have changed the status of the token and
34e49164 608 * consider it as a comment, for instance some #include are
485bce71 609 * turned into comments, hence this code. *)
34e49164 610 match v with
485bce71 611 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
ae4735db 612 | v ->
34e49164
C
613 tr.passed_clean <- v::tr.passed_clean;
614 v
615 )
616 end
617
618
708f4980
C
619let max_pass = 4
620
ae4735db 621let get_one_elem ~pass tr (file, filelines) =
485bce71
C
622
623 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
708f4980 624 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
485bce71
C
625
626 (* normally have to do that only when come from an exception in which
ae4735db 627 * case the dt() may not have been done
485bce71
C
628 * TODO but if was in scoped scope ? have to let only the last scope
629 * so need do a LP.lexer_reset_typedef ();
630 *)
ae4735db 631 LP.enable_typedef();
485bce71
C
632 LP._lexer_hint := (LP.default_hint ());
633 LP.save_typedef_state();
634
635 tr.passed <- [];
636
abad11c5 637 let lexbuf_fake = Lexing.from_function (fun buf n -> raise (Impossible 81)) in
ae4735db
C
638
639 (try
485bce71
C
640 (* -------------------------------------------------- *)
641 (* Call parser *)
642 (* -------------------------------------------------- *)
ae4735db 643 Common.profile_code_exclusif "YACC" (fun () ->
c491d8ee 644 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
485bce71 645 )
ae4735db 646 with e ->
485bce71
C
647 LP.restore_typedef_state();
648
649 (* must keep here, before the code that adjusts the tr fields *)
650 let line_error = TH.line_of_tok tr.current in
708f4980
C
651
652 let passed_before_error = tr.passed in
653 let current = tr.current in
485bce71 654 (* error recovery, go to next synchro point *)
ae4735db 655 let (passed', rest') =
978fd7e5 656 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
485bce71
C
657 tr.rest <- rest';
658 tr.passed <- passed';
ae4735db 659
485bce71
C
660 tr.current <- List.hd passed';
661 tr.passed_clean <- []; (* enough ? *)
662 (* with error recovery, rest and rest_clean may not be in sync *)
663 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
ae4735db
C
664
665
666 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
667 Right (info_of_bads, line_error,
668 tr.passed, passed_before_error,
708f4980 669 current, e)
485bce71
C
670 )
671
672
673
978fd7e5
C
674(* Macro problem recovery *)
675(* used by the multi-pass error recovery expand-on-demand *)
676(*
ae4735db
C
677val candidate_macros_in_passed:
678 defs: (string, define_def) Hashtbl.t ->
978fd7e5
C
679 Parser_c.token list -> (string * define_def) list
680*)
681
ae4735db 682let candidate_macros_in_passed2 ~defs passed =
978fd7e5
C
683 let res = ref [] in
684 let res2 = ref [] in
685
686 passed +> List.iter (function
687 | Parser_c.TIdent (s,_)
688 (* bugfix: may have to undo some infered things *)
689 | Parser_c.TMacroIterator (s,_)
690 | Parser_c.TypedefIdent (s,_)
ae4735db 691 ->
978fd7e5 692 (match Common.hfind_option s defs with
ae4735db
C
693 | Some def ->
694 if s ==~ Parsing_hacks.regexp_macro
978fd7e5
C
695 then
696 (* pr2 (spf "candidate: %s" s); *)
ae4735db
C
697 Common.push2 (s, def) res
698 else
978fd7e5
C
699 Common.push2 (s, def) res2
700 | None -> ()
701 )
702
703 | _ -> ()
704 );
ae4735db
C
705 if null !res
706 then !res2
978fd7e5
C
707 else !res
708
ae4735db
C
709let candidate_macros_in_passed ~defs b =
710 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 711 candidate_macros_in_passed2 ~defs b)
ae4735db 712
978fd7e5
C
713
714
715
716
717let find_optional_macro_to_expand2 ~defs toks =
718
719 let defs = Common.hash_of_list defs in
720
c491d8ee 721 let toks = toks +> Common.tail_map (function
978fd7e5
C
722
723 (* special cases to undo *)
ae4735db 724 | Parser_c.TMacroIterator (s, ii) ->
978fd7e5
C
725 if Hashtbl.mem defs s
726 then Parser_c.TIdent (s, ii)
727 else Parser_c.TMacroIterator (s, ii)
728
ae4735db 729 | Parser_c.TypedefIdent (s, ii) ->
978fd7e5
C
730 if Hashtbl.mem defs s
731 then Parser_c.TIdent (s, ii)
732 else Parser_c.TypedefIdent (s, ii)
733
734 | x -> x
735 ) in
736
737 let tokens = toks in
738 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
739
740 (* just calling apply_macro_defs and having a specialized version
ae4735db 741 * of the code in fix_tokens_cpp is not enough as some work such
978fd7e5
C
742 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
743 * will not get the chance to be run on the new expanded tokens.
ae4735db 744 * Hence even if it's expensive, it's currently better to
978fd7e5
C
745 * just call directly fix_tokens_cpp again here.
746
747 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
748 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
749 let paren_grouped = TV.mk_parenthised cleaner in
750 Cpp_token_c.apply_macro_defs
751 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
752 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
753 defs paren_grouped;
754 (* because the before field is used by apply_macro_defs *)
ae4735db
C
755 tokens2 := TV.rebuild_tokens_extented !tokens2;
756 Parsing_hacks.insert_virtual_positions
978fd7e5
C
757 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
758 *)
ae4735db
C
759let find_optional_macro_to_expand ~defs a =
760 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 761 find_optional_macro_to_expand2 ~defs a)
ae4735db 762
978fd7e5
C
763
764
765
766
767(*****************************************************************************)
768(* Main entry points *)
769(*****************************************************************************)
770
ae4735db 771let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
772 ref (Hashtbl.create 101)
773
ae4735db 774let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
775 ref (Hashtbl.create 101)
776
777
778(* can not be put in parsing_hack, cos then mutually recursive problem as
779 * we also want to parse the standard.h file.
780 *)
ae4735db 781let init_defs_macros std_h =
978fd7e5
C
782 if not (Common.lfile_exists std_h)
783 then pr2 ("warning: Can't find default macro file: " ^ std_h)
784 else begin
785 pr2 ("init_defs: " ^ std_h);
786 _defs := Common.hash_of_list (extract_macros std_h);
787 end
788
ae4735db 789let init_defs_builtins file_h =
978fd7e5
C
790 if not (Common.lfile_exists file_h)
791 then pr2 ("warning: Can't find macro file: " ^ file_h)
792 else begin
793 pr2 ("init_defs_builtins: " ^ file_h);
ae4735db 794 _defs_builtins :=
978fd7e5
C
795 Common.hash_of_list (extract_macros file_h);
796 end
797
798
799
800type info_item = string * Parser_c.token list
801
802type program2 = toplevel2 list
ca417fcf
C
803 and extended_program2 = toplevel2 list *
804 (string, Lexer_parser.identkind) Common.scoped_h_env (* type defs *) *
805 (string, Cpp_token_c.define_def) Hashtbl.t (* macro defs *)
806 and toplevel2 = Ast_c.toplevel * info_item
978fd7e5 807
ae4735db 808let program_of_program2 xs =
978fd7e5
C
809 xs +> List.map fst
810
ae4735db
C
811let with_program2 f program2 =
812 program2
813 +> Common.unzip
814 +> (fun (program, infos) ->
978fd7e5
C
815 f program, infos
816 )
817 +> Common.uncurry Common.zip
818
819
820
821
822
485bce71 823
34e49164 824(* note: as now we go in 2 passes, there is first all the error message of
485bce71 825 * the lexer, and then the error of the parser. It is not anymore
34e49164 826 * interwinded.
ae4735db 827 *
34e49164
C
828 * !!!This function use refs, and is not reentrant !!! so take care.
829 * It use globals defined in Lexer_parser and also the _defs global
ae4735db
C
830 * in parsing_hack.ml.
831 *
485bce71
C
832 * This function uses internally some semi globals in the
833 * tokens_stat record and parsing_stat record.
34e49164
C
834 *)
835
ca417fcf 836let parse_print_error_heuristic2 saved_typedefs saved_macros file =
34e49164 837
91eba41f 838 let filelines = Common.cat_array file in
485bce71
C
839 let stat = Parsing_stat.default_stat file in
840
34e49164
C
841 (* -------------------------------------------------- *)
842 (* call lexer and get all the tokens *)
843 (* -------------------------------------------------- *)
993936c0 844
ca417fcf 845 LP.lexer_reset_typedef saved_typedefs;
485bce71 846 Parsing_hacks.ifdef_paren_cnt := 0;
708f4980 847
485bce71 848 let toks_orig = tokens file in
978fd7e5 849 let toks = Parsing_hacks.fix_tokens_define toks_orig in
708f4980 850 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
34e49164 851
708f4980 852 (* expand macros on demand trick, preparation phase *)
ae4735db
C
853 let macros =
854 Common.profile_code "MACRO mgmt prep 1" (fun () ->
ca417fcf
C
855 let macros =
856 match saved_macros with None -> Hashtbl.copy !_defs | Some h -> h in
708f4980
C
857 (* include also builtins as some macros may generate some builtins too
858 * like __decl_spec or __stdcall
859 *)
ae4735db 860 !_defs_builtins +> Hashtbl.iter (fun s def ->
708f4980
C
861 Hashtbl.replace macros s def;
862 );
863 macros
864 )
865 in
ae4735db 866 Common.profile_code "MACRO mgmt prep 2" (fun () ->
978fd7e5 867 let local_macros = extract_macros file in
ae4735db 868 local_macros +> List.iter (fun (s, def) ->
708f4980
C
869 Hashtbl.replace macros s def;
870 );
871 );
34e49164 872
708f4980 873 let tr = mk_tokens_state toks in
485bce71
C
874
875 let rec loop tr =
34e49164
C
876
877 (* todo?: I am not sure that it represents current_line, cos maybe
878 * tr.current partipated in the previous parsing phase, so maybe tr.current
879 * is not the first token of the next parsing phase. Same with checkpoint2.
880 * It would be better to record when we have a } or ; in parser.mly,
881 * cos we know that they are the last symbols of external_declaration2.
485bce71
C
882 *
883 * bugfix: may not be equal to 'file' as after macro expansions we can
884 * start to parse a new entity from the body of a macro, for instance
885 * when parsing a define_machine() body, cf standard.h
34e49164
C
886 *)
887 let checkpoint = TH.line_of_tok tr.current in
485bce71 888 let checkpoint_file = TH.file_of_tok tr.current in
34e49164 889
485bce71 890 (* call the parser *)
ae4735db
C
891 let elem =
892 let pass1 =
893 Common.profile_code "Parsing: 1st pass" (fun () ->
708f4980
C
894 get_one_elem ~pass:1 tr (file, filelines)
895 ) in
485bce71
C
896 match pass1 with
897 | Left e -> Left e
ae4735db 898 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
708f4980
C
899 if !Flag_parsing_c.disable_multi_pass
900 then pass1
485bce71 901 else begin
ae4735db 902 Common.profile_code "Parsing: multi pass" (fun () ->
708f4980
C
903
904 pr2_err "parsing pass2: try again";
905 let toks = List.rev passed ++ tr.rest in
906 let new_tr = mk_tokens_state toks in
907 copy_tokens_state ~src:new_tr ~dst:tr;
908 let passx = get_one_elem ~pass:2 tr (file, filelines) in
909
910 (match passx with
911 | Left e -> passx
ae4735db
C
912 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
913 let candidates =
914 candidate_macros_in_passed ~defs:macros passed
708f4980 915 in
ae4735db 916
978fd7e5 917
708f4980
C
918 if is_define_passed passed || null candidates
919 then passx
920 else begin
921 (* todo factorize code *)
922
923 pr2_err "parsing pass3: try again";
924 let toks = List.rev passed ++ tr.rest in
ae4735db 925 let toks' =
708f4980
C
926 find_optional_macro_to_expand ~defs:candidates toks in
927 let new_tr = mk_tokens_state toks' in
928 copy_tokens_state ~src:new_tr ~dst:tr;
929 let passx = get_one_elem ~pass:3 tr (file, filelines) in
930
931 (match passx with
932 | Left e -> passx
ae4735db 933 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
708f4980
C
934 pr2_err "parsing pass4: try again";
935
ae4735db
C
936 let candidates =
937 candidate_macros_in_passed
938 ~defs:macros passed
978fd7e5 939 in
708f4980
C
940
941 let toks = List.rev passed ++ tr.rest in
ae4735db 942 let toks' =
708f4980
C
943 find_optional_macro_to_expand ~defs:candidates toks in
944 let new_tr = mk_tokens_state toks' in
945 copy_tokens_state ~src:new_tr ~dst:tr;
946 let passx = get_one_elem ~pass:4 tr (file, filelines) in
947 passx
948 )
949 end
950 )
951 )
485bce71
C
952 end
953 in
954
955
956 (* again not sure if checkpoint2 corresponds to end of bad region *)
957 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
958 let checkpoint2_file = TH.file_of_tok tr.current in
959
ae4735db 960 let diffline =
b1b2de81 961 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
ae4735db 962 then (checkpoint2 - checkpoint)
485bce71
C
963 else 0
964 (* TODO? so if error come in middle of something ? where the
965 * start token was from original file but synchro found in body
966 * of macro ? then can have wrong number of lines stat.
967 * Maybe simpler just to look at tr.passed and count
968 * the lines in the token from the correct file ?
969 *)
34e49164 970 in
ae4735db 971 let info = mk_info_item file (List.rev tr.passed) in
34e49164 972
485bce71 973 (* some stat updates *)
ae4735db 974 stat.Stat.commentized <-
485bce71
C
975 stat.Stat.commentized + count_lines_commentized (snd info);
976
ae4735db 977 let elem =
485bce71 978 match elem with
ae4735db 979 | Left e ->
91eba41f
C
980 stat.Stat.correct <- stat.Stat.correct + diffline;
981 e
ae4735db
C
982 | Right (info_of_bads, line_error, toks_of_bads,
983 _passed_before_error, cur, exn) ->
708f4980
C
984
985 let was_define = is_define_passed tr.passed in
ae4735db 986
708f4980
C
987 if was_define && !Flag_parsing_c.filter_msg_define_error
988 then ()
989 else begin
990
991 (match exn with
ae4735db 992 | Lexer_c.Lexical _
708f4980
C
993 | Parsing.Parse_error
994 | Semantic_c.Semantic _ -> ()
995 | e -> raise e
996 );
997
998 if !Flag_parsing_c.show_parsing_error
ae4735db 999 then begin
708f4980
C
1000 (match exn with
1001 (* Lexical is not anymore launched I think *)
ae4735db 1002 | Lexer_c.Lexical s ->
708f4980 1003 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
ae4735db 1004 | Parsing.Parse_error ->
708f4980 1005 pr2 ("parse error \n = " ^ error_msg_tok cur)
ae4735db 1006 | Semantic_c.Semantic (s, i) ->
708f4980 1007 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
abad11c5 1008 | e -> raise (Impossible 82)
708f4980
C
1009 );
1010 (* bugfix: *)
ae4735db 1011 if (checkpoint_file =$= checkpoint2_file) &&
708f4980
C
1012 checkpoint_file =$= file
1013 then print_bad line_error (checkpoint, checkpoint2) filelines
1014 else pr2 "PB: bad: but on tokens not from original file"
1015 end;
1016
ae4735db
C
1017
1018 let pbline =
1019 toks_of_bads
708f4980 1020 +> Common.filter (TH.is_same_line_or_close line_error)
ae4735db 1021 +> Common.filter TH.is_ident_like
708f4980 1022 in
ae4735db 1023 let error_info =
708f4980
C
1024 (pbline +> List.map TH.str_of_tok), line_error
1025 in
ae4735db 1026 stat.Stat.problematic_lines <-
708f4980
C
1027 error_info::stat.Stat.problematic_lines;
1028
1029 end;
1030
91eba41f
C
1031 if was_define && !Flag_parsing_c.filter_define_error
1032 then stat.Stat.correct <- stat.Stat.correct + diffline
1033 else stat.Stat.bad <- stat.Stat.bad + diffline;
1034
485bce71
C
1035 Ast_c.NotParsedCorrectly info_of_bads
1036 in
34e49164
C
1037
1038 (match elem with
1039 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
485bce71 1040 | xs -> (xs, info):: loop tr (* recurse *)
34e49164
C
1041 )
1042 in
485bce71 1043 let v = loop tr in
978fd7e5 1044 let v = with_program2 Parsing_consistency_c.consistency_checking v in
ca417fcf
C
1045 let v =
1046 let new_td = ref (Common.clone_scoped_h_env !LP._typedef) in
1047 Common.clean_scope_h new_td;
1048 (v, !new_td, macros) in
34e49164
C
1049 (v, stat)
1050
1051
ca417fcf
C
1052let time_total_parsing a b =
1053 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a b)
485bce71 1054
ca417fcf
C
1055let parse_print_error_heuristic a b =
1056 Common.profile_code "C parsing" (fun () -> time_total_parsing a b)
485bce71 1057
34e49164
C
1058
1059(* alias *)
ca417fcf
C
1060let parse_c_and_cpp a =
1061 let ((c,_,_),stat) = parse_print_error_heuristic None None a in (c,stat)
1062let parse_c_and_cpp_keep_typedefs td macs a =
1063 parse_print_error_heuristic td macs a
34e49164
C
1064
1065(*****************************************************************************)
1066(* Same but faster cos memoize stuff *)
1067(*****************************************************************************)
ae4735db 1068let parse_cache file =
ca417fcf
C
1069 if not !Flag_parsing_c.use_cache
1070 then parse_print_error_heuristic None None file
ae4735db 1071 else
17ba0788 1072 let _ = pr2_once "TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
ae4735db 1073 let need_no_changed_files =
34e49164 1074 (* should use Sys.argv.(0), would be safer. *)
485bce71
C
1075
1076 [
1077 (* TOFIX
1078 Config.path ^ "/parsing_c/c_parser.cma";
ae4735db 1079 (* we may also depend now on the semantic patch because
485bce71 1080 the SP may use macro and so we will disable some of the
ae4735db 1081 macro expansions from standard.h.
485bce71
C
1082 *)
1083 !Config.std_h;
1084 *)
f3c4ece6 1085 ] in
ae4735db 1086 let need_no_changed_variables =
34e49164 1087 (* could add some of the flags of flag_parsing_c.ml *)
f3c4ece6 1088 [] in
5427db06 1089 Common.cache_computation_robust_in_dir
f3c4ece6 1090 !Flag_parsing_c.cache_prefix file ".ast_raw"
ae4735db 1091 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
f3c4ece6
C
1092 (fun () ->
1093 (* check whether to clear the cache *)
1094 (match (!Flag_parsing_c.cache_limit,!Flag_parsing_c.cache_prefix) with
1095 (None,_) | (_,None) -> ()
1096 | (Some limit,Some prefix) ->
1097 let count =
1098 Common.cmd_to_list
17ba0788
C
1099 (Printf.sprintf "test -e %s && find %s -name \"*_raw\" | wc -l"
1100 prefix prefix) in
f3c4ece6
C
1101 match count with
1102 [c] ->
1103 if int_of_string c >= limit
1104 then
1105 let _ =
1106 Sys.command
17ba0788
C
1107 (Printf.sprintf
1108 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1109 prefix) in
f3c4ece6
C
1110 ()
1111 | _ -> ());
1112 (* recompute *)
1113 parse_print_error_heuristic None None file)
34e49164
C
1114
1115
1116
1117(*****************************************************************************)
485bce71 1118(* Some special cases *)
34e49164
C
1119(*****************************************************************************)
1120
485bce71 1121let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
708f4980
C
1122 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1123 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1124 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1125 program +> Common.find_some (fun (e,_) ->
485bce71
C
1126 match e with
1127 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1128 | _ -> None
1129 )
1130
1131let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
708f4980
C
1132 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1133 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1134 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1135 program +> Common.find_some (fun (e,_) ->
485bce71 1136 match e with
ae4735db 1137 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
485bce71 1138 (match compound with
ae4735db 1139 | [Ast_c.StmtElem st] ->
708f4980
C
1140 (match Ast_c.unwrap_st st with
1141 | Ast_c.ExprStatement (Some e) -> Some e
1142 | _ -> None
1143 )
485bce71
C
1144 | _ -> None
1145 )
1146 | _ -> None
1147 )