Coccinelle release 1.0.0-rc12
[bpt/coccinelle.git] / parsing_c / parse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
34e49164
C
19module LP = Lexer_parser
20
485bce71
C
21module Stat = Parsing_stat
22
34e49164
C
23(*****************************************************************************)
24(* Wrappers *)
25(*****************************************************************************)
ae4735db
C
26let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
34e49164
C
28(*****************************************************************************)
29(* Helpers *)
30(*****************************************************************************)
31
ae4735db
C
32let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34e49164 34
ae4735db 35let token_to_strpos tok =
34e49164
C
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
ae4735db 39let mk_info_item2 filename toks =
34e49164 40 let buf = Buffer.create 100 in
ae4735db 41 let s =
34e49164
C
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
ae4735db 44 toks +> List.iter (fun tok ->
34e49164 45 match TH.pinfo_of_tok tok with
ae4735db 46 | Ast_c.OriginTok _ ->
91eba41f 47 Buffer.add_string buf (TH.str_of_tok tok)
ae4735db 48 | Ast_c.AbstractLineTok _ ->
91eba41f 49 raise Impossible
34e49164
C
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
ae4735db 55 (s, toks)
34e49164 56
ae4735db
C
57let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
34e49164
C
59 (fun () -> mk_info_item2 a b)
60
61
ae4735db 62let info_same_line line xs =
b1b2de81 63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
34e49164 64
34e49164 65
978fd7e5
C
66(* move in cpp_token_c ? *)
67let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
ae4735db
C
69 if List.length xs >= 2
70 then
978fd7e5 71 (match Common.head_middle_tail xs with
ae4735db 72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
978fd7e5
C
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
ae4735db 78 false
978fd7e5
C
79 end
80
81
82(*****************************************************************************)
83(* Error diagnostic *)
84(*****************************************************************************)
85
ae4735db 86let error_msg_tok tok =
978fd7e5
C
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
ae4735db 89 then Common.error_message file (token_to_strpos tok)
978fd7e5
C
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
ae4735db 93let print_bad line_error (start_line, end_line) filelines =
978fd7e5
C
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
ae4735db
C
97 for i = start_line to end_line do
98 let line = filelines.(i) in
978fd7e5 99
ae4735db
C
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
978fd7e5
C
103 done
104 end
105
106
34e49164
C
107(*****************************************************************************)
108(* Stats on what was passed/commentized *)
109(*****************************************************************************)
110
c491d8ee 111let commentized xs = xs +> Common.tail_map_filter (function
ae4735db 112 | Parser_c.TCommentCpp (cppkind, ii) ->
485bce71 113 let s = Ast_c.str_of_info ii in
ae4735db
C
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
485bce71 116 | 0 -> false
ae4735db 117 | 1 ->
0708f913 118 List.mem cppkind [Token_c.CppAttr]
ae4735db 119 ||
485bce71 120 (s =~ "__.*")
ae4735db 121 | 2 ->
0708f913 122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
ae4735db 123 ||
485bce71 124 (s =~ "__.*")
ae4735db 125 | 3 ->
feec80c3
C
126 (match cppkind with
127 Token_c.CppAttr | Token_c.CppPassingNormal
128 | Token_c.CppDirective | Token_c.CppIfDirective _ -> true
129 | _ -> false)
ae4735db 130 ||
485bce71 131 (s =~ "__.*")
ae4735db 132 | 4 ->
feec80c3
C
133 List.mem cppkind
134 [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
ae4735db 135 ||
485bce71
C
136 (s =~ "__.*")
137
138
ae4735db 139 | 5 ->
feec80c3
C
140 (match cppkind with
141 Token_c.CppAttr | Token_c.CppPassingNormal
142 | Token_c.CppDirective | Token_c.CppIfDirective _
143 | Token_c.CppMacro -> true
144 | _ -> false)
ae4735db 145 ||
485bce71
C
146 (s =~ "__.*")
147
148
149
150
151 | _ -> failwith "not valid level passing number"
152 in
153 if legal_passing then None else Some (ii.Ast_c.pinfo)
154
155 (*
ae4735db 156 | Ast_c.CppOther ->
34e49164
C
157 (match s with
158 | s when s =~ "KERN_.*" -> None
159 | s when s =~ "__.*" -> None
ae4735db 160 | _ ->
485bce71 161 Some (ii.Ast_c.pinfo)
34e49164 162 )
485bce71
C
163 *)
164
ae4735db 165
34e49164 166 | Parser_c.TCommentMisc ii
ae4735db 167 | Parser_c.TAction ii
34e49164
C
168 ->
169 Some (ii.Ast_c.pinfo)
ae4735db 170 | _ ->
34e49164
C
171 None
172 )
ae4735db
C
173
174let count_lines_commentized xs =
34e49164
C
175 let line = ref (-1) in
176 let count = ref 0 in
177 begin
178 commentized xs +>
179 List.iter
180 (function
ae4735db 181 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
182 let newline = pinfo.Common.line in
183 if newline <> !line
184 then begin
185 line := newline;
186 incr count
187 end
188 | _ -> ());
189 !count
190 end
191
192
193
ae4735db 194let print_commentized xs =
34e49164
C
195 let line = ref (-1) in
196 begin
197 let ys = commentized xs in
198 ys +>
199 List.iter
200 (function
ae4735db 201 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
202 let newline = pinfo.Common.line in
203 let s = pinfo.Common.str in
ae4735db
C
204 let s = Str.global_substitute
205 (Str.regexp "\n") (fun s -> "") s
34e49164 206 in
b1b2de81 207 if newline =|= !line
34e49164
C
208 then prerr_string (s ^ " ")
209 else begin
ae4735db
C
210 if !line =|= -1
211 then pr2_no_nl "passed:"
34e49164
C
212 else pr2_no_nl "\npassed:";
213 line := newline;
214 pr2_no_nl (s ^ " ");
215 end
216 | _ -> ());
217 if not (null ys) then pr2 "";
218 end
ae4735db 219
34e49164
C
220
221
222
223(*****************************************************************************)
224(* Lexing only *)
225(*****************************************************************************)
226
227(* called by parse_print_error_heuristic *)
ae4735db 228let tokens2 file =
708f4980 229 let table = Common.full_charpos_to_pos_large file in
34e49164 230
ae4735db 231 Common.with_open_infile file (fun chan ->
34e49164 232 let lexbuf = Lexing.from_channel chan in
ae4735db
C
233 try
234 let rec tokens_aux acc =
34e49164
C
235 let tok = Lexer_c.token lexbuf in
236 (* fill in the line and col information *)
ae4735db 237 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
34e49164
C
238 { ii with Ast_c.pinfo=
239 (* could assert pinfo.filename = file ? *)
240 match Ast_c.pinfo_of_info ii with
241 Ast_c.OriginTok pi ->
708f4980 242 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
34e49164 243 | Ast_c.ExpandedTok (pi,vpi) ->
708f4980 244 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
34e49164
C
245 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
246 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
247 })
248 in
249
250 if TH.is_eof tok
251 then List.rev (tok::acc)
252 else tokens_aux (tok::acc)
253 in
254 tokens_aux []
255 with
ae4735db
C
256 | Lexer_c.Lexical s ->
257 failwith ("lexical error " ^ s ^ "\n =" ^
34e49164
C
258 (Common.error_message file (lexbuf_to_strpos lexbuf)))
259 | e -> raise e
260 )
261
ae4735db
C
262let time_lexing ?(profile=true) a =
263 if profile
485bce71 264 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
951c7801 265 else tokens2 a
ae4735db 266let tokens ?profile a =
485bce71 267 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
34e49164
C
268
269
ae4735db 270let tokens_of_string string =
34e49164 271 let lexbuf = Lexing.from_string string in
ae4735db
C
272 try
273 let rec tokens_s_aux () =
34e49164
C
274 let tok = Lexer_c.token lexbuf in
275 if TH.is_eof tok
276 then [tok]
277 else tok::(tokens_s_aux ())
278 in
279 tokens_s_aux ()
280 with
281 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
282 | e -> raise e
283
284
285(*****************************************************************************)
286(* Parsing, but very basic, no more used *)
287(*****************************************************************************)
288
289(*
290 * !!!Those function use refs, and are not reentrant !!! so take care.
291 * It use globals defined in Lexer_parser.
ae4735db 292 *
34e49164
C
293 * update: because now lexer return comments tokens, those functions
294 * may not work anymore.
295 *)
296
ae4735db 297let parse file =
34e49164
C
298 let lexbuf = Lexing.from_channel (open_in file) in
299 let result = Parser_c.main Lexer_c.token lexbuf in
300 result
301
302
ae4735db 303let parse_print_error file =
34e49164
C
304 let chan = (open_in file) in
305 let lexbuf = Lexing.from_channel chan in
306
307 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
ae4735db 308 try
34e49164 309 lexbuf +> Parser_c.main Lexer_c.token
ae4735db
C
310 with
311 | Lexer_c.Lexical s ->
34e49164 312 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
ae4735db 313 | Parsing.Parse_error ->
34e49164 314 failwith ("parse error \n = " ^ error_msg ())
ae4735db 315 | Semantic_c.Semantic (s, i) ->
34e49164
C
316 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
317 | e -> raise e
318
319
320
321
322(*****************************************************************************)
323(* Parsing subelements, useful to debug parser *)
324(*****************************************************************************)
325
326(*
327 * !!!Those function use refs, and are not reentrant !!! so take care.
328 * It use globals defined in Lexer_parser.
329 *)
330
331
ae4735db
C
332(* old:
333 * let parse_gen parsefunc s =
34e49164
C
334 * let lexbuf = Lexing.from_string s in
335 * let result = parsefunc Lexer_c.token lexbuf in
336 * result
337 *)
338
ae4735db 339let parse_gen parsefunc s =
485bce71 340 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
34e49164
C
341
342
343 (* Why use this lexing scheme ? Why not classically give lexer func
ae4735db 344 * to parser ? Because I now keep comments in lexer. Could
34e49164
C
345 * just do a simple wrapper that when comment ask again for a token,
346 * but maybe simpler to use cur_tok technique.
347 *)
348 let all_tokens = ref toks in
349 let cur_tok = ref (List.hd !all_tokens) in
350
ae4735db
C
351 let lexer_function =
352 (fun _ ->
34e49164 353 if TH.is_eof !cur_tok
708f4980 354 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
34e49164
C
355 else
356 let v = Common.pop2 all_tokens in
357 cur_tok := v;
358 !cur_tok
ae4735db 359 )
34e49164
C
360 in
361 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
362 let result = parsefunc lexer_function lexbuf_fake in
363 result
364
365
366let type_of_string = parse_gen Parser_c.type_name
367let statement_of_string = parse_gen Parser_c.statement
368let expression_of_string = parse_gen Parser_c.expr
369
370(* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
371
372
373
374
375
34e49164 376(*****************************************************************************)
485bce71 377(* Parsing default define macros, usually in a standard.h file *)
34e49164
C
378(*****************************************************************************)
379
ae4735db
C
380let extract_macros2 file =
381 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
708f4980
C
382 Flag_parsing_c.verbose_lexing := false;
383 let toks = tokens ~profile:false file in
978fd7e5
C
384 let toks = Parsing_hacks.fix_tokens_define toks in
385 Cpp_token_c.extract_macros toks
708f4980 386 )
34e49164 387
ae4735db 388let extract_macros a =
978fd7e5 389 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
485bce71 390
34e49164
C
391
392(*****************************************************************************)
978fd7e5 393(* Helper for main entry point *)
34e49164
C
394(*****************************************************************************)
395
34e49164
C
396
397(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
398 * possible error recovery. Indeed, they allow to skip some tokens and
399 * still be able to call again the ocamlyacc parser. It is ugly code
400 * because we cant modify ocamllex and ocamlyacc. As we want some
401 * extended lexing tricks, we have to use such refs.
ae4735db 402 *
34e49164
C
403 * Those refs are now also used for my lalr(k) technique. Indeed They
404 * store the futur and previous tokens that were parsed, and so
405 * provide enough context information for powerful lex trick.
ae4735db 406 *
34e49164 407 * - passed_tokens_last_ckp stores the passed tokens since last
485bce71 408 * checkpoint. Used for NotParsedCorrectly and also to build the
34e49164
C
409 * info_item attached to each program_element.
410 * - passed_tokens_clean is used for lookahead, in fact for lookback.
411 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
412 * contain some comments and so would make pattern matching difficult
ae4735db 413 * in lookahead. Hence this variable. We would like also to get rid
34e49164
C
414 * of cpp instruction because sometimes a cpp instruction is between
415 * two tokens and makes a pattern matching fail. But lookahead also
416 * transform some cpp instruction (in comment) so can't remove them.
ae4735db 417 *
34e49164
C
418 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
419 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
420 * comment-tokens.
ae4735db 421 *
34e49164 422 * Normally we have:
ae4735db 423 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
34e49164 424 * after the call to pop2.
ae4735db 425 * toks = (reverse passed_tok) ++ remaining_tokens
34e49164
C
426 * at the and of the lexer_function call.
427 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
428 * At the end of lexer_function call, cur_tok overlap with passed_tok.
ae4735db 429 *
34e49164 430 * convention: I use "tr" for "tokens refs"
ae4735db 431 *
485bce71
C
432 * I now also need this lexing trick because the lexer return comment
433 * tokens.
34e49164
C
434 *)
435
436type tokens_state = {
437 mutable rest : Parser_c.token list;
438 mutable rest_clean : Parser_c.token list;
439 mutable current : Parser_c.token;
440 (* it's passed since last "checkpoint", not passed from the beginning *)
441 mutable passed : Parser_c.token list;
442 mutable passed_clean : Parser_c.token list;
443}
708f4980 444
ae4735db
C
445let mk_tokens_state toks =
446 {
708f4980
C
447 rest = toks;
448 rest_clean = (toks +> List.filter TH.is_not_comment);
449 current = (List.hd toks);
ae4735db 450 passed = [];
708f4980
C
451 passed_clean = [];
452 }
453
454
455
ae4735db 456let clone_tokens_state tr =
485bce71
C
457 { rest = tr.rest;
458 rest_clean = tr.rest_clean;
459 current = tr.current;
460 passed = tr.passed;
461 passed_clean = tr.passed_clean;
462 }
ae4735db 463let copy_tokens_state ~src ~dst =
485bce71
C
464 dst.rest <- src.rest;
465 dst.rest_clean <- src.rest_clean;
466 dst.current <- src.current;
467 dst.passed <- src.passed;
468 dst.passed_clean <- src.passed_clean;
469 ()
470
708f4980 471(* todo? agglomerate the x##b ? *)
485bce71
C
472let rec filter_noise n xs =
473 match n, xs with
474 | _, [] -> []
475 | 0, xs -> xs
ae4735db 476 | n, x::xs ->
485bce71 477 (match x with
ae4735db 478 | Parser_c.TMacroAttr _ ->
485bce71 479 filter_noise (n-1) xs
ae4735db 480 | _ ->
485bce71
C
481 x::filter_noise (n-1) xs
482 )
483
ae4735db 484let clean_for_lookahead xs =
485bce71
C
485 match xs with
486 | [] -> []
487 | [x] -> [x]
ae4735db 488 | x::xs ->
485bce71
C
489 x::filter_noise 10 xs
490
34e49164 491
485bce71 492
ae4735db 493(* Hacked lex. This function use refs passed by parse_print_error_heuristic
485bce71
C
494 * tr means token refs.
495 *)
ae4735db 496let rec lexer_function ~pass tr = fun lexbuf ->
34e49164 497 match tr.rest with
708f4980 498 | [] -> pr2_err "ALREADY AT END"; tr.current
ae4735db 499 | v::xs ->
34e49164
C
500 tr.rest <- xs;
501 tr.current <- v;
502
503 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
504
505 if TH.is_comment v
506 then begin
507 tr.passed <- v::tr.passed;
485bce71 508 lexer_function ~pass tr lexbuf
34e49164
C
509 end
510 else begin
511 let x = List.hd tr.rest_clean in
512 tr.rest_clean <- List.tl tr.rest_clean;
b1b2de81 513 assert (x =*= v);
ae4735db 514
34e49164 515 (match v with
113803cf 516
ae4735db 517 (* fix_define1.
113803cf
C
518 *
519 * Why not in parsing_hacks lookahead and do passing like
ae4735db
C
520 * I do for some ifdef directives ? Because here I also need to
521 * generate some tokens sometimes and so I need access to the
113803cf 522 * tr.passed, tr.rest, etc.
485bce71 523 *)
9f8e26f4 524 | Parser_c.TDefine (tok) ->
ae4735db 525 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 526 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 527 then begin
485bce71 528 incr Stat.nDefinePassing;
34e49164 529 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
3a314143
C
530 let v' =
531 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
532 in
533 tr.passed <- v'::tr.passed;
534 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
535 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
536 lexer_function ~pass tr lexbuf
537 end
538 else begin
539 tr.passed <- v::tr.passed;
540 tr.passed_clean <- v::tr.passed_clean;
541 v
542 end
543
544 | Parser_c.TUndef (tok) ->
545 if not (LP.current_context () =*= LP.InTopLevel) &&
546 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
547 then begin
548 incr Stat.nUndefPassing;
549 pr2_once ("CPP-UNDEF: inside function, I treat it as comment");
550 let v' =
551 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
34e49164
C
552 in
553 tr.passed <- v'::tr.passed;
978fd7e5
C
554 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
555 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
485bce71 556 lexer_function ~pass tr lexbuf
34e49164
C
557 end
558 else begin
559 tr.passed <- v::tr.passed;
560 tr.passed_clean <- v::tr.passed_clean;
561 v
562 end
ae4735db
C
563
564 | Parser_c.TInclude (includes, filename, inifdef, info) ->
b1b2de81 565 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 566 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 567 then begin
485bce71 568 incr Stat.nIncludePassing;
34e49164 569 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
0708f913 570 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
34e49164 571 tr.passed <- v::tr.passed;
485bce71 572 lexer_function ~pass tr lexbuf
34e49164
C
573 end
574 else begin
ae4735db 575 let (v,new_tokens) =
3a314143 576 Parsing_hacks.tokens_include(info, includes, filename, inifdef) in
ae4735db 577 let new_tokens_clean =
34e49164
C
578 new_tokens +> List.filter TH.is_not_comment in
579
580 tr.passed <- v::tr.passed;
581 tr.passed_clean <- v::tr.passed_clean;
582 tr.rest <- new_tokens ++ tr.rest;
583 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
584 v
585 end
ae4735db
C
586
587 | _ ->
588
34e49164
C
589 (* typedef_fix1 *)
590 let v = match v with
ae4735db
C
591 | Parser_c.TIdent (s, ii) ->
592 if
593 LP.is_typedef s &&
485bce71 594 not (!Flag_parsing_c.disable_add_typedef) &&
b1b2de81 595 pass =|= 1
34e49164
C
596 then Parser_c.TypedefIdent (s, ii)
597 else Parser_c.TIdent (s, ii)
598 | x -> x
599 in
ae4735db 600
485bce71
C
601 let v = Parsing_hacks.lookahead ~pass
602 (clean_for_lookahead (v::tr.rest_clean))
603 tr.passed_clean in
34e49164
C
604
605 tr.passed <- v::tr.passed;
ae4735db 606
485bce71 607 (* the lookahead may have changed the status of the token and
34e49164 608 * consider it as a comment, for instance some #include are
485bce71 609 * turned into comments, hence this code. *)
34e49164 610 match v with
485bce71 611 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
ae4735db 612 | v ->
34e49164
C
613 tr.passed_clean <- v::tr.passed_clean;
614 v
615 )
616 end
617
618
708f4980
C
619let max_pass = 4
620
34e49164 621
ae4735db 622let get_one_elem ~pass tr (file, filelines) =
485bce71
C
623
624 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
708f4980 625 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
485bce71
C
626
627 (* normally have to do that only when come from an exception in which
ae4735db 628 * case the dt() may not have been done
485bce71
C
629 * TODO but if was in scoped scope ? have to let only the last scope
630 * so need do a LP.lexer_reset_typedef ();
631 *)
ae4735db 632 LP.enable_typedef();
485bce71
C
633 LP._lexer_hint := (LP.default_hint ());
634 LP.save_typedef_state();
635
636 tr.passed <- [];
637
638 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
ae4735db
C
639
640 (try
485bce71
C
641 (* -------------------------------------------------- *)
642 (* Call parser *)
643 (* -------------------------------------------------- *)
ae4735db 644 Common.profile_code_exclusif "YACC" (fun () ->
c491d8ee 645 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
485bce71 646 )
ae4735db 647 with e ->
485bce71
C
648 LP.restore_typedef_state();
649
650 (* must keep here, before the code that adjusts the tr fields *)
651 let line_error = TH.line_of_tok tr.current in
708f4980
C
652
653 let passed_before_error = tr.passed in
654 let current = tr.current in
485bce71 655 (* error recovery, go to next synchro point *)
ae4735db 656 let (passed', rest') =
978fd7e5 657 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
485bce71
C
658 tr.rest <- rest';
659 tr.passed <- passed';
ae4735db 660
485bce71
C
661 tr.current <- List.hd passed';
662 tr.passed_clean <- []; (* enough ? *)
663 (* with error recovery, rest and rest_clean may not be in sync *)
664 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
ae4735db
C
665
666
667 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
668 Right (info_of_bads, line_error,
669 tr.passed, passed_before_error,
708f4980 670 current, e)
485bce71
C
671 )
672
673
674
978fd7e5
C
675(* Macro problem recovery *)
676(* used by the multi-pass error recovery expand-on-demand *)
677(*
ae4735db
C
678val candidate_macros_in_passed:
679 defs: (string, define_def) Hashtbl.t ->
978fd7e5
C
680 Parser_c.token list -> (string * define_def) list
681*)
682
ae4735db 683let candidate_macros_in_passed2 ~defs passed =
978fd7e5
C
684 let res = ref [] in
685 let res2 = ref [] in
686
687 passed +> List.iter (function
688 | Parser_c.TIdent (s,_)
689 (* bugfix: may have to undo some infered things *)
690 | Parser_c.TMacroIterator (s,_)
691 | Parser_c.TypedefIdent (s,_)
ae4735db 692 ->
978fd7e5 693 (match Common.hfind_option s defs with
ae4735db
C
694 | Some def ->
695 if s ==~ Parsing_hacks.regexp_macro
978fd7e5
C
696 then
697 (* pr2 (spf "candidate: %s" s); *)
ae4735db
C
698 Common.push2 (s, def) res
699 else
978fd7e5
C
700 Common.push2 (s, def) res2
701 | None -> ()
702 )
703
704 | _ -> ()
705 );
ae4735db
C
706 if null !res
707 then !res2
978fd7e5
C
708 else !res
709
ae4735db
C
710let candidate_macros_in_passed ~defs b =
711 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 712 candidate_macros_in_passed2 ~defs b)
ae4735db 713
978fd7e5
C
714
715
716
717
718let find_optional_macro_to_expand2 ~defs toks =
719
720 let defs = Common.hash_of_list defs in
721
c491d8ee 722 let toks = toks +> Common.tail_map (function
978fd7e5
C
723
724 (* special cases to undo *)
ae4735db 725 | Parser_c.TMacroIterator (s, ii) ->
978fd7e5
C
726 if Hashtbl.mem defs s
727 then Parser_c.TIdent (s, ii)
728 else Parser_c.TMacroIterator (s, ii)
729
ae4735db 730 | Parser_c.TypedefIdent (s, ii) ->
978fd7e5
C
731 if Hashtbl.mem defs s
732 then Parser_c.TIdent (s, ii)
733 else Parser_c.TypedefIdent (s, ii)
734
735 | x -> x
736 ) in
737
738 let tokens = toks in
739 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
740
741 (* just calling apply_macro_defs and having a specialized version
ae4735db 742 * of the code in fix_tokens_cpp is not enough as some work such
978fd7e5
C
743 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
744 * will not get the chance to be run on the new expanded tokens.
ae4735db 745 * Hence even if it's expensive, it's currently better to
978fd7e5
C
746 * just call directly fix_tokens_cpp again here.
747
748 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
749 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
750 let paren_grouped = TV.mk_parenthised cleaner in
751 Cpp_token_c.apply_macro_defs
752 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
753 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
754 defs paren_grouped;
755 (* because the before field is used by apply_macro_defs *)
ae4735db
C
756 tokens2 := TV.rebuild_tokens_extented !tokens2;
757 Parsing_hacks.insert_virtual_positions
978fd7e5
C
758 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
759 *)
ae4735db
C
760let find_optional_macro_to_expand ~defs a =
761 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 762 find_optional_macro_to_expand2 ~defs a)
ae4735db 763
978fd7e5
C
764
765
766
767
768(*****************************************************************************)
769(* Main entry points *)
770(*****************************************************************************)
771
ae4735db 772let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
773 ref (Hashtbl.create 101)
774
ae4735db 775let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
776 ref (Hashtbl.create 101)
777
778
779(* can not be put in parsing_hack, cos then mutually recursive problem as
780 * we also want to parse the standard.h file.
781 *)
ae4735db 782let init_defs_macros std_h =
978fd7e5
C
783 if not (Common.lfile_exists std_h)
784 then pr2 ("warning: Can't find default macro file: " ^ std_h)
785 else begin
786 pr2 ("init_defs: " ^ std_h);
787 _defs := Common.hash_of_list (extract_macros std_h);
788 end
789
ae4735db 790let init_defs_builtins file_h =
978fd7e5
C
791 if not (Common.lfile_exists file_h)
792 then pr2 ("warning: Can't find macro file: " ^ file_h)
793 else begin
794 pr2 ("init_defs_builtins: " ^ file_h);
ae4735db 795 _defs_builtins :=
978fd7e5
C
796 Common.hash_of_list (extract_macros file_h);
797 end
798
799
800
801type info_item = string * Parser_c.token list
802
803type program2 = toplevel2 list
ca417fcf
C
804 and extended_program2 = toplevel2 list *
805 (string, Lexer_parser.identkind) Common.scoped_h_env (* type defs *) *
806 (string, Cpp_token_c.define_def) Hashtbl.t (* macro defs *)
807 and toplevel2 = Ast_c.toplevel * info_item
978fd7e5 808
ae4735db 809let program_of_program2 xs =
978fd7e5
C
810 xs +> List.map fst
811
ae4735db
C
812let with_program2 f program2 =
813 program2
814 +> Common.unzip
815 +> (fun (program, infos) ->
978fd7e5
C
816 f program, infos
817 )
818 +> Common.uncurry Common.zip
819
820
821
822
823
485bce71 824
34e49164 825(* note: as now we go in 2 passes, there is first all the error message of
485bce71 826 * the lexer, and then the error of the parser. It is not anymore
34e49164 827 * interwinded.
ae4735db 828 *
34e49164
C
829 * !!!This function use refs, and is not reentrant !!! so take care.
830 * It use globals defined in Lexer_parser and also the _defs global
ae4735db
C
831 * in parsing_hack.ml.
832 *
485bce71
C
833 * This function uses internally some semi globals in the
834 * tokens_stat record and parsing_stat record.
34e49164
C
835 *)
836
ca417fcf 837let parse_print_error_heuristic2 saved_typedefs saved_macros file =
34e49164 838
91eba41f 839 let filelines = Common.cat_array file in
485bce71
C
840 let stat = Parsing_stat.default_stat file in
841
34e49164
C
842 (* -------------------------------------------------- *)
843 (* call lexer and get all the tokens *)
844 (* -------------------------------------------------- *)
993936c0 845
ca417fcf 846 LP.lexer_reset_typedef saved_typedefs;
485bce71 847 Parsing_hacks.ifdef_paren_cnt := 0;
708f4980 848
485bce71 849 let toks_orig = tokens file in
978fd7e5 850 let toks = Parsing_hacks.fix_tokens_define toks_orig in
708f4980 851 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
34e49164 852
708f4980 853 (* expand macros on demand trick, preparation phase *)
ae4735db
C
854 let macros =
855 Common.profile_code "MACRO mgmt prep 1" (fun () ->
ca417fcf
C
856 let macros =
857 match saved_macros with None -> Hashtbl.copy !_defs | Some h -> h in
708f4980
C
858 (* include also builtins as some macros may generate some builtins too
859 * like __decl_spec or __stdcall
860 *)
ae4735db 861 !_defs_builtins +> Hashtbl.iter (fun s def ->
708f4980
C
862 Hashtbl.replace macros s def;
863 );
864 macros
865 )
866 in
ae4735db 867 Common.profile_code "MACRO mgmt prep 2" (fun () ->
978fd7e5 868 let local_macros = extract_macros file in
ae4735db 869 local_macros +> List.iter (fun (s, def) ->
708f4980
C
870 Hashtbl.replace macros s def;
871 );
872 );
34e49164 873
708f4980 874 let tr = mk_tokens_state toks in
485bce71
C
875
876 let rec loop tr =
34e49164
C
877
878 (* todo?: I am not sure that it represents current_line, cos maybe
879 * tr.current partipated in the previous parsing phase, so maybe tr.current
880 * is not the first token of the next parsing phase. Same with checkpoint2.
881 * It would be better to record when we have a } or ; in parser.mly,
882 * cos we know that they are the last symbols of external_declaration2.
485bce71
C
883 *
884 * bugfix: may not be equal to 'file' as after macro expansions we can
885 * start to parse a new entity from the body of a macro, for instance
886 * when parsing a define_machine() body, cf standard.h
34e49164
C
887 *)
888 let checkpoint = TH.line_of_tok tr.current in
485bce71 889 let checkpoint_file = TH.file_of_tok tr.current in
34e49164 890
485bce71 891 (* call the parser *)
ae4735db
C
892 let elem =
893 let pass1 =
894 Common.profile_code "Parsing: 1st pass" (fun () ->
708f4980
C
895 get_one_elem ~pass:1 tr (file, filelines)
896 ) in
485bce71
C
897 match pass1 with
898 | Left e -> Left e
ae4735db 899 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
708f4980
C
900 if !Flag_parsing_c.disable_multi_pass
901 then pass1
485bce71 902 else begin
ae4735db 903 Common.profile_code "Parsing: multi pass" (fun () ->
708f4980
C
904
905 pr2_err "parsing pass2: try again";
906 let toks = List.rev passed ++ tr.rest in
907 let new_tr = mk_tokens_state toks in
908 copy_tokens_state ~src:new_tr ~dst:tr;
909 let passx = get_one_elem ~pass:2 tr (file, filelines) in
910
911 (match passx with
912 | Left e -> passx
ae4735db
C
913 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
914 let candidates =
915 candidate_macros_in_passed ~defs:macros passed
708f4980 916 in
ae4735db 917
978fd7e5 918
708f4980
C
919 if is_define_passed passed || null candidates
920 then passx
921 else begin
922 (* todo factorize code *)
923
924 pr2_err "parsing pass3: try again";
925 let toks = List.rev passed ++ tr.rest in
ae4735db 926 let toks' =
708f4980
C
927 find_optional_macro_to_expand ~defs:candidates toks in
928 let new_tr = mk_tokens_state toks' in
929 copy_tokens_state ~src:new_tr ~dst:tr;
930 let passx = get_one_elem ~pass:3 tr (file, filelines) in
931
932 (match passx with
933 | Left e -> passx
ae4735db 934 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
708f4980
C
935 pr2_err "parsing pass4: try again";
936
ae4735db
C
937 let candidates =
938 candidate_macros_in_passed
939 ~defs:macros passed
978fd7e5 940 in
708f4980
C
941
942 let toks = List.rev passed ++ tr.rest in
ae4735db 943 let toks' =
708f4980
C
944 find_optional_macro_to_expand ~defs:candidates toks in
945 let new_tr = mk_tokens_state toks' in
946 copy_tokens_state ~src:new_tr ~dst:tr;
947 let passx = get_one_elem ~pass:4 tr (file, filelines) in
948 passx
949 )
950 end
951 )
952 )
485bce71
C
953 end
954 in
955
956
957 (* again not sure if checkpoint2 corresponds to end of bad region *)
958 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
959 let checkpoint2_file = TH.file_of_tok tr.current in
960
ae4735db 961 let diffline =
b1b2de81 962 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
ae4735db 963 then (checkpoint2 - checkpoint)
485bce71
C
964 else 0
965 (* TODO? so if error come in middle of something ? where the
966 * start token was from original file but synchro found in body
967 * of macro ? then can have wrong number of lines stat.
968 * Maybe simpler just to look at tr.passed and count
969 * the lines in the token from the correct file ?
970 *)
34e49164 971 in
ae4735db 972 let info = mk_info_item file (List.rev tr.passed) in
34e49164 973
485bce71 974 (* some stat updates *)
ae4735db 975 stat.Stat.commentized <-
485bce71
C
976 stat.Stat.commentized + count_lines_commentized (snd info);
977
ae4735db 978 let elem =
485bce71 979 match elem with
ae4735db 980 | Left e ->
91eba41f
C
981 stat.Stat.correct <- stat.Stat.correct + diffline;
982 e
ae4735db
C
983 | Right (info_of_bads, line_error, toks_of_bads,
984 _passed_before_error, cur, exn) ->
708f4980
C
985
986 let was_define = is_define_passed tr.passed in
ae4735db 987
708f4980
C
988 if was_define && !Flag_parsing_c.filter_msg_define_error
989 then ()
990 else begin
991
992 (match exn with
ae4735db 993 | Lexer_c.Lexical _
708f4980
C
994 | Parsing.Parse_error
995 | Semantic_c.Semantic _ -> ()
996 | e -> raise e
997 );
998
999 if !Flag_parsing_c.show_parsing_error
ae4735db 1000 then begin
708f4980
C
1001 (match exn with
1002 (* Lexical is not anymore launched I think *)
ae4735db 1003 | Lexer_c.Lexical s ->
708f4980 1004 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
ae4735db 1005 | Parsing.Parse_error ->
708f4980 1006 pr2 ("parse error \n = " ^ error_msg_tok cur)
ae4735db 1007 | Semantic_c.Semantic (s, i) ->
708f4980
C
1008 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
1009 | e -> raise Impossible
1010 );
1011 (* bugfix: *)
ae4735db 1012 if (checkpoint_file =$= checkpoint2_file) &&
708f4980
C
1013 checkpoint_file =$= file
1014 then print_bad line_error (checkpoint, checkpoint2) filelines
1015 else pr2 "PB: bad: but on tokens not from original file"
1016 end;
1017
ae4735db
C
1018
1019 let pbline =
1020 toks_of_bads
708f4980 1021 +> Common.filter (TH.is_same_line_or_close line_error)
ae4735db 1022 +> Common.filter TH.is_ident_like
708f4980 1023 in
ae4735db 1024 let error_info =
708f4980
C
1025 (pbline +> List.map TH.str_of_tok), line_error
1026 in
ae4735db 1027 stat.Stat.problematic_lines <-
708f4980
C
1028 error_info::stat.Stat.problematic_lines;
1029
1030 end;
1031
91eba41f
C
1032 if was_define && !Flag_parsing_c.filter_define_error
1033 then stat.Stat.correct <- stat.Stat.correct + diffline
1034 else stat.Stat.bad <- stat.Stat.bad + diffline;
1035
485bce71
C
1036 Ast_c.NotParsedCorrectly info_of_bads
1037 in
34e49164
C
1038
1039 (match elem with
1040 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
485bce71 1041 | xs -> (xs, info):: loop tr (* recurse *)
34e49164
C
1042 )
1043 in
485bce71 1044 let v = loop tr in
978fd7e5 1045 let v = with_program2 Parsing_consistency_c.consistency_checking v in
ca417fcf
C
1046 let v =
1047 let new_td = ref (Common.clone_scoped_h_env !LP._typedef) in
1048 Common.clean_scope_h new_td;
1049 (v, !new_td, macros) in
34e49164
C
1050 (v, stat)
1051
1052
ca417fcf
C
1053let time_total_parsing a b =
1054 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a b)
485bce71 1055
ca417fcf
C
1056let parse_print_error_heuristic a b =
1057 Common.profile_code "C parsing" (fun () -> time_total_parsing a b)
485bce71 1058
34e49164
C
1059
1060(* alias *)
ca417fcf
C
1061let parse_c_and_cpp a =
1062 let ((c,_,_),stat) = parse_print_error_heuristic None None a in (c,stat)
1063let parse_c_and_cpp_keep_typedefs td macs a =
1064 parse_print_error_heuristic td macs a
34e49164
C
1065
1066(*****************************************************************************)
1067(* Same but faster cos memoize stuff *)
1068(*****************************************************************************)
ae4735db 1069let parse_cache file =
ca417fcf
C
1070 if not !Flag_parsing_c.use_cache
1071 then parse_print_error_heuristic None None file
ae4735db 1072 else
17ba0788 1073 let _ = pr2_once "TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
ae4735db 1074 let need_no_changed_files =
34e49164 1075 (* should use Sys.argv.(0), would be safer. *)
485bce71
C
1076
1077 [
1078 (* TOFIX
1079 Config.path ^ "/parsing_c/c_parser.cma";
ae4735db 1080 (* we may also depend now on the semantic patch because
485bce71 1081 the SP may use macro and so we will disable some of the
ae4735db 1082 macro expansions from standard.h.
485bce71
C
1083 *)
1084 !Config.std_h;
1085 *)
f3c4ece6 1086 ] in
ae4735db 1087 let need_no_changed_variables =
34e49164 1088 (* could add some of the flags of flag_parsing_c.ml *)
f3c4ece6 1089 [] in
5427db06 1090 Common.cache_computation_robust_in_dir
f3c4ece6 1091 !Flag_parsing_c.cache_prefix file ".ast_raw"
ae4735db 1092 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
f3c4ece6
C
1093 (fun () ->
1094 (* check whether to clear the cache *)
1095 (match (!Flag_parsing_c.cache_limit,!Flag_parsing_c.cache_prefix) with
1096 (None,_) | (_,None) -> ()
1097 | (Some limit,Some prefix) ->
1098 let count =
1099 Common.cmd_to_list
17ba0788
C
1100 (Printf.sprintf "test -e %s && find %s -name \"*_raw\" | wc -l"
1101 prefix prefix) in
f3c4ece6
C
1102 match count with
1103 [c] ->
1104 if int_of_string c >= limit
1105 then
1106 let _ =
1107 Sys.command
17ba0788
C
1108 (Printf.sprintf
1109 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1110 prefix) in
f3c4ece6
C
1111 ()
1112 | _ -> ());
1113 (* recompute *)
1114 parse_print_error_heuristic None None file)
34e49164
C
1115
1116
1117
1118(*****************************************************************************)
485bce71 1119(* Some special cases *)
34e49164
C
1120(*****************************************************************************)
1121
485bce71 1122let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
708f4980
C
1123 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1124 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1125 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1126 program +> Common.find_some (fun (e,_) ->
485bce71
C
1127 match e with
1128 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1129 | _ -> None
1130 )
1131
1132let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
708f4980
C
1133 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1134 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1135 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1136 program +> Common.find_some (fun (e,_) ->
485bce71 1137 match e with
ae4735db 1138 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
485bce71 1139 (match compound with
ae4735db 1140 | [Ast_c.StmtElem st] ->
708f4980
C
1141 (match Ast_c.unwrap_st st with
1142 | Ast_c.ExprStatement (Some e) -> Some e
1143 | _ -> None
1144 )
485bce71
C
1145 | _ -> None
1146 )
1147 | _ -> None
1148 )