Coccinelle release-1.0.0-rc11
[bpt/coccinelle.git] / parsing_c / parse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
34e49164
C
19module LP = Lexer_parser
20
485bce71
C
21module Stat = Parsing_stat
22
34e49164
C
23(*****************************************************************************)
24(* Wrappers *)
25(*****************************************************************************)
ae4735db
C
26let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
34e49164
C
28(*****************************************************************************)
29(* Helpers *)
30(*****************************************************************************)
31
ae4735db
C
32let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34e49164 34
ae4735db 35let token_to_strpos tok =
34e49164
C
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
ae4735db 39let mk_info_item2 filename toks =
34e49164 40 let buf = Buffer.create 100 in
ae4735db 41 let s =
34e49164
C
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
ae4735db 44 toks +> List.iter (fun tok ->
34e49164 45 match TH.pinfo_of_tok tok with
ae4735db 46 | Ast_c.OriginTok _ ->
91eba41f 47 Buffer.add_string buf (TH.str_of_tok tok)
ae4735db 48 | Ast_c.AbstractLineTok _ ->
91eba41f 49 raise Impossible
34e49164
C
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
ae4735db 55 (s, toks)
34e49164 56
ae4735db
C
57let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
34e49164
C
59 (fun () -> mk_info_item2 a b)
60
61
ae4735db 62let info_same_line line xs =
b1b2de81 63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
34e49164 64
34e49164 65
978fd7e5
C
66(* move in cpp_token_c ? *)
67let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
ae4735db
C
69 if List.length xs >= 2
70 then
978fd7e5 71 (match Common.head_middle_tail xs with
ae4735db 72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
978fd7e5
C
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
ae4735db 78 false
978fd7e5
C
79 end
80
81
82(*****************************************************************************)
83(* Error diagnostic *)
84(*****************************************************************************)
85
ae4735db 86let error_msg_tok tok =
978fd7e5
C
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
ae4735db 89 then Common.error_message file (token_to_strpos tok)
978fd7e5
C
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
ae4735db 93let print_bad line_error (start_line, end_line) filelines =
978fd7e5
C
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
ae4735db
C
97 for i = start_line to end_line do
98 let line = filelines.(i) in
978fd7e5 99
ae4735db
C
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
978fd7e5
C
103 done
104 end
105
106
34e49164
C
107(*****************************************************************************)
108(* Stats on what was passed/commentized *)
109(*****************************************************************************)
110
c491d8ee 111let commentized xs = xs +> Common.tail_map_filter (function
ae4735db 112 | Parser_c.TCommentCpp (cppkind, ii) ->
485bce71 113 let s = Ast_c.str_of_info ii in
ae4735db
C
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
485bce71 116 | 0 -> false
ae4735db 117 | 1 ->
0708f913 118 List.mem cppkind [Token_c.CppAttr]
ae4735db 119 ||
485bce71 120 (s =~ "__.*")
ae4735db 121 | 2 ->
0708f913 122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
ae4735db 123 ||
485bce71 124 (s =~ "__.*")
ae4735db 125 | 3 ->
0708f913 126 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
ae4735db 127 ||
485bce71 128 (s =~ "__.*")
ae4735db 129 | 4 ->
0708f913 130 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
ae4735db 131 ||
485bce71
C
132 (s =~ "__.*")
133
134
ae4735db 135 | 5 ->
0708f913 136 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
ae4735db 137 ||
485bce71
C
138 (s =~ "__.*")
139
140
141
142
143 | _ -> failwith "not valid level passing number"
144 in
145 if legal_passing then None else Some (ii.Ast_c.pinfo)
146
147 (*
ae4735db 148 | Ast_c.CppOther ->
34e49164
C
149 (match s with
150 | s when s =~ "KERN_.*" -> None
151 | s when s =~ "__.*" -> None
ae4735db 152 | _ ->
485bce71 153 Some (ii.Ast_c.pinfo)
34e49164 154 )
485bce71
C
155 *)
156
ae4735db 157
34e49164 158 | Parser_c.TCommentMisc ii
ae4735db 159 | Parser_c.TAction ii
34e49164
C
160 ->
161 Some (ii.Ast_c.pinfo)
ae4735db 162 | _ ->
34e49164
C
163 None
164 )
ae4735db
C
165
166let count_lines_commentized xs =
34e49164
C
167 let line = ref (-1) in
168 let count = ref 0 in
169 begin
170 commentized xs +>
171 List.iter
172 (function
ae4735db 173 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
174 let newline = pinfo.Common.line in
175 if newline <> !line
176 then begin
177 line := newline;
178 incr count
179 end
180 | _ -> ());
181 !count
182 end
183
184
185
ae4735db 186let print_commentized xs =
34e49164
C
187 let line = ref (-1) in
188 begin
189 let ys = commentized xs in
190 ys +>
191 List.iter
192 (function
ae4735db 193 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
194 let newline = pinfo.Common.line in
195 let s = pinfo.Common.str in
ae4735db
C
196 let s = Str.global_substitute
197 (Str.regexp "\n") (fun s -> "") s
34e49164 198 in
b1b2de81 199 if newline =|= !line
34e49164
C
200 then prerr_string (s ^ " ")
201 else begin
ae4735db
C
202 if !line =|= -1
203 then pr2_no_nl "passed:"
34e49164
C
204 else pr2_no_nl "\npassed:";
205 line := newline;
206 pr2_no_nl (s ^ " ");
207 end
208 | _ -> ());
209 if not (null ys) then pr2 "";
210 end
ae4735db 211
34e49164
C
212
213
214
215(*****************************************************************************)
216(* Lexing only *)
217(*****************************************************************************)
218
219(* called by parse_print_error_heuristic *)
ae4735db 220let tokens2 file =
708f4980 221 let table = Common.full_charpos_to_pos_large file in
34e49164 222
ae4735db 223 Common.with_open_infile file (fun chan ->
34e49164 224 let lexbuf = Lexing.from_channel chan in
ae4735db
C
225 try
226 let rec tokens_aux acc =
34e49164
C
227 let tok = Lexer_c.token lexbuf in
228 (* fill in the line and col information *)
ae4735db 229 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
34e49164
C
230 { ii with Ast_c.pinfo=
231 (* could assert pinfo.filename = file ? *)
232 match Ast_c.pinfo_of_info ii with
233 Ast_c.OriginTok pi ->
708f4980 234 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
34e49164 235 | Ast_c.ExpandedTok (pi,vpi) ->
708f4980 236 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
34e49164
C
237 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
238 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
239 })
240 in
241
242 if TH.is_eof tok
243 then List.rev (tok::acc)
244 else tokens_aux (tok::acc)
245 in
246 tokens_aux []
247 with
ae4735db
C
248 | Lexer_c.Lexical s ->
249 failwith ("lexical error " ^ s ^ "\n =" ^
34e49164
C
250 (Common.error_message file (lexbuf_to_strpos lexbuf)))
251 | e -> raise e
252 )
253
ae4735db
C
254let time_lexing ?(profile=true) a =
255 if profile
485bce71 256 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
951c7801 257 else tokens2 a
ae4735db 258let tokens ?profile a =
485bce71 259 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
34e49164
C
260
261
ae4735db 262let tokens_of_string string =
34e49164 263 let lexbuf = Lexing.from_string string in
ae4735db
C
264 try
265 let rec tokens_s_aux () =
34e49164
C
266 let tok = Lexer_c.token lexbuf in
267 if TH.is_eof tok
268 then [tok]
269 else tok::(tokens_s_aux ())
270 in
271 tokens_s_aux ()
272 with
273 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
274 | e -> raise e
275
276
277(*****************************************************************************)
278(* Parsing, but very basic, no more used *)
279(*****************************************************************************)
280
281(*
282 * !!!Those function use refs, and are not reentrant !!! so take care.
283 * It use globals defined in Lexer_parser.
ae4735db 284 *
34e49164
C
285 * update: because now lexer return comments tokens, those functions
286 * may not work anymore.
287 *)
288
ae4735db 289let parse file =
34e49164
C
290 let lexbuf = Lexing.from_channel (open_in file) in
291 let result = Parser_c.main Lexer_c.token lexbuf in
292 result
293
294
ae4735db 295let parse_print_error file =
34e49164
C
296 let chan = (open_in file) in
297 let lexbuf = Lexing.from_channel chan in
298
299 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
ae4735db 300 try
34e49164 301 lexbuf +> Parser_c.main Lexer_c.token
ae4735db
C
302 with
303 | Lexer_c.Lexical s ->
34e49164 304 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
ae4735db 305 | Parsing.Parse_error ->
34e49164 306 failwith ("parse error \n = " ^ error_msg ())
ae4735db 307 | Semantic_c.Semantic (s, i) ->
34e49164
C
308 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
309 | e -> raise e
310
311
312
313
314(*****************************************************************************)
315(* Parsing subelements, useful to debug parser *)
316(*****************************************************************************)
317
318(*
319 * !!!Those function use refs, and are not reentrant !!! so take care.
320 * It use globals defined in Lexer_parser.
321 *)
322
323
ae4735db
C
324(* old:
325 * let parse_gen parsefunc s =
34e49164
C
326 * let lexbuf = Lexing.from_string s in
327 * let result = parsefunc Lexer_c.token lexbuf in
328 * result
329 *)
330
ae4735db 331let parse_gen parsefunc s =
485bce71 332 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
34e49164
C
333
334
335 (* Why use this lexing scheme ? Why not classically give lexer func
ae4735db 336 * to parser ? Because I now keep comments in lexer. Could
34e49164
C
337 * just do a simple wrapper that when comment ask again for a token,
338 * but maybe simpler to use cur_tok technique.
339 *)
340 let all_tokens = ref toks in
341 let cur_tok = ref (List.hd !all_tokens) in
342
ae4735db
C
343 let lexer_function =
344 (fun _ ->
34e49164 345 if TH.is_eof !cur_tok
708f4980 346 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
34e49164
C
347 else
348 let v = Common.pop2 all_tokens in
349 cur_tok := v;
350 !cur_tok
ae4735db 351 )
34e49164
C
352 in
353 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
354 let result = parsefunc lexer_function lexbuf_fake in
355 result
356
357
358let type_of_string = parse_gen Parser_c.type_name
359let statement_of_string = parse_gen Parser_c.statement
360let expression_of_string = parse_gen Parser_c.expr
361
362(* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
363
364
365
366
367
34e49164 368(*****************************************************************************)
485bce71 369(* Parsing default define macros, usually in a standard.h file *)
34e49164
C
370(*****************************************************************************)
371
ae4735db
C
372let extract_macros2 file =
373 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
708f4980
C
374 Flag_parsing_c.verbose_lexing := false;
375 let toks = tokens ~profile:false file in
978fd7e5
C
376 let toks = Parsing_hacks.fix_tokens_define toks in
377 Cpp_token_c.extract_macros toks
708f4980 378 )
34e49164 379
ae4735db 380let extract_macros a =
978fd7e5 381 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
485bce71 382
34e49164
C
383
384(*****************************************************************************)
978fd7e5 385(* Helper for main entry point *)
34e49164
C
386(*****************************************************************************)
387
34e49164
C
388
389(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
390 * possible error recovery. Indeed, they allow to skip some tokens and
391 * still be able to call again the ocamlyacc parser. It is ugly code
392 * because we cant modify ocamllex and ocamlyacc. As we want some
393 * extended lexing tricks, we have to use such refs.
ae4735db 394 *
34e49164
C
395 * Those refs are now also used for my lalr(k) technique. Indeed They
396 * store the futur and previous tokens that were parsed, and so
397 * provide enough context information for powerful lex trick.
ae4735db 398 *
34e49164 399 * - passed_tokens_last_ckp stores the passed tokens since last
485bce71 400 * checkpoint. Used for NotParsedCorrectly and also to build the
34e49164
C
401 * info_item attached to each program_element.
402 * - passed_tokens_clean is used for lookahead, in fact for lookback.
403 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
404 * contain some comments and so would make pattern matching difficult
ae4735db 405 * in lookahead. Hence this variable. We would like also to get rid
34e49164
C
406 * of cpp instruction because sometimes a cpp instruction is between
407 * two tokens and makes a pattern matching fail. But lookahead also
408 * transform some cpp instruction (in comment) so can't remove them.
ae4735db 409 *
34e49164
C
410 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
411 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
412 * comment-tokens.
ae4735db 413 *
34e49164 414 * Normally we have:
ae4735db 415 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
34e49164 416 * after the call to pop2.
ae4735db 417 * toks = (reverse passed_tok) ++ remaining_tokens
34e49164
C
418 * at the and of the lexer_function call.
419 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
420 * At the end of lexer_function call, cur_tok overlap with passed_tok.
ae4735db 421 *
34e49164 422 * convention: I use "tr" for "tokens refs"
ae4735db 423 *
485bce71
C
424 * I now also need this lexing trick because the lexer return comment
425 * tokens.
34e49164
C
426 *)
427
428type tokens_state = {
429 mutable rest : Parser_c.token list;
430 mutable rest_clean : Parser_c.token list;
431 mutable current : Parser_c.token;
432 (* it's passed since last "checkpoint", not passed from the beginning *)
433 mutable passed : Parser_c.token list;
434 mutable passed_clean : Parser_c.token list;
435}
708f4980 436
ae4735db
C
437let mk_tokens_state toks =
438 {
708f4980
C
439 rest = toks;
440 rest_clean = (toks +> List.filter TH.is_not_comment);
441 current = (List.hd toks);
ae4735db 442 passed = [];
708f4980
C
443 passed_clean = [];
444 }
445
446
447
ae4735db 448let clone_tokens_state tr =
485bce71
C
449 { rest = tr.rest;
450 rest_clean = tr.rest_clean;
451 current = tr.current;
452 passed = tr.passed;
453 passed_clean = tr.passed_clean;
454 }
ae4735db 455let copy_tokens_state ~src ~dst =
485bce71
C
456 dst.rest <- src.rest;
457 dst.rest_clean <- src.rest_clean;
458 dst.current <- src.current;
459 dst.passed <- src.passed;
460 dst.passed_clean <- src.passed_clean;
461 ()
462
708f4980 463(* todo? agglomerate the x##b ? *)
485bce71
C
464let rec filter_noise n xs =
465 match n, xs with
466 | _, [] -> []
467 | 0, xs -> xs
ae4735db 468 | n, x::xs ->
485bce71 469 (match x with
ae4735db 470 | Parser_c.TMacroAttr _ ->
485bce71 471 filter_noise (n-1) xs
ae4735db 472 | _ ->
485bce71
C
473 x::filter_noise (n-1) xs
474 )
475
ae4735db 476let clean_for_lookahead xs =
485bce71
C
477 match xs with
478 | [] -> []
479 | [x] -> [x]
ae4735db 480 | x::xs ->
485bce71
C
481 x::filter_noise 10 xs
482
34e49164 483
485bce71 484
ae4735db 485(* Hacked lex. This function use refs passed by parse_print_error_heuristic
485bce71
C
486 * tr means token refs.
487 *)
ae4735db 488let rec lexer_function ~pass tr = fun lexbuf ->
34e49164 489 match tr.rest with
708f4980 490 | [] -> pr2_err "ALREADY AT END"; tr.current
ae4735db 491 | v::xs ->
34e49164
C
492 tr.rest <- xs;
493 tr.current <- v;
494
495 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
496
497 if TH.is_comment v
498 then begin
499 tr.passed <- v::tr.passed;
485bce71 500 lexer_function ~pass tr lexbuf
34e49164
C
501 end
502 else begin
503 let x = List.hd tr.rest_clean in
504 tr.rest_clean <- List.tl tr.rest_clean;
b1b2de81 505 assert (x =*= v);
ae4735db 506
34e49164 507 (match v with
113803cf 508
ae4735db 509 (* fix_define1.
113803cf
C
510 *
511 * Why not in parsing_hacks lookahead and do passing like
ae4735db
C
512 * I do for some ifdef directives ? Because here I also need to
513 * generate some tokens sometimes and so I need access to the
113803cf 514 * tr.passed, tr.rest, etc.
485bce71 515 *)
9f8e26f4 516 | Parser_c.TDefine (tok) ->
ae4735db 517 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 518 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 519 then begin
485bce71 520 incr Stat.nDefinePassing;
34e49164 521 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
3a314143
C
522 let v' =
523 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
524 in
525 tr.passed <- v'::tr.passed;
526 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
527 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
528 lexer_function ~pass tr lexbuf
529 end
530 else begin
531 tr.passed <- v::tr.passed;
532 tr.passed_clean <- v::tr.passed_clean;
533 v
534 end
535
536 | Parser_c.TUndef (tok) ->
537 if not (LP.current_context () =*= LP.InTopLevel) &&
538 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
539 then begin
540 incr Stat.nUndefPassing;
541 pr2_once ("CPP-UNDEF: inside function, I treat it as comment");
542 let v' =
543 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
34e49164
C
544 in
545 tr.passed <- v'::tr.passed;
978fd7e5
C
546 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
547 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
485bce71 548 lexer_function ~pass tr lexbuf
34e49164
C
549 end
550 else begin
551 tr.passed <- v::tr.passed;
552 tr.passed_clean <- v::tr.passed_clean;
553 v
554 end
ae4735db
C
555
556 | Parser_c.TInclude (includes, filename, inifdef, info) ->
b1b2de81 557 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 558 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 559 then begin
485bce71 560 incr Stat.nIncludePassing;
34e49164 561 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
0708f913 562 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
34e49164 563 tr.passed <- v::tr.passed;
485bce71 564 lexer_function ~pass tr lexbuf
34e49164
C
565 end
566 else begin
ae4735db 567 let (v,new_tokens) =
3a314143 568 Parsing_hacks.tokens_include(info, includes, filename, inifdef) in
ae4735db 569 let new_tokens_clean =
34e49164
C
570 new_tokens +> List.filter TH.is_not_comment in
571
572 tr.passed <- v::tr.passed;
573 tr.passed_clean <- v::tr.passed_clean;
574 tr.rest <- new_tokens ++ tr.rest;
575 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
576 v
577 end
ae4735db
C
578
579 | _ ->
580
34e49164
C
581 (* typedef_fix1 *)
582 let v = match v with
ae4735db
C
583 | Parser_c.TIdent (s, ii) ->
584 if
585 LP.is_typedef s &&
485bce71 586 not (!Flag_parsing_c.disable_add_typedef) &&
b1b2de81 587 pass =|= 1
34e49164
C
588 then Parser_c.TypedefIdent (s, ii)
589 else Parser_c.TIdent (s, ii)
590 | x -> x
591 in
ae4735db 592
485bce71
C
593 let v = Parsing_hacks.lookahead ~pass
594 (clean_for_lookahead (v::tr.rest_clean))
595 tr.passed_clean in
34e49164
C
596
597 tr.passed <- v::tr.passed;
ae4735db 598
485bce71 599 (* the lookahead may have changed the status of the token and
34e49164 600 * consider it as a comment, for instance some #include are
485bce71 601 * turned into comments, hence this code. *)
34e49164 602 match v with
485bce71 603 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
ae4735db 604 | v ->
34e49164
C
605 tr.passed_clean <- v::tr.passed_clean;
606 v
607 )
608 end
609
610
708f4980
C
611let max_pass = 4
612
34e49164 613
ae4735db 614let get_one_elem ~pass tr (file, filelines) =
485bce71
C
615
616 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
708f4980 617 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
485bce71
C
618
619 (* normally have to do that only when come from an exception in which
ae4735db 620 * case the dt() may not have been done
485bce71
C
621 * TODO but if was in scoped scope ? have to let only the last scope
622 * so need do a LP.lexer_reset_typedef ();
623 *)
ae4735db 624 LP.enable_typedef();
485bce71
C
625 LP._lexer_hint := (LP.default_hint ());
626 LP.save_typedef_state();
627
628 tr.passed <- [];
629
630 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
ae4735db
C
631
632 (try
485bce71
C
633 (* -------------------------------------------------- *)
634 (* Call parser *)
635 (* -------------------------------------------------- *)
ae4735db 636 Common.profile_code_exclusif "YACC" (fun () ->
c491d8ee 637 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
485bce71 638 )
ae4735db 639 with e ->
485bce71
C
640 LP.restore_typedef_state();
641
642 (* must keep here, before the code that adjusts the tr fields *)
643 let line_error = TH.line_of_tok tr.current in
708f4980
C
644
645 let passed_before_error = tr.passed in
646 let current = tr.current in
485bce71 647 (* error recovery, go to next synchro point *)
ae4735db 648 let (passed', rest') =
978fd7e5 649 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
485bce71
C
650 tr.rest <- rest';
651 tr.passed <- passed';
ae4735db 652
485bce71
C
653 tr.current <- List.hd passed';
654 tr.passed_clean <- []; (* enough ? *)
655 (* with error recovery, rest and rest_clean may not be in sync *)
656 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
ae4735db
C
657
658
659 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
660 Right (info_of_bads, line_error,
661 tr.passed, passed_before_error,
708f4980 662 current, e)
485bce71
C
663 )
664
665
666
978fd7e5
C
667(* Macro problem recovery *)
668(* used by the multi-pass error recovery expand-on-demand *)
669(*
ae4735db
C
670val candidate_macros_in_passed:
671 defs: (string, define_def) Hashtbl.t ->
978fd7e5
C
672 Parser_c.token list -> (string * define_def) list
673*)
674
ae4735db 675let candidate_macros_in_passed2 ~defs passed =
978fd7e5
C
676 let res = ref [] in
677 let res2 = ref [] in
678
679 passed +> List.iter (function
680 | Parser_c.TIdent (s,_)
681 (* bugfix: may have to undo some infered things *)
682 | Parser_c.TMacroIterator (s,_)
683 | Parser_c.TypedefIdent (s,_)
ae4735db 684 ->
978fd7e5 685 (match Common.hfind_option s defs with
ae4735db
C
686 | Some def ->
687 if s ==~ Parsing_hacks.regexp_macro
978fd7e5
C
688 then
689 (* pr2 (spf "candidate: %s" s); *)
ae4735db
C
690 Common.push2 (s, def) res
691 else
978fd7e5
C
692 Common.push2 (s, def) res2
693 | None -> ()
694 )
695
696 | _ -> ()
697 );
ae4735db
C
698 if null !res
699 then !res2
978fd7e5
C
700 else !res
701
ae4735db
C
702let candidate_macros_in_passed ~defs b =
703 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 704 candidate_macros_in_passed2 ~defs b)
ae4735db 705
978fd7e5
C
706
707
708
709
710let find_optional_macro_to_expand2 ~defs toks =
711
712 let defs = Common.hash_of_list defs in
713
c491d8ee 714 let toks = toks +> Common.tail_map (function
978fd7e5
C
715
716 (* special cases to undo *)
ae4735db 717 | Parser_c.TMacroIterator (s, ii) ->
978fd7e5
C
718 if Hashtbl.mem defs s
719 then Parser_c.TIdent (s, ii)
720 else Parser_c.TMacroIterator (s, ii)
721
ae4735db 722 | Parser_c.TypedefIdent (s, ii) ->
978fd7e5
C
723 if Hashtbl.mem defs s
724 then Parser_c.TIdent (s, ii)
725 else Parser_c.TypedefIdent (s, ii)
726
727 | x -> x
728 ) in
729
730 let tokens = toks in
731 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
732
733 (* just calling apply_macro_defs and having a specialized version
ae4735db 734 * of the code in fix_tokens_cpp is not enough as some work such
978fd7e5
C
735 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
736 * will not get the chance to be run on the new expanded tokens.
ae4735db 737 * Hence even if it's expensive, it's currently better to
978fd7e5
C
738 * just call directly fix_tokens_cpp again here.
739
740 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
741 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
742 let paren_grouped = TV.mk_parenthised cleaner in
743 Cpp_token_c.apply_macro_defs
744 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
745 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
746 defs paren_grouped;
747 (* because the before field is used by apply_macro_defs *)
ae4735db
C
748 tokens2 := TV.rebuild_tokens_extented !tokens2;
749 Parsing_hacks.insert_virtual_positions
978fd7e5
C
750 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
751 *)
ae4735db
C
752let find_optional_macro_to_expand ~defs a =
753 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 754 find_optional_macro_to_expand2 ~defs a)
ae4735db 755
978fd7e5
C
756
757
758
759
760(*****************************************************************************)
761(* Main entry points *)
762(*****************************************************************************)
763
ae4735db 764let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
765 ref (Hashtbl.create 101)
766
ae4735db 767let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
768 ref (Hashtbl.create 101)
769
770
771(* can not be put in parsing_hack, cos then mutually recursive problem as
772 * we also want to parse the standard.h file.
773 *)
ae4735db 774let init_defs_macros std_h =
978fd7e5
C
775 if not (Common.lfile_exists std_h)
776 then pr2 ("warning: Can't find default macro file: " ^ std_h)
777 else begin
778 pr2 ("init_defs: " ^ std_h);
779 _defs := Common.hash_of_list (extract_macros std_h);
780 end
781
ae4735db 782let init_defs_builtins file_h =
978fd7e5
C
783 if not (Common.lfile_exists file_h)
784 then pr2 ("warning: Can't find macro file: " ^ file_h)
785 else begin
786 pr2 ("init_defs_builtins: " ^ file_h);
ae4735db 787 _defs_builtins :=
978fd7e5
C
788 Common.hash_of_list (extract_macros file_h);
789 end
790
791
792
793type info_item = string * Parser_c.token list
794
795type program2 = toplevel2 list
ca417fcf
C
796 and extended_program2 = toplevel2 list *
797 (string, Lexer_parser.identkind) Common.scoped_h_env (* type defs *) *
798 (string, Cpp_token_c.define_def) Hashtbl.t (* macro defs *)
799 and toplevel2 = Ast_c.toplevel * info_item
978fd7e5 800
ae4735db 801let program_of_program2 xs =
978fd7e5
C
802 xs +> List.map fst
803
ae4735db
C
804let with_program2 f program2 =
805 program2
806 +> Common.unzip
807 +> (fun (program, infos) ->
978fd7e5
C
808 f program, infos
809 )
810 +> Common.uncurry Common.zip
811
812
813
814
815
485bce71 816
34e49164 817(* note: as now we go in 2 passes, there is first all the error message of
485bce71 818 * the lexer, and then the error of the parser. It is not anymore
34e49164 819 * interwinded.
ae4735db 820 *
34e49164
C
821 * !!!This function use refs, and is not reentrant !!! so take care.
822 * It use globals defined in Lexer_parser and also the _defs global
ae4735db
C
823 * in parsing_hack.ml.
824 *
485bce71
C
825 * This function uses internally some semi globals in the
826 * tokens_stat record and parsing_stat record.
34e49164
C
827 *)
828
ca417fcf 829let parse_print_error_heuristic2 saved_typedefs saved_macros file =
34e49164 830
91eba41f 831 let filelines = Common.cat_array file in
485bce71
C
832 let stat = Parsing_stat.default_stat file in
833
34e49164
C
834 (* -------------------------------------------------- *)
835 (* call lexer and get all the tokens *)
836 (* -------------------------------------------------- *)
993936c0 837
ca417fcf 838 LP.lexer_reset_typedef saved_typedefs;
485bce71 839 Parsing_hacks.ifdef_paren_cnt := 0;
708f4980 840
485bce71 841 let toks_orig = tokens file in
978fd7e5 842 let toks = Parsing_hacks.fix_tokens_define toks_orig in
708f4980 843 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
34e49164 844
708f4980 845 (* expand macros on demand trick, preparation phase *)
ae4735db
C
846 let macros =
847 Common.profile_code "MACRO mgmt prep 1" (fun () ->
ca417fcf
C
848 let macros =
849 match saved_macros with None -> Hashtbl.copy !_defs | Some h -> h in
708f4980
C
850 (* include also builtins as some macros may generate some builtins too
851 * like __decl_spec or __stdcall
852 *)
ae4735db 853 !_defs_builtins +> Hashtbl.iter (fun s def ->
708f4980
C
854 Hashtbl.replace macros s def;
855 );
856 macros
857 )
858 in
ae4735db 859 Common.profile_code "MACRO mgmt prep 2" (fun () ->
978fd7e5 860 let local_macros = extract_macros file in
ae4735db 861 local_macros +> List.iter (fun (s, def) ->
708f4980
C
862 Hashtbl.replace macros s def;
863 );
864 );
34e49164 865
708f4980 866 let tr = mk_tokens_state toks in
485bce71
C
867
868 let rec loop tr =
34e49164
C
869
870 (* todo?: I am not sure that it represents current_line, cos maybe
871 * tr.current partipated in the previous parsing phase, so maybe tr.current
872 * is not the first token of the next parsing phase. Same with checkpoint2.
873 * It would be better to record when we have a } or ; in parser.mly,
874 * cos we know that they are the last symbols of external_declaration2.
485bce71
C
875 *
876 * bugfix: may not be equal to 'file' as after macro expansions we can
877 * start to parse a new entity from the body of a macro, for instance
878 * when parsing a define_machine() body, cf standard.h
34e49164
C
879 *)
880 let checkpoint = TH.line_of_tok tr.current in
485bce71 881 let checkpoint_file = TH.file_of_tok tr.current in
34e49164 882
485bce71 883 (* call the parser *)
ae4735db
C
884 let elem =
885 let pass1 =
886 Common.profile_code "Parsing: 1st pass" (fun () ->
708f4980
C
887 get_one_elem ~pass:1 tr (file, filelines)
888 ) in
485bce71
C
889 match pass1 with
890 | Left e -> Left e
ae4735db 891 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
708f4980
C
892 if !Flag_parsing_c.disable_multi_pass
893 then pass1
485bce71 894 else begin
ae4735db 895 Common.profile_code "Parsing: multi pass" (fun () ->
708f4980
C
896
897 pr2_err "parsing pass2: try again";
898 let toks = List.rev passed ++ tr.rest in
899 let new_tr = mk_tokens_state toks in
900 copy_tokens_state ~src:new_tr ~dst:tr;
901 let passx = get_one_elem ~pass:2 tr (file, filelines) in
902
903 (match passx with
904 | Left e -> passx
ae4735db
C
905 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
906 let candidates =
907 candidate_macros_in_passed ~defs:macros passed
708f4980 908 in
ae4735db 909
978fd7e5 910
708f4980
C
911 if is_define_passed passed || null candidates
912 then passx
913 else begin
914 (* todo factorize code *)
915
916 pr2_err "parsing pass3: try again";
917 let toks = List.rev passed ++ tr.rest in
ae4735db 918 let toks' =
708f4980
C
919 find_optional_macro_to_expand ~defs:candidates toks in
920 let new_tr = mk_tokens_state toks' in
921 copy_tokens_state ~src:new_tr ~dst:tr;
922 let passx = get_one_elem ~pass:3 tr (file, filelines) in
923
924 (match passx with
925 | Left e -> passx
ae4735db 926 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
708f4980
C
927 pr2_err "parsing pass4: try again";
928
ae4735db
C
929 let candidates =
930 candidate_macros_in_passed
931 ~defs:macros passed
978fd7e5 932 in
708f4980
C
933
934 let toks = List.rev passed ++ tr.rest in
ae4735db 935 let toks' =
708f4980
C
936 find_optional_macro_to_expand ~defs:candidates toks in
937 let new_tr = mk_tokens_state toks' in
938 copy_tokens_state ~src:new_tr ~dst:tr;
939 let passx = get_one_elem ~pass:4 tr (file, filelines) in
940 passx
941 )
942 end
943 )
944 )
485bce71
C
945 end
946 in
947
948
949 (* again not sure if checkpoint2 corresponds to end of bad region *)
950 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
951 let checkpoint2_file = TH.file_of_tok tr.current in
952
ae4735db 953 let diffline =
b1b2de81 954 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
ae4735db 955 then (checkpoint2 - checkpoint)
485bce71
C
956 else 0
957 (* TODO? so if error come in middle of something ? where the
958 * start token was from original file but synchro found in body
959 * of macro ? then can have wrong number of lines stat.
960 * Maybe simpler just to look at tr.passed and count
961 * the lines in the token from the correct file ?
962 *)
34e49164 963 in
ae4735db 964 let info = mk_info_item file (List.rev tr.passed) in
34e49164 965
485bce71 966 (* some stat updates *)
ae4735db 967 stat.Stat.commentized <-
485bce71
C
968 stat.Stat.commentized + count_lines_commentized (snd info);
969
ae4735db 970 let elem =
485bce71 971 match elem with
ae4735db 972 | Left e ->
91eba41f
C
973 stat.Stat.correct <- stat.Stat.correct + diffline;
974 e
ae4735db
C
975 | Right (info_of_bads, line_error, toks_of_bads,
976 _passed_before_error, cur, exn) ->
708f4980
C
977
978 let was_define = is_define_passed tr.passed in
ae4735db 979
708f4980
C
980 if was_define && !Flag_parsing_c.filter_msg_define_error
981 then ()
982 else begin
983
984 (match exn with
ae4735db 985 | Lexer_c.Lexical _
708f4980
C
986 | Parsing.Parse_error
987 | Semantic_c.Semantic _ -> ()
988 | e -> raise e
989 );
990
991 if !Flag_parsing_c.show_parsing_error
ae4735db 992 then begin
708f4980
C
993 (match exn with
994 (* Lexical is not anymore launched I think *)
ae4735db 995 | Lexer_c.Lexical s ->
708f4980 996 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
ae4735db 997 | Parsing.Parse_error ->
708f4980 998 pr2 ("parse error \n = " ^ error_msg_tok cur)
ae4735db 999 | Semantic_c.Semantic (s, i) ->
708f4980
C
1000 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
1001 | e -> raise Impossible
1002 );
1003 (* bugfix: *)
ae4735db 1004 if (checkpoint_file =$= checkpoint2_file) &&
708f4980
C
1005 checkpoint_file =$= file
1006 then print_bad line_error (checkpoint, checkpoint2) filelines
1007 else pr2 "PB: bad: but on tokens not from original file"
1008 end;
1009
ae4735db
C
1010
1011 let pbline =
1012 toks_of_bads
708f4980 1013 +> Common.filter (TH.is_same_line_or_close line_error)
ae4735db 1014 +> Common.filter TH.is_ident_like
708f4980 1015 in
ae4735db 1016 let error_info =
708f4980
C
1017 (pbline +> List.map TH.str_of_tok), line_error
1018 in
ae4735db 1019 stat.Stat.problematic_lines <-
708f4980
C
1020 error_info::stat.Stat.problematic_lines;
1021
1022 end;
1023
91eba41f
C
1024 if was_define && !Flag_parsing_c.filter_define_error
1025 then stat.Stat.correct <- stat.Stat.correct + diffline
1026 else stat.Stat.bad <- stat.Stat.bad + diffline;
1027
485bce71
C
1028 Ast_c.NotParsedCorrectly info_of_bads
1029 in
34e49164
C
1030
1031 (match elem with
1032 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
485bce71 1033 | xs -> (xs, info):: loop tr (* recurse *)
34e49164
C
1034 )
1035 in
485bce71 1036 let v = loop tr in
978fd7e5 1037 let v = with_program2 Parsing_consistency_c.consistency_checking v in
ca417fcf
C
1038 let v =
1039 let new_td = ref (Common.clone_scoped_h_env !LP._typedef) in
1040 Common.clean_scope_h new_td;
1041 (v, !new_td, macros) in
34e49164
C
1042 (v, stat)
1043
1044
ca417fcf
C
1045let time_total_parsing a b =
1046 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a b)
485bce71 1047
ca417fcf
C
1048let parse_print_error_heuristic a b =
1049 Common.profile_code "C parsing" (fun () -> time_total_parsing a b)
485bce71 1050
34e49164
C
1051
1052(* alias *)
ca417fcf
C
1053let parse_c_and_cpp a =
1054 let ((c,_,_),stat) = parse_print_error_heuristic None None a in (c,stat)
1055let parse_c_and_cpp_keep_typedefs td macs a =
1056 parse_print_error_heuristic td macs a
34e49164
C
1057
1058(*****************************************************************************)
1059(* Same but faster cos memoize stuff *)
1060(*****************************************************************************)
ae4735db 1061let parse_cache file =
ca417fcf
C
1062 if not !Flag_parsing_c.use_cache
1063 then parse_print_error_heuristic None None file
ae4735db 1064 else
17ba0788 1065 let _ = pr2_once "TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
ae4735db 1066 let need_no_changed_files =
34e49164 1067 (* should use Sys.argv.(0), would be safer. *)
485bce71
C
1068
1069 [
1070 (* TOFIX
1071 Config.path ^ "/parsing_c/c_parser.cma";
ae4735db 1072 (* we may also depend now on the semantic patch because
485bce71 1073 the SP may use macro and so we will disable some of the
ae4735db 1074 macro expansions from standard.h.
485bce71
C
1075 *)
1076 !Config.std_h;
1077 *)
f3c4ece6 1078 ] in
ae4735db 1079 let need_no_changed_variables =
34e49164 1080 (* could add some of the flags of flag_parsing_c.ml *)
f3c4ece6 1081 [] in
5427db06 1082 Common.cache_computation_robust_in_dir
f3c4ece6 1083 !Flag_parsing_c.cache_prefix file ".ast_raw"
ae4735db 1084 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
f3c4ece6
C
1085 (fun () ->
1086 (* check whether to clear the cache *)
1087 (match (!Flag_parsing_c.cache_limit,!Flag_parsing_c.cache_prefix) with
1088 (None,_) | (_,None) -> ()
1089 | (Some limit,Some prefix) ->
1090 let count =
1091 Common.cmd_to_list
17ba0788
C
1092 (Printf.sprintf "test -e %s && find %s -name \"*_raw\" | wc -l"
1093 prefix prefix) in
f3c4ece6
C
1094 match count with
1095 [c] ->
1096 if int_of_string c >= limit
1097 then
1098 let _ =
1099 Sys.command
17ba0788
C
1100 (Printf.sprintf
1101 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1102 prefix) in
f3c4ece6
C
1103 ()
1104 | _ -> ());
1105 (* recompute *)
1106 parse_print_error_heuristic None None file)
34e49164
C
1107
1108
1109
1110(*****************************************************************************)
485bce71 1111(* Some special cases *)
34e49164
C
1112(*****************************************************************************)
1113
485bce71 1114let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
708f4980
C
1115 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1116 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1117 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1118 program +> Common.find_some (fun (e,_) ->
485bce71
C
1119 match e with
1120 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1121 | _ -> None
1122 )
1123
1124let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
708f4980
C
1125 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1126 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1127 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1128 program +> Common.find_some (fun (e,_) ->
485bce71 1129 match e with
ae4735db 1130 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
485bce71 1131 (match compound with
ae4735db 1132 | [Ast_c.StmtElem st] ->
708f4980
C
1133 (match Ast_c.unwrap_st st with
1134 | Ast_c.ExprStatement (Some e) -> Some e
1135 | _ -> None
1136 )
485bce71
C
1137 | _ -> None
1138 )
1139 | _ -> None
1140 )