Release coccinelle-0.2.5-rc2
[bpt/coccinelle.git] / parsing_c / parse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
34e49164
C
19module LP = Lexer_parser
20
485bce71
C
21module Stat = Parsing_stat
22
34e49164
C
23(*****************************************************************************)
24(* Wrappers *)
25(*****************************************************************************)
ae4735db
C
26let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
34e49164
C
28(*****************************************************************************)
29(* Helpers *)
30(*****************************************************************************)
31
ae4735db
C
32let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34e49164 34
ae4735db 35let token_to_strpos tok =
34e49164
C
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
ae4735db 39let mk_info_item2 filename toks =
34e49164 40 let buf = Buffer.create 100 in
ae4735db 41 let s =
34e49164
C
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
ae4735db 44 toks +> List.iter (fun tok ->
34e49164 45 match TH.pinfo_of_tok tok with
ae4735db 46 | Ast_c.OriginTok _ ->
91eba41f 47 Buffer.add_string buf (TH.str_of_tok tok)
ae4735db 48 | Ast_c.AbstractLineTok _ ->
91eba41f 49 raise Impossible
34e49164
C
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
ae4735db 55 (s, toks)
34e49164 56
ae4735db
C
57let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
34e49164
C
59 (fun () -> mk_info_item2 a b)
60
61
ae4735db 62let info_same_line line xs =
b1b2de81 63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
34e49164 64
34e49164 65
978fd7e5
C
66(* move in cpp_token_c ? *)
67let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
ae4735db
C
69 if List.length xs >= 2
70 then
978fd7e5 71 (match Common.head_middle_tail xs with
ae4735db 72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
978fd7e5
C
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
ae4735db 78 false
978fd7e5
C
79 end
80
81
82(*****************************************************************************)
83(* Error diagnostic *)
84(*****************************************************************************)
85
ae4735db 86let error_msg_tok tok =
978fd7e5
C
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
ae4735db 89 then Common.error_message file (token_to_strpos tok)
978fd7e5
C
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
ae4735db 93let print_bad line_error (start_line, end_line) filelines =
978fd7e5
C
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
ae4735db
C
97 for i = start_line to end_line do
98 let line = filelines.(i) in
978fd7e5 99
ae4735db
C
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
978fd7e5
C
103 done
104 end
105
106
34e49164
C
107(*****************************************************************************)
108(* Stats on what was passed/commentized *)
109(*****************************************************************************)
110
c491d8ee 111let commentized xs = xs +> Common.tail_map_filter (function
ae4735db 112 | Parser_c.TCommentCpp (cppkind, ii) ->
485bce71 113 let s = Ast_c.str_of_info ii in
ae4735db
C
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
485bce71 116 | 0 -> false
ae4735db 117 | 1 ->
0708f913 118 List.mem cppkind [Token_c.CppAttr]
ae4735db 119 ||
485bce71 120 (s =~ "__.*")
ae4735db 121 | 2 ->
0708f913 122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
ae4735db 123 ||
485bce71 124 (s =~ "__.*")
ae4735db 125 | 3 ->
0708f913 126 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
ae4735db 127 ||
485bce71 128 (s =~ "__.*")
ae4735db 129 | 4 ->
0708f913 130 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
ae4735db 131 ||
485bce71
C
132 (s =~ "__.*")
133
134
ae4735db 135 | 5 ->
0708f913 136 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
ae4735db 137 ||
485bce71
C
138 (s =~ "__.*")
139
140
141
142
143 | _ -> failwith "not valid level passing number"
144 in
145 if legal_passing then None else Some (ii.Ast_c.pinfo)
146
147 (*
ae4735db 148 | Ast_c.CppOther ->
34e49164
C
149 (match s with
150 | s when s =~ "KERN_.*" -> None
151 | s when s =~ "__.*" -> None
ae4735db 152 | _ ->
485bce71 153 Some (ii.Ast_c.pinfo)
34e49164 154 )
485bce71
C
155 *)
156
ae4735db 157
34e49164 158 | Parser_c.TCommentMisc ii
ae4735db 159 | Parser_c.TAction ii
34e49164
C
160 ->
161 Some (ii.Ast_c.pinfo)
ae4735db 162 | _ ->
34e49164
C
163 None
164 )
ae4735db
C
165
166let count_lines_commentized xs =
34e49164
C
167 let line = ref (-1) in
168 let count = ref 0 in
169 begin
170 commentized xs +>
171 List.iter
172 (function
ae4735db 173 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
174 let newline = pinfo.Common.line in
175 if newline <> !line
176 then begin
177 line := newline;
178 incr count
179 end
180 | _ -> ());
181 !count
182 end
183
184
185
ae4735db 186let print_commentized xs =
34e49164
C
187 let line = ref (-1) in
188 begin
189 let ys = commentized xs in
190 ys +>
191 List.iter
192 (function
ae4735db 193 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
194 let newline = pinfo.Common.line in
195 let s = pinfo.Common.str in
ae4735db
C
196 let s = Str.global_substitute
197 (Str.regexp "\n") (fun s -> "") s
34e49164 198 in
b1b2de81 199 if newline =|= !line
34e49164
C
200 then prerr_string (s ^ " ")
201 else begin
ae4735db
C
202 if !line =|= -1
203 then pr2_no_nl "passed:"
34e49164
C
204 else pr2_no_nl "\npassed:";
205 line := newline;
206 pr2_no_nl (s ^ " ");
207 end
208 | _ -> ());
209 if not (null ys) then pr2 "";
210 end
ae4735db 211
34e49164
C
212
213
214
215(*****************************************************************************)
216(* Lexing only *)
217(*****************************************************************************)
218
219(* called by parse_print_error_heuristic *)
ae4735db 220let tokens2 file =
708f4980 221 let table = Common.full_charpos_to_pos_large file in
34e49164 222
ae4735db 223 Common.with_open_infile file (fun chan ->
34e49164 224 let lexbuf = Lexing.from_channel chan in
ae4735db
C
225 try
226 let rec tokens_aux acc =
34e49164
C
227 let tok = Lexer_c.token lexbuf in
228 (* fill in the line and col information *)
ae4735db 229 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
34e49164
C
230 { ii with Ast_c.pinfo=
231 (* could assert pinfo.filename = file ? *)
232 match Ast_c.pinfo_of_info ii with
233 Ast_c.OriginTok pi ->
708f4980 234 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
34e49164 235 | Ast_c.ExpandedTok (pi,vpi) ->
708f4980 236 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
34e49164
C
237 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
238 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
239 })
240 in
241
242 if TH.is_eof tok
243 then List.rev (tok::acc)
244 else tokens_aux (tok::acc)
245 in
246 tokens_aux []
247 with
ae4735db
C
248 | Lexer_c.Lexical s ->
249 failwith ("lexical error " ^ s ^ "\n =" ^
34e49164
C
250 (Common.error_message file (lexbuf_to_strpos lexbuf)))
251 | e -> raise e
252 )
253
ae4735db
C
254let time_lexing ?(profile=true) a =
255 if profile
485bce71 256 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
951c7801 257 else tokens2 a
ae4735db 258let tokens ?profile a =
485bce71 259 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
34e49164
C
260
261
ae4735db 262let tokens_of_string string =
34e49164 263 let lexbuf = Lexing.from_string string in
ae4735db
C
264 try
265 let rec tokens_s_aux () =
34e49164
C
266 let tok = Lexer_c.token lexbuf in
267 if TH.is_eof tok
268 then [tok]
269 else tok::(tokens_s_aux ())
270 in
271 tokens_s_aux ()
272 with
273 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
274 | e -> raise e
275
276
277(*****************************************************************************)
278(* Parsing, but very basic, no more used *)
279(*****************************************************************************)
280
281(*
282 * !!!Those function use refs, and are not reentrant !!! so take care.
283 * It use globals defined in Lexer_parser.
ae4735db 284 *
34e49164
C
285 * update: because now lexer return comments tokens, those functions
286 * may not work anymore.
287 *)
288
ae4735db 289let parse file =
34e49164
C
290 let lexbuf = Lexing.from_channel (open_in file) in
291 let result = Parser_c.main Lexer_c.token lexbuf in
292 result
293
294
ae4735db 295let parse_print_error file =
34e49164
C
296 let chan = (open_in file) in
297 let lexbuf = Lexing.from_channel chan in
298
299 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
ae4735db 300 try
34e49164 301 lexbuf +> Parser_c.main Lexer_c.token
ae4735db
C
302 with
303 | Lexer_c.Lexical s ->
34e49164 304 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
ae4735db 305 | Parsing.Parse_error ->
34e49164 306 failwith ("parse error \n = " ^ error_msg ())
ae4735db 307 | Semantic_c.Semantic (s, i) ->
34e49164
C
308 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
309 | e -> raise e
310
311
312
313
314(*****************************************************************************)
315(* Parsing subelements, useful to debug parser *)
316(*****************************************************************************)
317
318(*
319 * !!!Those function use refs, and are not reentrant !!! so take care.
320 * It use globals defined in Lexer_parser.
321 *)
322
323
ae4735db
C
324(* old:
325 * let parse_gen parsefunc s =
34e49164
C
326 * let lexbuf = Lexing.from_string s in
327 * let result = parsefunc Lexer_c.token lexbuf in
328 * result
329 *)
330
ae4735db 331let parse_gen parsefunc s =
485bce71 332 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
34e49164
C
333
334
335 (* Why use this lexing scheme ? Why not classically give lexer func
ae4735db 336 * to parser ? Because I now keep comments in lexer. Could
34e49164
C
337 * just do a simple wrapper that when comment ask again for a token,
338 * but maybe simpler to use cur_tok technique.
339 *)
340 let all_tokens = ref toks in
341 let cur_tok = ref (List.hd !all_tokens) in
342
ae4735db
C
343 let lexer_function =
344 (fun _ ->
34e49164 345 if TH.is_eof !cur_tok
708f4980 346 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
34e49164
C
347 else
348 let v = Common.pop2 all_tokens in
349 cur_tok := v;
350 !cur_tok
ae4735db 351 )
34e49164
C
352 in
353 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
354 let result = parsefunc lexer_function lexbuf_fake in
355 result
356
357
358let type_of_string = parse_gen Parser_c.type_name
359let statement_of_string = parse_gen Parser_c.statement
360let expression_of_string = parse_gen Parser_c.expr
361
362(* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
363
364
365
366
367
34e49164 368(*****************************************************************************)
485bce71 369(* Parsing default define macros, usually in a standard.h file *)
34e49164
C
370(*****************************************************************************)
371
ae4735db
C
372let extract_macros2 file =
373 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
708f4980
C
374 Flag_parsing_c.verbose_lexing := false;
375 let toks = tokens ~profile:false file in
978fd7e5
C
376 let toks = Parsing_hacks.fix_tokens_define toks in
377 Cpp_token_c.extract_macros toks
708f4980 378 )
34e49164 379
ae4735db 380let extract_macros a =
978fd7e5 381 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
485bce71 382
34e49164
C
383
384(*****************************************************************************)
978fd7e5 385(* Helper for main entry point *)
34e49164
C
386(*****************************************************************************)
387
34e49164
C
388
389(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
390 * possible error recovery. Indeed, they allow to skip some tokens and
391 * still be able to call again the ocamlyacc parser. It is ugly code
392 * because we cant modify ocamllex and ocamlyacc. As we want some
393 * extended lexing tricks, we have to use such refs.
ae4735db 394 *
34e49164
C
395 * Those refs are now also used for my lalr(k) technique. Indeed They
396 * store the futur and previous tokens that were parsed, and so
397 * provide enough context information for powerful lex trick.
ae4735db 398 *
34e49164 399 * - passed_tokens_last_ckp stores the passed tokens since last
485bce71 400 * checkpoint. Used for NotParsedCorrectly and also to build the
34e49164
C
401 * info_item attached to each program_element.
402 * - passed_tokens_clean is used for lookahead, in fact for lookback.
403 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
404 * contain some comments and so would make pattern matching difficult
ae4735db 405 * in lookahead. Hence this variable. We would like also to get rid
34e49164
C
406 * of cpp instruction because sometimes a cpp instruction is between
407 * two tokens and makes a pattern matching fail. But lookahead also
408 * transform some cpp instruction (in comment) so can't remove them.
ae4735db 409 *
34e49164
C
410 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
411 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
412 * comment-tokens.
ae4735db 413 *
34e49164 414 * Normally we have:
ae4735db 415 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
34e49164 416 * after the call to pop2.
ae4735db 417 * toks = (reverse passed_tok) ++ remaining_tokens
34e49164
C
418 * at the and of the lexer_function call.
419 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
420 * At the end of lexer_function call, cur_tok overlap with passed_tok.
ae4735db 421 *
34e49164 422 * convention: I use "tr" for "tokens refs"
ae4735db 423 *
485bce71
C
424 * I now also need this lexing trick because the lexer return comment
425 * tokens.
34e49164
C
426 *)
427
428type tokens_state = {
429 mutable rest : Parser_c.token list;
430 mutable rest_clean : Parser_c.token list;
431 mutable current : Parser_c.token;
432 (* it's passed since last "checkpoint", not passed from the beginning *)
433 mutable passed : Parser_c.token list;
434 mutable passed_clean : Parser_c.token list;
435}
708f4980 436
ae4735db
C
437let mk_tokens_state toks =
438 {
708f4980
C
439 rest = toks;
440 rest_clean = (toks +> List.filter TH.is_not_comment);
441 current = (List.hd toks);
ae4735db 442 passed = [];
708f4980
C
443 passed_clean = [];
444 }
445
446
447
ae4735db 448let clone_tokens_state tr =
485bce71
C
449 { rest = tr.rest;
450 rest_clean = tr.rest_clean;
451 current = tr.current;
452 passed = tr.passed;
453 passed_clean = tr.passed_clean;
454 }
ae4735db 455let copy_tokens_state ~src ~dst =
485bce71
C
456 dst.rest <- src.rest;
457 dst.rest_clean <- src.rest_clean;
458 dst.current <- src.current;
459 dst.passed <- src.passed;
460 dst.passed_clean <- src.passed_clean;
461 ()
462
708f4980 463(* todo? agglomerate the x##b ? *)
485bce71
C
464let rec filter_noise n xs =
465 match n, xs with
466 | _, [] -> []
467 | 0, xs -> xs
ae4735db 468 | n, x::xs ->
485bce71 469 (match x with
ae4735db 470 | Parser_c.TMacroAttr _ ->
485bce71 471 filter_noise (n-1) xs
ae4735db 472 | _ ->
485bce71
C
473 x::filter_noise (n-1) xs
474 )
475
ae4735db 476let clean_for_lookahead xs =
485bce71
C
477 match xs with
478 | [] -> []
479 | [x] -> [x]
ae4735db 480 | x::xs ->
485bce71
C
481 x::filter_noise 10 xs
482
34e49164 483
485bce71 484
ae4735db 485(* Hacked lex. This function use refs passed by parse_print_error_heuristic
485bce71
C
486 * tr means token refs.
487 *)
ae4735db 488let rec lexer_function ~pass tr = fun lexbuf ->
34e49164 489 match tr.rest with
708f4980 490 | [] -> pr2_err "ALREADY AT END"; tr.current
ae4735db 491 | v::xs ->
34e49164
C
492 tr.rest <- xs;
493 tr.current <- v;
494
495 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
496
497 if TH.is_comment v
498 then begin
499 tr.passed <- v::tr.passed;
485bce71 500 lexer_function ~pass tr lexbuf
34e49164
C
501 end
502 else begin
503 let x = List.hd tr.rest_clean in
504 tr.rest_clean <- List.tl tr.rest_clean;
b1b2de81 505 assert (x =*= v);
ae4735db 506
34e49164 507 (match v with
113803cf 508
ae4735db 509 (* fix_define1.
113803cf
C
510 *
511 * Why not in parsing_hacks lookahead and do passing like
ae4735db
C
512 * I do for some ifdef directives ? Because here I also need to
513 * generate some tokens sometimes and so I need access to the
113803cf 514 * tr.passed, tr.rest, etc.
485bce71 515 *)
9f8e26f4 516 | Parser_c.TDefine (tok) ->
ae4735db 517 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 518 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 519 then begin
485bce71 520 incr Stat.nDefinePassing;
34e49164 521 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
3a314143
C
522 let v' =
523 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
524 in
525 tr.passed <- v'::tr.passed;
526 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
527 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
528 lexer_function ~pass tr lexbuf
529 end
530 else begin
531 tr.passed <- v::tr.passed;
532 tr.passed_clean <- v::tr.passed_clean;
533 v
534 end
535
536 | Parser_c.TUndef (tok) ->
537 if not (LP.current_context () =*= LP.InTopLevel) &&
538 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
539 then begin
540 incr Stat.nUndefPassing;
541 pr2_once ("CPP-UNDEF: inside function, I treat it as comment");
542 let v' =
543 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
34e49164
C
544 in
545 tr.passed <- v'::tr.passed;
978fd7e5
C
546 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
547 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
485bce71 548 lexer_function ~pass tr lexbuf
34e49164
C
549 end
550 else begin
551 tr.passed <- v::tr.passed;
552 tr.passed_clean <- v::tr.passed_clean;
553 v
554 end
ae4735db
C
555
556 | Parser_c.TInclude (includes, filename, inifdef, info) ->
b1b2de81 557 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 558 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 559 then begin
485bce71 560 incr Stat.nIncludePassing;
34e49164 561 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
0708f913 562 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
34e49164 563 tr.passed <- v::tr.passed;
485bce71 564 lexer_function ~pass tr lexbuf
34e49164
C
565 end
566 else begin
ae4735db 567 let (v,new_tokens) =
3a314143 568 Parsing_hacks.tokens_include(info, includes, filename, inifdef) in
ae4735db 569 let new_tokens_clean =
34e49164
C
570 new_tokens +> List.filter TH.is_not_comment in
571
572 tr.passed <- v::tr.passed;
573 tr.passed_clean <- v::tr.passed_clean;
574 tr.rest <- new_tokens ++ tr.rest;
575 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
576 v
577 end
ae4735db
C
578
579 | _ ->
580
34e49164
C
581 (* typedef_fix1 *)
582 let v = match v with
ae4735db
C
583 | Parser_c.TIdent (s, ii) ->
584 if
585 LP.is_typedef s &&
485bce71 586 not (!Flag_parsing_c.disable_add_typedef) &&
b1b2de81 587 pass =|= 1
34e49164
C
588 then Parser_c.TypedefIdent (s, ii)
589 else Parser_c.TIdent (s, ii)
590 | x -> x
591 in
ae4735db 592
485bce71
C
593 let v = Parsing_hacks.lookahead ~pass
594 (clean_for_lookahead (v::tr.rest_clean))
595 tr.passed_clean in
34e49164
C
596
597 tr.passed <- v::tr.passed;
ae4735db 598
485bce71 599 (* the lookahead may have changed the status of the token and
34e49164 600 * consider it as a comment, for instance some #include are
485bce71 601 * turned into comments, hence this code. *)
34e49164 602 match v with
485bce71 603 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
ae4735db 604 | v ->
34e49164
C
605 tr.passed_clean <- v::tr.passed_clean;
606 v
607 )
608 end
609
610
708f4980
C
611let max_pass = 4
612
34e49164 613
ae4735db 614let get_one_elem ~pass tr (file, filelines) =
485bce71
C
615
616 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
708f4980 617 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
485bce71
C
618
619 (* normally have to do that only when come from an exception in which
ae4735db 620 * case the dt() may not have been done
485bce71
C
621 * TODO but if was in scoped scope ? have to let only the last scope
622 * so need do a LP.lexer_reset_typedef ();
623 *)
ae4735db 624 LP.enable_typedef();
485bce71
C
625 LP._lexer_hint := (LP.default_hint ());
626 LP.save_typedef_state();
627
628 tr.passed <- [];
629
630 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
ae4735db
C
631
632 (try
485bce71
C
633 (* -------------------------------------------------- *)
634 (* Call parser *)
635 (* -------------------------------------------------- *)
ae4735db 636 Common.profile_code_exclusif "YACC" (fun () ->
c491d8ee 637 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
485bce71 638 )
ae4735db 639 with e ->
485bce71
C
640 LP.restore_typedef_state();
641
642 (* must keep here, before the code that adjusts the tr fields *)
643 let line_error = TH.line_of_tok tr.current in
708f4980
C
644
645 let passed_before_error = tr.passed in
646 let current = tr.current in
485bce71 647 (* error recovery, go to next synchro point *)
ae4735db 648 let (passed', rest') =
978fd7e5 649 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
485bce71
C
650 tr.rest <- rest';
651 tr.passed <- passed';
ae4735db 652
485bce71
C
653 tr.current <- List.hd passed';
654 tr.passed_clean <- []; (* enough ? *)
655 (* with error recovery, rest and rest_clean may not be in sync *)
656 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
ae4735db
C
657
658
659 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
660 Right (info_of_bads, line_error,
661 tr.passed, passed_before_error,
708f4980 662 current, e)
485bce71
C
663 )
664
665
666
978fd7e5
C
667(* Macro problem recovery *)
668(* used by the multi-pass error recovery expand-on-demand *)
669(*
ae4735db
C
670val candidate_macros_in_passed:
671 defs: (string, define_def) Hashtbl.t ->
978fd7e5
C
672 Parser_c.token list -> (string * define_def) list
673*)
674
ae4735db 675let candidate_macros_in_passed2 ~defs passed =
978fd7e5
C
676 let res = ref [] in
677 let res2 = ref [] in
678
679 passed +> List.iter (function
680 | Parser_c.TIdent (s,_)
681 (* bugfix: may have to undo some infered things *)
682 | Parser_c.TMacroIterator (s,_)
683 | Parser_c.TypedefIdent (s,_)
ae4735db 684 ->
978fd7e5 685 (match Common.hfind_option s defs with
ae4735db
C
686 | Some def ->
687 if s ==~ Parsing_hacks.regexp_macro
978fd7e5
C
688 then
689 (* pr2 (spf "candidate: %s" s); *)
ae4735db
C
690 Common.push2 (s, def) res
691 else
978fd7e5
C
692 Common.push2 (s, def) res2
693 | None -> ()
694 )
695
696 | _ -> ()
697 );
ae4735db
C
698 if null !res
699 then !res2
978fd7e5
C
700 else !res
701
ae4735db
C
702let candidate_macros_in_passed ~defs b =
703 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 704 candidate_macros_in_passed2 ~defs b)
ae4735db 705
978fd7e5
C
706
707
708
709
710let find_optional_macro_to_expand2 ~defs toks =
711
712 let defs = Common.hash_of_list defs in
713
c491d8ee 714 let toks = toks +> Common.tail_map (function
978fd7e5
C
715
716 (* special cases to undo *)
ae4735db 717 | Parser_c.TMacroIterator (s, ii) ->
978fd7e5
C
718 if Hashtbl.mem defs s
719 then Parser_c.TIdent (s, ii)
720 else Parser_c.TMacroIterator (s, ii)
721
ae4735db 722 | Parser_c.TypedefIdent (s, ii) ->
978fd7e5
C
723 if Hashtbl.mem defs s
724 then Parser_c.TIdent (s, ii)
725 else Parser_c.TypedefIdent (s, ii)
726
727 | x -> x
728 ) in
729
730 let tokens = toks in
731 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
732
733 (* just calling apply_macro_defs and having a specialized version
ae4735db 734 * of the code in fix_tokens_cpp is not enough as some work such
978fd7e5
C
735 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
736 * will not get the chance to be run on the new expanded tokens.
ae4735db 737 * Hence even if it's expensive, it's currently better to
978fd7e5
C
738 * just call directly fix_tokens_cpp again here.
739
740 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
741 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
742 let paren_grouped = TV.mk_parenthised cleaner in
743 Cpp_token_c.apply_macro_defs
744 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
745 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
746 defs paren_grouped;
747 (* because the before field is used by apply_macro_defs *)
ae4735db
C
748 tokens2 := TV.rebuild_tokens_extented !tokens2;
749 Parsing_hacks.insert_virtual_positions
978fd7e5
C
750 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
751 *)
ae4735db
C
752let find_optional_macro_to_expand ~defs a =
753 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 754 find_optional_macro_to_expand2 ~defs a)
ae4735db 755
978fd7e5
C
756
757
758
759
760(*****************************************************************************)
761(* Main entry points *)
762(*****************************************************************************)
763
ae4735db 764let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
765 ref (Hashtbl.create 101)
766
ae4735db 767let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
768 ref (Hashtbl.create 101)
769
770
771(* can not be put in parsing_hack, cos then mutually recursive problem as
772 * we also want to parse the standard.h file.
773 *)
ae4735db 774let init_defs_macros std_h =
978fd7e5
C
775 if not (Common.lfile_exists std_h)
776 then pr2 ("warning: Can't find default macro file: " ^ std_h)
777 else begin
778 pr2 ("init_defs: " ^ std_h);
779 _defs := Common.hash_of_list (extract_macros std_h);
780 end
781
ae4735db 782let init_defs_builtins file_h =
978fd7e5
C
783 if not (Common.lfile_exists file_h)
784 then pr2 ("warning: Can't find macro file: " ^ file_h)
785 else begin
786 pr2 ("init_defs_builtins: " ^ file_h);
ae4735db 787 _defs_builtins :=
978fd7e5
C
788 Common.hash_of_list (extract_macros file_h);
789 end
790
791
792
793type info_item = string * Parser_c.token list
794
795type program2 = toplevel2 list
796 and toplevel2 = Ast_c.toplevel * info_item
797
ae4735db 798let program_of_program2 xs =
978fd7e5
C
799 xs +> List.map fst
800
ae4735db
C
801let with_program2 f program2 =
802 program2
803 +> Common.unzip
804 +> (fun (program, infos) ->
978fd7e5
C
805 f program, infos
806 )
807 +> Common.uncurry Common.zip
808
809
810
811
812
485bce71 813
34e49164 814(* note: as now we go in 2 passes, there is first all the error message of
485bce71 815 * the lexer, and then the error of the parser. It is not anymore
34e49164 816 * interwinded.
ae4735db 817 *
34e49164
C
818 * !!!This function use refs, and is not reentrant !!! so take care.
819 * It use globals defined in Lexer_parser and also the _defs global
ae4735db
C
820 * in parsing_hack.ml.
821 *
485bce71
C
822 * This function uses internally some semi globals in the
823 * tokens_stat record and parsing_stat record.
34e49164
C
824 *)
825
ae4735db 826let parse_print_error_heuristic2 file =
34e49164 827
91eba41f 828 let filelines = Common.cat_array file in
485bce71
C
829 let stat = Parsing_stat.default_stat file in
830
34e49164
C
831 (* -------------------------------------------------- *)
832 (* call lexer and get all the tokens *)
833 (* -------------------------------------------------- *)
ae4735db 834 LP.lexer_reset_typedef();
485bce71 835 Parsing_hacks.ifdef_paren_cnt := 0;
708f4980 836
485bce71 837 let toks_orig = tokens file in
978fd7e5 838 let toks = Parsing_hacks.fix_tokens_define toks_orig in
708f4980 839 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
34e49164 840
708f4980 841 (* expand macros on demand trick, preparation phase *)
ae4735db
C
842 let macros =
843 Common.profile_code "MACRO mgmt prep 1" (fun () ->
708f4980
C
844 let macros = Hashtbl.copy !_defs in
845 (* include also builtins as some macros may generate some builtins too
846 * like __decl_spec or __stdcall
847 *)
ae4735db 848 !_defs_builtins +> Hashtbl.iter (fun s def ->
708f4980
C
849 Hashtbl.replace macros s def;
850 );
851 macros
852 )
853 in
ae4735db 854 Common.profile_code "MACRO mgmt prep 2" (fun () ->
978fd7e5 855 let local_macros = extract_macros file in
ae4735db 856 local_macros +> List.iter (fun (s, def) ->
708f4980
C
857 Hashtbl.replace macros s def;
858 );
859 );
34e49164 860
708f4980 861 let tr = mk_tokens_state toks in
485bce71
C
862
863 let rec loop tr =
34e49164
C
864
865 (* todo?: I am not sure that it represents current_line, cos maybe
866 * tr.current partipated in the previous parsing phase, so maybe tr.current
867 * is not the first token of the next parsing phase. Same with checkpoint2.
868 * It would be better to record when we have a } or ; in parser.mly,
869 * cos we know that they are the last symbols of external_declaration2.
485bce71
C
870 *
871 * bugfix: may not be equal to 'file' as after macro expansions we can
872 * start to parse a new entity from the body of a macro, for instance
873 * when parsing a define_machine() body, cf standard.h
34e49164
C
874 *)
875 let checkpoint = TH.line_of_tok tr.current in
485bce71 876 let checkpoint_file = TH.file_of_tok tr.current in
34e49164 877
485bce71 878 (* call the parser *)
ae4735db
C
879 let elem =
880 let pass1 =
881 Common.profile_code "Parsing: 1st pass" (fun () ->
708f4980
C
882 get_one_elem ~pass:1 tr (file, filelines)
883 ) in
485bce71
C
884 match pass1 with
885 | Left e -> Left e
ae4735db 886 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
708f4980
C
887 if !Flag_parsing_c.disable_multi_pass
888 then pass1
485bce71 889 else begin
ae4735db 890 Common.profile_code "Parsing: multi pass" (fun () ->
708f4980
C
891
892 pr2_err "parsing pass2: try again";
893 let toks = List.rev passed ++ tr.rest in
894 let new_tr = mk_tokens_state toks in
895 copy_tokens_state ~src:new_tr ~dst:tr;
896 let passx = get_one_elem ~pass:2 tr (file, filelines) in
897
898 (match passx with
899 | Left e -> passx
ae4735db
C
900 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
901 let candidates =
902 candidate_macros_in_passed ~defs:macros passed
708f4980 903 in
ae4735db 904
978fd7e5 905
708f4980
C
906 if is_define_passed passed || null candidates
907 then passx
908 else begin
909 (* todo factorize code *)
910
911 pr2_err "parsing pass3: try again";
912 let toks = List.rev passed ++ tr.rest in
ae4735db 913 let toks' =
708f4980
C
914 find_optional_macro_to_expand ~defs:candidates toks in
915 let new_tr = mk_tokens_state toks' in
916 copy_tokens_state ~src:new_tr ~dst:tr;
917 let passx = get_one_elem ~pass:3 tr (file, filelines) in
918
919 (match passx with
920 | Left e -> passx
ae4735db 921 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
708f4980
C
922 pr2_err "parsing pass4: try again";
923
ae4735db
C
924 let candidates =
925 candidate_macros_in_passed
926 ~defs:macros passed
978fd7e5 927 in
708f4980
C
928
929 let toks = List.rev passed ++ tr.rest in
ae4735db 930 let toks' =
708f4980
C
931 find_optional_macro_to_expand ~defs:candidates toks in
932 let new_tr = mk_tokens_state toks' in
933 copy_tokens_state ~src:new_tr ~dst:tr;
934 let passx = get_one_elem ~pass:4 tr (file, filelines) in
935 passx
936 )
937 end
938 )
939 )
485bce71
C
940 end
941 in
942
943
944 (* again not sure if checkpoint2 corresponds to end of bad region *)
945 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
946 let checkpoint2_file = TH.file_of_tok tr.current in
947
ae4735db 948 let diffline =
b1b2de81 949 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
ae4735db 950 then (checkpoint2 - checkpoint)
485bce71
C
951 else 0
952 (* TODO? so if error come in middle of something ? where the
953 * start token was from original file but synchro found in body
954 * of macro ? then can have wrong number of lines stat.
955 * Maybe simpler just to look at tr.passed and count
956 * the lines in the token from the correct file ?
957 *)
34e49164 958 in
ae4735db 959 let info = mk_info_item file (List.rev tr.passed) in
34e49164 960
485bce71 961 (* some stat updates *)
ae4735db 962 stat.Stat.commentized <-
485bce71
C
963 stat.Stat.commentized + count_lines_commentized (snd info);
964
ae4735db 965 let elem =
485bce71 966 match elem with
ae4735db 967 | Left e ->
91eba41f
C
968 stat.Stat.correct <- stat.Stat.correct + diffline;
969 e
ae4735db
C
970 | Right (info_of_bads, line_error, toks_of_bads,
971 _passed_before_error, cur, exn) ->
708f4980
C
972
973 let was_define = is_define_passed tr.passed in
ae4735db 974
708f4980
C
975 if was_define && !Flag_parsing_c.filter_msg_define_error
976 then ()
977 else begin
978
979 (match exn with
ae4735db 980 | Lexer_c.Lexical _
708f4980
C
981 | Parsing.Parse_error
982 | Semantic_c.Semantic _ -> ()
983 | e -> raise e
984 );
985
986 if !Flag_parsing_c.show_parsing_error
ae4735db 987 then begin
708f4980
C
988 (match exn with
989 (* Lexical is not anymore launched I think *)
ae4735db 990 | Lexer_c.Lexical s ->
708f4980 991 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
ae4735db 992 | Parsing.Parse_error ->
708f4980 993 pr2 ("parse error \n = " ^ error_msg_tok cur)
ae4735db 994 | Semantic_c.Semantic (s, i) ->
708f4980
C
995 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
996 | e -> raise Impossible
997 );
998 (* bugfix: *)
ae4735db 999 if (checkpoint_file =$= checkpoint2_file) &&
708f4980
C
1000 checkpoint_file =$= file
1001 then print_bad line_error (checkpoint, checkpoint2) filelines
1002 else pr2 "PB: bad: but on tokens not from original file"
1003 end;
1004
ae4735db
C
1005
1006 let pbline =
1007 toks_of_bads
708f4980 1008 +> Common.filter (TH.is_same_line_or_close line_error)
ae4735db 1009 +> Common.filter TH.is_ident_like
708f4980 1010 in
ae4735db 1011 let error_info =
708f4980
C
1012 (pbline +> List.map TH.str_of_tok), line_error
1013 in
ae4735db 1014 stat.Stat.problematic_lines <-
708f4980
C
1015 error_info::stat.Stat.problematic_lines;
1016
1017 end;
1018
91eba41f
C
1019 if was_define && !Flag_parsing_c.filter_define_error
1020 then stat.Stat.correct <- stat.Stat.correct + diffline
1021 else stat.Stat.bad <- stat.Stat.bad + diffline;
1022
485bce71
C
1023 Ast_c.NotParsedCorrectly info_of_bads
1024 in
34e49164
C
1025
1026 (match elem with
1027 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
485bce71 1028 | xs -> (xs, info):: loop tr (* recurse *)
34e49164
C
1029 )
1030 in
485bce71 1031 let v = loop tr in
978fd7e5 1032 let v = with_program2 Parsing_consistency_c.consistency_checking v in
34e49164
C
1033 (v, stat)
1034
1035
ae4735db 1036let time_total_parsing a =
485bce71
C
1037 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a)
1038
ae4735db 1039let parse_print_error_heuristic a =
485bce71
C
1040 Common.profile_code "C parsing" (fun () -> time_total_parsing a)
1041
34e49164
C
1042
1043(* alias *)
1044let parse_c_and_cpp a = parse_print_error_heuristic a
1045
1046(*****************************************************************************)
1047(* Same but faster cos memoize stuff *)
1048(*****************************************************************************)
ae4735db
C
1049let parse_cache file =
1050 if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file
1051 else
485bce71 1052 let _ = pr2 "TOFIX" in
ae4735db 1053 let need_no_changed_files =
34e49164 1054 (* should use Sys.argv.(0), would be safer. *)
485bce71
C
1055
1056 [
1057 (* TOFIX
1058 Config.path ^ "/parsing_c/c_parser.cma";
ae4735db 1059 (* we may also depend now on the semantic patch because
485bce71 1060 the SP may use macro and so we will disable some of the
ae4735db 1061 macro expansions from standard.h.
485bce71
C
1062 *)
1063 !Config.std_h;
1064 *)
ae4735db 1065 ]
34e49164 1066 in
ae4735db 1067 let need_no_changed_variables =
34e49164 1068 (* could add some of the flags of flag_parsing_c.ml *)
ae4735db 1069 []
34e49164 1070 in
ae4735db
C
1071 Common.cache_computation_robust
1072 file ".ast_raw"
1073 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
34e49164
C
1074 (fun () -> parse_print_error_heuristic file)
1075
1076
1077
1078(*****************************************************************************)
485bce71 1079(* Some special cases *)
34e49164
C
1080(*****************************************************************************)
1081
485bce71 1082let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
708f4980
C
1083 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1084 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1085 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1086 program +> Common.find_some (fun (e,_) ->
485bce71
C
1087 match e with
1088 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1089 | _ -> None
1090 )
1091
1092let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
708f4980
C
1093 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1094 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1095 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1096 program +> Common.find_some (fun (e,_) ->
485bce71 1097 match e with
ae4735db 1098 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
485bce71 1099 (match compound with
ae4735db 1100 | [Ast_c.StmtElem st] ->
708f4980
C
1101 (match Ast_c.unwrap_st st with
1102 | Ast_c.ExprStatement (Some e) -> Some e
1103 | _ -> None
1104 )
485bce71
C
1105 | _ -> None
1106 )
1107 | _ -> None
1108 )