Release coccinelle-0.2.4rc5
[bpt/coccinelle.git] / parsing_c / parse_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
34e49164
C
19module LP = Lexer_parser
20
485bce71
C
21module Stat = Parsing_stat
22
34e49164
C
23(*****************************************************************************)
24(* Wrappers *)
25(*****************************************************************************)
ae4735db
C
26let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
34e49164
C
28(*****************************************************************************)
29(* Helpers *)
30(*****************************************************************************)
31
ae4735db
C
32let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34e49164 34
ae4735db 35let token_to_strpos tok =
34e49164
C
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
ae4735db 39let mk_info_item2 filename toks =
34e49164 40 let buf = Buffer.create 100 in
ae4735db 41 let s =
34e49164
C
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
ae4735db 44 toks +> List.iter (fun tok ->
34e49164 45 match TH.pinfo_of_tok tok with
ae4735db 46 | Ast_c.OriginTok _ ->
91eba41f 47 Buffer.add_string buf (TH.str_of_tok tok)
ae4735db 48 | Ast_c.AbstractLineTok _ ->
91eba41f 49 raise Impossible
34e49164
C
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
ae4735db 55 (s, toks)
34e49164 56
ae4735db
C
57let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
34e49164
C
59 (fun () -> mk_info_item2 a b)
60
61
ae4735db 62let info_same_line line xs =
b1b2de81 63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
34e49164 64
34e49164 65
978fd7e5
C
66(* move in cpp_token_c ? *)
67let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
ae4735db
C
69 if List.length xs >= 2
70 then
978fd7e5 71 (match Common.head_middle_tail xs with
ae4735db 72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
978fd7e5
C
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
ae4735db 78 false
978fd7e5
C
79 end
80
81
82(*****************************************************************************)
83(* Error diagnostic *)
84(*****************************************************************************)
85
ae4735db 86let error_msg_tok tok =
978fd7e5
C
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
ae4735db 89 then Common.error_message file (token_to_strpos tok)
978fd7e5
C
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
ae4735db 93let print_bad line_error (start_line, end_line) filelines =
978fd7e5
C
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
ae4735db
C
97 for i = start_line to end_line do
98 let line = filelines.(i) in
978fd7e5 99
ae4735db
C
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
978fd7e5
C
103 done
104 end
105
106
34e49164
C
107(*****************************************************************************)
108(* Stats on what was passed/commentized *)
109(*****************************************************************************)
110
c491d8ee 111let commentized xs = xs +> Common.tail_map_filter (function
ae4735db 112 | Parser_c.TCommentCpp (cppkind, ii) ->
485bce71 113 let s = Ast_c.str_of_info ii in
ae4735db
C
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
485bce71 116 | 0 -> false
ae4735db 117 | 1 ->
0708f913 118 List.mem cppkind [Token_c.CppAttr]
ae4735db 119 ||
485bce71 120 (s =~ "__.*")
ae4735db 121 | 2 ->
0708f913 122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
ae4735db 123 ||
485bce71 124 (s =~ "__.*")
ae4735db 125 | 3 ->
0708f913 126 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
ae4735db 127 ||
485bce71 128 (s =~ "__.*")
ae4735db 129 | 4 ->
0708f913 130 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
ae4735db 131 ||
485bce71
C
132 (s =~ "__.*")
133
134
ae4735db 135 | 5 ->
0708f913 136 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
ae4735db 137 ||
485bce71
C
138 (s =~ "__.*")
139
140
141
142
143 | _ -> failwith "not valid level passing number"
144 in
145 if legal_passing then None else Some (ii.Ast_c.pinfo)
146
147 (*
ae4735db 148 | Ast_c.CppOther ->
34e49164
C
149 (match s with
150 | s when s =~ "KERN_.*" -> None
151 | s when s =~ "__.*" -> None
ae4735db 152 | _ ->
485bce71 153 Some (ii.Ast_c.pinfo)
34e49164 154 )
485bce71
C
155 *)
156
ae4735db 157
34e49164 158 | Parser_c.TCommentMisc ii
ae4735db 159 | Parser_c.TAction ii
34e49164
C
160 ->
161 Some (ii.Ast_c.pinfo)
ae4735db 162 | _ ->
34e49164
C
163 None
164 )
ae4735db
C
165
166let count_lines_commentized xs =
34e49164
C
167 let line = ref (-1) in
168 let count = ref 0 in
169 begin
170 commentized xs +>
171 List.iter
172 (function
ae4735db 173 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
174 let newline = pinfo.Common.line in
175 if newline <> !line
176 then begin
177 line := newline;
178 incr count
179 end
180 | _ -> ());
181 !count
182 end
183
184
185
ae4735db 186let print_commentized xs =
34e49164
C
187 let line = ref (-1) in
188 begin
189 let ys = commentized xs in
190 ys +>
191 List.iter
192 (function
ae4735db 193 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
34e49164
C
194 let newline = pinfo.Common.line in
195 let s = pinfo.Common.str in
ae4735db
C
196 let s = Str.global_substitute
197 (Str.regexp "\n") (fun s -> "") s
34e49164 198 in
b1b2de81 199 if newline =|= !line
34e49164
C
200 then prerr_string (s ^ " ")
201 else begin
ae4735db
C
202 if !line =|= -1
203 then pr2_no_nl "passed:"
34e49164
C
204 else pr2_no_nl "\npassed:";
205 line := newline;
206 pr2_no_nl (s ^ " ");
207 end
208 | _ -> ());
209 if not (null ys) then pr2 "";
210 end
ae4735db 211
34e49164
C
212
213
214
215(*****************************************************************************)
216(* Lexing only *)
217(*****************************************************************************)
218
219(* called by parse_print_error_heuristic *)
ae4735db 220let tokens2 file =
708f4980 221 let table = Common.full_charpos_to_pos_large file in
34e49164 222
ae4735db 223 Common.with_open_infile file (fun chan ->
34e49164 224 let lexbuf = Lexing.from_channel chan in
ae4735db
C
225 try
226 let rec tokens_aux acc =
34e49164
C
227 let tok = Lexer_c.token lexbuf in
228 (* fill in the line and col information *)
ae4735db 229 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
34e49164
C
230 { ii with Ast_c.pinfo=
231 (* could assert pinfo.filename = file ? *)
232 match Ast_c.pinfo_of_info ii with
233 Ast_c.OriginTok pi ->
708f4980 234 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
34e49164 235 | Ast_c.ExpandedTok (pi,vpi) ->
708f4980 236 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
34e49164
C
237 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
238 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
239 })
240 in
241
242 if TH.is_eof tok
243 then List.rev (tok::acc)
244 else tokens_aux (tok::acc)
245 in
246 tokens_aux []
247 with
ae4735db
C
248 | Lexer_c.Lexical s ->
249 failwith ("lexical error " ^ s ^ "\n =" ^
34e49164
C
250 (Common.error_message file (lexbuf_to_strpos lexbuf)))
251 | e -> raise e
252 )
253
ae4735db
C
254let time_lexing ?(profile=true) a =
255 if profile
485bce71 256 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
951c7801 257 else tokens2 a
ae4735db 258let tokens ?profile a =
485bce71 259 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
34e49164
C
260
261
ae4735db 262let tokens_of_string string =
34e49164 263 let lexbuf = Lexing.from_string string in
ae4735db
C
264 try
265 let rec tokens_s_aux () =
34e49164
C
266 let tok = Lexer_c.token lexbuf in
267 if TH.is_eof tok
268 then [tok]
269 else tok::(tokens_s_aux ())
270 in
271 tokens_s_aux ()
272 with
273 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
274 | e -> raise e
275
276
277(*****************************************************************************)
278(* Parsing, but very basic, no more used *)
279(*****************************************************************************)
280
281(*
282 * !!!Those function use refs, and are not reentrant !!! so take care.
283 * It use globals defined in Lexer_parser.
ae4735db 284 *
34e49164
C
285 * update: because now lexer return comments tokens, those functions
286 * may not work anymore.
287 *)
288
ae4735db 289let parse file =
34e49164
C
290 let lexbuf = Lexing.from_channel (open_in file) in
291 let result = Parser_c.main Lexer_c.token lexbuf in
292 result
293
294
ae4735db 295let parse_print_error file =
34e49164
C
296 let chan = (open_in file) in
297 let lexbuf = Lexing.from_channel chan in
298
299 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
ae4735db 300 try
34e49164 301 lexbuf +> Parser_c.main Lexer_c.token
ae4735db
C
302 with
303 | Lexer_c.Lexical s ->
34e49164 304 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
ae4735db 305 | Parsing.Parse_error ->
34e49164 306 failwith ("parse error \n = " ^ error_msg ())
ae4735db 307 | Semantic_c.Semantic (s, i) ->
34e49164
C
308 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
309 | e -> raise e
310
311
312
313
314(*****************************************************************************)
315(* Parsing subelements, useful to debug parser *)
316(*****************************************************************************)
317
318(*
319 * !!!Those function use refs, and are not reentrant !!! so take care.
320 * It use globals defined in Lexer_parser.
321 *)
322
323
ae4735db
C
324(* old:
325 * let parse_gen parsefunc s =
34e49164
C
326 * let lexbuf = Lexing.from_string s in
327 * let result = parsefunc Lexer_c.token lexbuf in
328 * result
329 *)
330
ae4735db 331let parse_gen parsefunc s =
485bce71 332 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
34e49164
C
333
334
335 (* Why use this lexing scheme ? Why not classically give lexer func
ae4735db 336 * to parser ? Because I now keep comments in lexer. Could
34e49164
C
337 * just do a simple wrapper that when comment ask again for a token,
338 * but maybe simpler to use cur_tok technique.
339 *)
340 let all_tokens = ref toks in
341 let cur_tok = ref (List.hd !all_tokens) in
342
ae4735db
C
343 let lexer_function =
344 (fun _ ->
34e49164 345 if TH.is_eof !cur_tok
708f4980 346 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
34e49164
C
347 else
348 let v = Common.pop2 all_tokens in
349 cur_tok := v;
350 !cur_tok
ae4735db 351 )
34e49164
C
352 in
353 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
354 let result = parsefunc lexer_function lexbuf_fake in
355 result
356
357
358let type_of_string = parse_gen Parser_c.type_name
359let statement_of_string = parse_gen Parser_c.statement
360let expression_of_string = parse_gen Parser_c.expr
361
362(* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
363
364
365
366
367
34e49164 368(*****************************************************************************)
485bce71 369(* Parsing default define macros, usually in a standard.h file *)
34e49164
C
370(*****************************************************************************)
371
ae4735db
C
372let extract_macros2 file =
373 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
708f4980
C
374 Flag_parsing_c.verbose_lexing := false;
375 let toks = tokens ~profile:false file in
978fd7e5
C
376 let toks = Parsing_hacks.fix_tokens_define toks in
377 Cpp_token_c.extract_macros toks
708f4980 378 )
34e49164 379
ae4735db 380let extract_macros a =
978fd7e5 381 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
485bce71 382
34e49164
C
383
384(*****************************************************************************)
978fd7e5 385(* Helper for main entry point *)
34e49164
C
386(*****************************************************************************)
387
34e49164
C
388
389(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
390 * possible error recovery. Indeed, they allow to skip some tokens and
391 * still be able to call again the ocamlyacc parser. It is ugly code
392 * because we cant modify ocamllex and ocamlyacc. As we want some
393 * extended lexing tricks, we have to use such refs.
ae4735db 394 *
34e49164
C
395 * Those refs are now also used for my lalr(k) technique. Indeed They
396 * store the futur and previous tokens that were parsed, and so
397 * provide enough context information for powerful lex trick.
ae4735db 398 *
34e49164 399 * - passed_tokens_last_ckp stores the passed tokens since last
485bce71 400 * checkpoint. Used for NotParsedCorrectly and also to build the
34e49164
C
401 * info_item attached to each program_element.
402 * - passed_tokens_clean is used for lookahead, in fact for lookback.
403 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
404 * contain some comments and so would make pattern matching difficult
ae4735db 405 * in lookahead. Hence this variable. We would like also to get rid
34e49164
C
406 * of cpp instruction because sometimes a cpp instruction is between
407 * two tokens and makes a pattern matching fail. But lookahead also
408 * transform some cpp instruction (in comment) so can't remove them.
ae4735db 409 *
34e49164
C
410 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
411 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
412 * comment-tokens.
ae4735db 413 *
34e49164 414 * Normally we have:
ae4735db 415 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
34e49164 416 * after the call to pop2.
ae4735db 417 * toks = (reverse passed_tok) ++ remaining_tokens
34e49164
C
418 * at the and of the lexer_function call.
419 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
420 * At the end of lexer_function call, cur_tok overlap with passed_tok.
ae4735db 421 *
34e49164 422 * convention: I use "tr" for "tokens refs"
ae4735db 423 *
485bce71
C
424 * I now also need this lexing trick because the lexer return comment
425 * tokens.
34e49164
C
426 *)
427
428type tokens_state = {
429 mutable rest : Parser_c.token list;
430 mutable rest_clean : Parser_c.token list;
431 mutable current : Parser_c.token;
432 (* it's passed since last "checkpoint", not passed from the beginning *)
433 mutable passed : Parser_c.token list;
434 mutable passed_clean : Parser_c.token list;
435}
708f4980 436
ae4735db
C
437let mk_tokens_state toks =
438 {
708f4980
C
439 rest = toks;
440 rest_clean = (toks +> List.filter TH.is_not_comment);
441 current = (List.hd toks);
ae4735db 442 passed = [];
708f4980
C
443 passed_clean = [];
444 }
445
446
447
ae4735db 448let clone_tokens_state tr =
485bce71
C
449 { rest = tr.rest;
450 rest_clean = tr.rest_clean;
451 current = tr.current;
452 passed = tr.passed;
453 passed_clean = tr.passed_clean;
454 }
ae4735db 455let copy_tokens_state ~src ~dst =
485bce71
C
456 dst.rest <- src.rest;
457 dst.rest_clean <- src.rest_clean;
458 dst.current <- src.current;
459 dst.passed <- src.passed;
460 dst.passed_clean <- src.passed_clean;
461 ()
462
708f4980 463(* todo? agglomerate the x##b ? *)
485bce71
C
464let rec filter_noise n xs =
465 match n, xs with
466 | _, [] -> []
467 | 0, xs -> xs
ae4735db 468 | n, x::xs ->
485bce71 469 (match x with
ae4735db 470 | Parser_c.TMacroAttr _ ->
485bce71 471 filter_noise (n-1) xs
ae4735db 472 | _ ->
485bce71
C
473 x::filter_noise (n-1) xs
474 )
475
ae4735db 476let clean_for_lookahead xs =
485bce71
C
477 match xs with
478 | [] -> []
479 | [x] -> [x]
ae4735db 480 | x::xs ->
485bce71
C
481 x::filter_noise 10 xs
482
34e49164 483
485bce71 484
ae4735db 485(* Hacked lex. This function use refs passed by parse_print_error_heuristic
485bce71
C
486 * tr means token refs.
487 *)
ae4735db 488let rec lexer_function ~pass tr = fun lexbuf ->
34e49164 489 match tr.rest with
708f4980 490 | [] -> pr2_err "ALREADY AT END"; tr.current
ae4735db 491 | v::xs ->
34e49164
C
492 tr.rest <- xs;
493 tr.current <- v;
494
495 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
496
497 if TH.is_comment v
498 then begin
499 tr.passed <- v::tr.passed;
485bce71 500 lexer_function ~pass tr lexbuf
34e49164
C
501 end
502 else begin
503 let x = List.hd tr.rest_clean in
504 tr.rest_clean <- List.tl tr.rest_clean;
b1b2de81 505 assert (x =*= v);
ae4735db 506
34e49164 507 (match v with
113803cf 508
ae4735db 509 (* fix_define1.
113803cf
C
510 *
511 * Why not in parsing_hacks lookahead and do passing like
ae4735db
C
512 * I do for some ifdef directives ? Because here I also need to
513 * generate some tokens sometimes and so I need access to the
113803cf 514 * tr.passed, tr.rest, etc.
485bce71 515 *)
9f8e26f4 516 | Parser_c.TDefine (tok) ->
ae4735db 517 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 518 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 519 then begin
485bce71 520 incr Stat.nDefinePassing;
34e49164 521 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
0708f913 522 let v' = Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
34e49164
C
523 in
524 tr.passed <- v'::tr.passed;
978fd7e5
C
525 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
526 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
485bce71 527 lexer_function ~pass tr lexbuf
34e49164
C
528 end
529 else begin
530 tr.passed <- v::tr.passed;
531 tr.passed_clean <- v::tr.passed_clean;
532 v
533 end
ae4735db
C
534
535 | Parser_c.TInclude (includes, filename, inifdef, info) ->
b1b2de81 536 if not (LP.current_context () =*= LP.InTopLevel) &&
708f4980 537 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
34e49164 538 then begin
485bce71 539 incr Stat.nIncludePassing;
34e49164 540 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
0708f913 541 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
34e49164 542 tr.passed <- v::tr.passed;
485bce71 543 lexer_function ~pass tr lexbuf
34e49164
C
544 end
545 else begin
ae4735db 546 let (v,new_tokens) =
978fd7e5 547 Parsing_hacks.tokens_include (info, includes, filename, inifdef) in
ae4735db 548 let new_tokens_clean =
34e49164
C
549 new_tokens +> List.filter TH.is_not_comment in
550
551 tr.passed <- v::tr.passed;
552 tr.passed_clean <- v::tr.passed_clean;
553 tr.rest <- new_tokens ++ tr.rest;
554 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
555 v
556 end
ae4735db
C
557
558 | _ ->
559
34e49164
C
560 (* typedef_fix1 *)
561 let v = match v with
ae4735db
C
562 | Parser_c.TIdent (s, ii) ->
563 if
564 LP.is_typedef s &&
485bce71 565 not (!Flag_parsing_c.disable_add_typedef) &&
b1b2de81 566 pass =|= 1
34e49164
C
567 then Parser_c.TypedefIdent (s, ii)
568 else Parser_c.TIdent (s, ii)
569 | x -> x
570 in
ae4735db 571
485bce71
C
572 let v = Parsing_hacks.lookahead ~pass
573 (clean_for_lookahead (v::tr.rest_clean))
574 tr.passed_clean in
34e49164
C
575
576 tr.passed <- v::tr.passed;
ae4735db 577
485bce71 578 (* the lookahead may have changed the status of the token and
34e49164 579 * consider it as a comment, for instance some #include are
485bce71 580 * turned into comments, hence this code. *)
34e49164 581 match v with
485bce71 582 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
ae4735db 583 | v ->
34e49164
C
584 tr.passed_clean <- v::tr.passed_clean;
585 v
586 )
587 end
588
589
708f4980
C
590let max_pass = 4
591
34e49164 592
ae4735db 593let get_one_elem ~pass tr (file, filelines) =
485bce71
C
594
595 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
708f4980 596 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
485bce71
C
597
598 (* normally have to do that only when come from an exception in which
ae4735db 599 * case the dt() may not have been done
485bce71
C
600 * TODO but if was in scoped scope ? have to let only the last scope
601 * so need do a LP.lexer_reset_typedef ();
602 *)
ae4735db 603 LP.enable_typedef();
485bce71
C
604 LP._lexer_hint := (LP.default_hint ());
605 LP.save_typedef_state();
606
607 tr.passed <- [];
608
609 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
ae4735db
C
610
611 (try
485bce71
C
612 (* -------------------------------------------------- *)
613 (* Call parser *)
614 (* -------------------------------------------------- *)
ae4735db 615 Common.profile_code_exclusif "YACC" (fun () ->
c491d8ee 616 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
485bce71 617 )
ae4735db 618 with e ->
485bce71
C
619 LP.restore_typedef_state();
620
621 (* must keep here, before the code that adjusts the tr fields *)
622 let line_error = TH.line_of_tok tr.current in
708f4980
C
623
624 let passed_before_error = tr.passed in
625 let current = tr.current in
485bce71 626 (* error recovery, go to next synchro point *)
ae4735db 627 let (passed', rest') =
978fd7e5 628 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
485bce71
C
629 tr.rest <- rest';
630 tr.passed <- passed';
ae4735db 631
485bce71
C
632 tr.current <- List.hd passed';
633 tr.passed_clean <- []; (* enough ? *)
634 (* with error recovery, rest and rest_clean may not be in sync *)
635 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
ae4735db
C
636
637
638 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
639 Right (info_of_bads, line_error,
640 tr.passed, passed_before_error,
708f4980 641 current, e)
485bce71
C
642 )
643
644
645
978fd7e5
C
646(* Macro problem recovery *)
647(* used by the multi-pass error recovery expand-on-demand *)
648(*
ae4735db
C
649val candidate_macros_in_passed:
650 defs: (string, define_def) Hashtbl.t ->
978fd7e5
C
651 Parser_c.token list -> (string * define_def) list
652*)
653
ae4735db 654let candidate_macros_in_passed2 ~defs passed =
978fd7e5
C
655 let res = ref [] in
656 let res2 = ref [] in
657
658 passed +> List.iter (function
659 | Parser_c.TIdent (s,_)
660 (* bugfix: may have to undo some infered things *)
661 | Parser_c.TMacroIterator (s,_)
662 | Parser_c.TypedefIdent (s,_)
ae4735db 663 ->
978fd7e5 664 (match Common.hfind_option s defs with
ae4735db
C
665 | Some def ->
666 if s ==~ Parsing_hacks.regexp_macro
978fd7e5
C
667 then
668 (* pr2 (spf "candidate: %s" s); *)
ae4735db
C
669 Common.push2 (s, def) res
670 else
978fd7e5
C
671 Common.push2 (s, def) res2
672 | None -> ()
673 )
674
675 | _ -> ()
676 );
ae4735db
C
677 if null !res
678 then !res2
978fd7e5
C
679 else !res
680
ae4735db
C
681let candidate_macros_in_passed ~defs b =
682 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 683 candidate_macros_in_passed2 ~defs b)
ae4735db 684
978fd7e5
C
685
686
687
688
689let find_optional_macro_to_expand2 ~defs toks =
690
691 let defs = Common.hash_of_list defs in
692
c491d8ee 693 let toks = toks +> Common.tail_map (function
978fd7e5
C
694
695 (* special cases to undo *)
ae4735db 696 | Parser_c.TMacroIterator (s, ii) ->
978fd7e5
C
697 if Hashtbl.mem defs s
698 then Parser_c.TIdent (s, ii)
699 else Parser_c.TMacroIterator (s, ii)
700
ae4735db 701 | Parser_c.TypedefIdent (s, ii) ->
978fd7e5
C
702 if Hashtbl.mem defs s
703 then Parser_c.TIdent (s, ii)
704 else Parser_c.TypedefIdent (s, ii)
705
706 | x -> x
707 ) in
708
709 let tokens = toks in
710 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
711
712 (* just calling apply_macro_defs and having a specialized version
ae4735db 713 * of the code in fix_tokens_cpp is not enough as some work such
978fd7e5
C
714 * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
715 * will not get the chance to be run on the new expanded tokens.
ae4735db 716 * Hence even if it's expensive, it's currently better to
978fd7e5
C
717 * just call directly fix_tokens_cpp again here.
718
719 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
720 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
721 let paren_grouped = TV.mk_parenthised cleaner in
722 Cpp_token_c.apply_macro_defs
723 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
724 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
725 defs paren_grouped;
726 (* because the before field is used by apply_macro_defs *)
ae4735db
C
727 tokens2 := TV.rebuild_tokens_extented !tokens2;
728 Parsing_hacks.insert_virtual_positions
978fd7e5
C
729 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
730 *)
ae4735db
C
731let find_optional_macro_to_expand ~defs a =
732 Common.profile_code "MACRO managment" (fun () ->
978fd7e5 733 find_optional_macro_to_expand2 ~defs a)
ae4735db 734
978fd7e5
C
735
736
737
738
739(*****************************************************************************)
740(* Main entry points *)
741(*****************************************************************************)
742
ae4735db 743let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
744 ref (Hashtbl.create 101)
745
ae4735db 746let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
978fd7e5
C
747 ref (Hashtbl.create 101)
748
749
750(* can not be put in parsing_hack, cos then mutually recursive problem as
751 * we also want to parse the standard.h file.
752 *)
ae4735db 753let init_defs_macros std_h =
978fd7e5
C
754 if not (Common.lfile_exists std_h)
755 then pr2 ("warning: Can't find default macro file: " ^ std_h)
756 else begin
757 pr2 ("init_defs: " ^ std_h);
758 _defs := Common.hash_of_list (extract_macros std_h);
759 end
760
ae4735db 761let init_defs_builtins file_h =
978fd7e5
C
762 if not (Common.lfile_exists file_h)
763 then pr2 ("warning: Can't find macro file: " ^ file_h)
764 else begin
765 pr2 ("init_defs_builtins: " ^ file_h);
ae4735db 766 _defs_builtins :=
978fd7e5
C
767 Common.hash_of_list (extract_macros file_h);
768 end
769
770
771
772type info_item = string * Parser_c.token list
773
774type program2 = toplevel2 list
775 and toplevel2 = Ast_c.toplevel * info_item
776
ae4735db 777let program_of_program2 xs =
978fd7e5
C
778 xs +> List.map fst
779
ae4735db
C
780let with_program2 f program2 =
781 program2
782 +> Common.unzip
783 +> (fun (program, infos) ->
978fd7e5
C
784 f program, infos
785 )
786 +> Common.uncurry Common.zip
787
788
789
790
791
485bce71 792
34e49164 793(* note: as now we go in 2 passes, there is first all the error message of
485bce71 794 * the lexer, and then the error of the parser. It is not anymore
34e49164 795 * interwinded.
ae4735db 796 *
34e49164
C
797 * !!!This function use refs, and is not reentrant !!! so take care.
798 * It use globals defined in Lexer_parser and also the _defs global
ae4735db
C
799 * in parsing_hack.ml.
800 *
485bce71
C
801 * This function uses internally some semi globals in the
802 * tokens_stat record and parsing_stat record.
34e49164
C
803 *)
804
ae4735db 805let parse_print_error_heuristic2 file =
34e49164 806
91eba41f 807 let filelines = Common.cat_array file in
485bce71
C
808 let stat = Parsing_stat.default_stat file in
809
34e49164
C
810 (* -------------------------------------------------- *)
811 (* call lexer and get all the tokens *)
812 (* -------------------------------------------------- *)
ae4735db 813 LP.lexer_reset_typedef();
485bce71 814 Parsing_hacks.ifdef_paren_cnt := 0;
708f4980 815
485bce71 816 let toks_orig = tokens file in
978fd7e5 817 let toks = Parsing_hacks.fix_tokens_define toks_orig in
708f4980 818 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
34e49164 819
708f4980 820 (* expand macros on demand trick, preparation phase *)
ae4735db
C
821 let macros =
822 Common.profile_code "MACRO mgmt prep 1" (fun () ->
708f4980
C
823 let macros = Hashtbl.copy !_defs in
824 (* include also builtins as some macros may generate some builtins too
825 * like __decl_spec or __stdcall
826 *)
ae4735db 827 !_defs_builtins +> Hashtbl.iter (fun s def ->
708f4980
C
828 Hashtbl.replace macros s def;
829 );
830 macros
831 )
832 in
ae4735db 833 Common.profile_code "MACRO mgmt prep 2" (fun () ->
978fd7e5 834 let local_macros = extract_macros file in
ae4735db 835 local_macros +> List.iter (fun (s, def) ->
708f4980
C
836 Hashtbl.replace macros s def;
837 );
838 );
34e49164 839
708f4980 840 let tr = mk_tokens_state toks in
485bce71
C
841
842 let rec loop tr =
34e49164
C
843
844 (* todo?: I am not sure that it represents current_line, cos maybe
845 * tr.current partipated in the previous parsing phase, so maybe tr.current
846 * is not the first token of the next parsing phase. Same with checkpoint2.
847 * It would be better to record when we have a } or ; in parser.mly,
848 * cos we know that they are the last symbols of external_declaration2.
485bce71
C
849 *
850 * bugfix: may not be equal to 'file' as after macro expansions we can
851 * start to parse a new entity from the body of a macro, for instance
852 * when parsing a define_machine() body, cf standard.h
34e49164
C
853 *)
854 let checkpoint = TH.line_of_tok tr.current in
485bce71 855 let checkpoint_file = TH.file_of_tok tr.current in
34e49164 856
485bce71 857 (* call the parser *)
ae4735db
C
858 let elem =
859 let pass1 =
860 Common.profile_code "Parsing: 1st pass" (fun () ->
708f4980
C
861 get_one_elem ~pass:1 tr (file, filelines)
862 ) in
485bce71
C
863 match pass1 with
864 | Left e -> Left e
ae4735db 865 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
708f4980
C
866 if !Flag_parsing_c.disable_multi_pass
867 then pass1
485bce71 868 else begin
ae4735db 869 Common.profile_code "Parsing: multi pass" (fun () ->
708f4980
C
870
871 pr2_err "parsing pass2: try again";
872 let toks = List.rev passed ++ tr.rest in
873 let new_tr = mk_tokens_state toks in
874 copy_tokens_state ~src:new_tr ~dst:tr;
875 let passx = get_one_elem ~pass:2 tr (file, filelines) in
876
877 (match passx with
878 | Left e -> passx
ae4735db
C
879 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
880 let candidates =
881 candidate_macros_in_passed ~defs:macros passed
708f4980 882 in
ae4735db 883
978fd7e5 884
708f4980
C
885 if is_define_passed passed || null candidates
886 then passx
887 else begin
888 (* todo factorize code *)
889
890 pr2_err "parsing pass3: try again";
891 let toks = List.rev passed ++ tr.rest in
ae4735db 892 let toks' =
708f4980
C
893 find_optional_macro_to_expand ~defs:candidates toks in
894 let new_tr = mk_tokens_state toks' in
895 copy_tokens_state ~src:new_tr ~dst:tr;
896 let passx = get_one_elem ~pass:3 tr (file, filelines) in
897
898 (match passx with
899 | Left e -> passx
ae4735db 900 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
708f4980
C
901 pr2_err "parsing pass4: try again";
902
ae4735db
C
903 let candidates =
904 candidate_macros_in_passed
905 ~defs:macros passed
978fd7e5 906 in
708f4980
C
907
908 let toks = List.rev passed ++ tr.rest in
ae4735db 909 let toks' =
708f4980
C
910 find_optional_macro_to_expand ~defs:candidates toks in
911 let new_tr = mk_tokens_state toks' in
912 copy_tokens_state ~src:new_tr ~dst:tr;
913 let passx = get_one_elem ~pass:4 tr (file, filelines) in
914 passx
915 )
916 end
917 )
918 )
485bce71
C
919 end
920 in
921
922
923 (* again not sure if checkpoint2 corresponds to end of bad region *)
924 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
925 let checkpoint2_file = TH.file_of_tok tr.current in
926
ae4735db 927 let diffline =
b1b2de81 928 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
ae4735db 929 then (checkpoint2 - checkpoint)
485bce71
C
930 else 0
931 (* TODO? so if error come in middle of something ? where the
932 * start token was from original file but synchro found in body
933 * of macro ? then can have wrong number of lines stat.
934 * Maybe simpler just to look at tr.passed and count
935 * the lines in the token from the correct file ?
936 *)
34e49164 937 in
ae4735db 938 let info = mk_info_item file (List.rev tr.passed) in
34e49164 939
485bce71 940 (* some stat updates *)
ae4735db 941 stat.Stat.commentized <-
485bce71
C
942 stat.Stat.commentized + count_lines_commentized (snd info);
943
ae4735db 944 let elem =
485bce71 945 match elem with
ae4735db 946 | Left e ->
91eba41f
C
947 stat.Stat.correct <- stat.Stat.correct + diffline;
948 e
ae4735db
C
949 | Right (info_of_bads, line_error, toks_of_bads,
950 _passed_before_error, cur, exn) ->
708f4980
C
951
952 let was_define = is_define_passed tr.passed in
ae4735db 953
708f4980
C
954 if was_define && !Flag_parsing_c.filter_msg_define_error
955 then ()
956 else begin
957
958 (match exn with
ae4735db 959 | Lexer_c.Lexical _
708f4980
C
960 | Parsing.Parse_error
961 | Semantic_c.Semantic _ -> ()
962 | e -> raise e
963 );
964
965 if !Flag_parsing_c.show_parsing_error
ae4735db 966 then begin
708f4980
C
967 (match exn with
968 (* Lexical is not anymore launched I think *)
ae4735db 969 | Lexer_c.Lexical s ->
708f4980 970 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
ae4735db 971 | Parsing.Parse_error ->
708f4980 972 pr2 ("parse error \n = " ^ error_msg_tok cur)
ae4735db 973 | Semantic_c.Semantic (s, i) ->
708f4980
C
974 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
975 | e -> raise Impossible
976 );
977 (* bugfix: *)
ae4735db 978 if (checkpoint_file =$= checkpoint2_file) &&
708f4980
C
979 checkpoint_file =$= file
980 then print_bad line_error (checkpoint, checkpoint2) filelines
981 else pr2 "PB: bad: but on tokens not from original file"
982 end;
983
ae4735db
C
984
985 let pbline =
986 toks_of_bads
708f4980 987 +> Common.filter (TH.is_same_line_or_close line_error)
ae4735db 988 +> Common.filter TH.is_ident_like
708f4980 989 in
ae4735db 990 let error_info =
708f4980
C
991 (pbline +> List.map TH.str_of_tok), line_error
992 in
ae4735db 993 stat.Stat.problematic_lines <-
708f4980
C
994 error_info::stat.Stat.problematic_lines;
995
996 end;
997
91eba41f
C
998 if was_define && !Flag_parsing_c.filter_define_error
999 then stat.Stat.correct <- stat.Stat.correct + diffline
1000 else stat.Stat.bad <- stat.Stat.bad + diffline;
1001
485bce71
C
1002 Ast_c.NotParsedCorrectly info_of_bads
1003 in
34e49164
C
1004
1005 (match elem with
1006 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
485bce71 1007 | xs -> (xs, info):: loop tr (* recurse *)
34e49164
C
1008 )
1009 in
485bce71 1010 let v = loop tr in
978fd7e5 1011 let v = with_program2 Parsing_consistency_c.consistency_checking v in
34e49164
C
1012 (v, stat)
1013
1014
ae4735db 1015let time_total_parsing a =
485bce71
C
1016 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a)
1017
ae4735db 1018let parse_print_error_heuristic a =
485bce71
C
1019 Common.profile_code "C parsing" (fun () -> time_total_parsing a)
1020
34e49164
C
1021
1022(* alias *)
1023let parse_c_and_cpp a = parse_print_error_heuristic a
1024
1025(*****************************************************************************)
1026(* Same but faster cos memoize stuff *)
1027(*****************************************************************************)
ae4735db
C
1028let parse_cache file =
1029 if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file
1030 else
485bce71 1031 let _ = pr2 "TOFIX" in
ae4735db 1032 let need_no_changed_files =
34e49164 1033 (* should use Sys.argv.(0), would be safer. *)
485bce71
C
1034
1035 [
1036 (* TOFIX
1037 Config.path ^ "/parsing_c/c_parser.cma";
ae4735db 1038 (* we may also depend now on the semantic patch because
485bce71 1039 the SP may use macro and so we will disable some of the
ae4735db 1040 macro expansions from standard.h.
485bce71
C
1041 *)
1042 !Config.std_h;
1043 *)
ae4735db 1044 ]
34e49164 1045 in
ae4735db 1046 let need_no_changed_variables =
34e49164 1047 (* could add some of the flags of flag_parsing_c.ml *)
ae4735db 1048 []
34e49164 1049 in
ae4735db
C
1050 Common.cache_computation_robust
1051 file ".ast_raw"
1052 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
34e49164
C
1053 (fun () -> parse_print_error_heuristic file)
1054
1055
1056
1057(*****************************************************************************)
485bce71 1058(* Some special cases *)
34e49164
C
1059(*****************************************************************************)
1060
485bce71 1061let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
708f4980
C
1062 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1063 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1064 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1065 program +> Common.find_some (fun (e,_) ->
485bce71
C
1066 match e with
1067 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1068 | _ -> None
1069 )
1070
1071let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
708f4980
C
1072 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1073 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1074 let program = parse_c_and_cpp tmpfile +> fst in
ae4735db 1075 program +> Common.find_some (fun (e,_) ->
485bce71 1076 match e with
ae4735db 1077 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
485bce71 1078 (match compound with
ae4735db 1079 | [Ast_c.StmtElem st] ->
708f4980
C
1080 (match Ast_c.unwrap_st st with
1081 | Ast_c.ExprStatement (Some e) -> Some e
1082 | _ -> None
1083 )
485bce71
C
1084 | _ -> None
1085 )
1086 | _ -> None
1087 )