1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 module TH = Token_helpers
19 module LP = Lexer_parser
20
21 module Stat = Parsing_stat
22
23 (*****************************************************************************)
24 (* Wrappers *)
25 (*****************************************************************************)
26 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
28 (*****************************************************************************)
29 (* Helpers *)
30 (*****************************************************************************)
31
32 let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34
35 let token_to_strpos tok =
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
39 let mk_info_item2 filename toks =
40 let buf = Buffer.create 100 in
41 let s =
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
44 toks +> List.iter (fun tok ->
45 match TH.pinfo_of_tok tok with
46 | Ast_c.OriginTok _ ->
47 Buffer.add_string buf (TH.str_of_tok tok)
48 | Ast_c.AbstractLineTok _ ->
49 raise (Impossible 79)
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
55 (s, toks)
56
57 let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
59 (fun () -> mk_info_item2 a b)
60
61
62 let info_same_line line xs =
63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
64
65
66 (* move in cpp_token_c ? *)
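(* is_define_passed checks whether the tokens passed during error recovery
 * form a complete #define, i.e. a TDefine followed (ignoring comments) by a
 * TDefEOL. It is used below both to decide whether further recovery passes
 * are worth trying and to optionally downgrade errors that occur inside
 * macro definitions (see the filter_msg_define_error and
 * filter_define_error flags). *)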
67 let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
69 if List.length xs >= 2
70 then
71 (match Common.head_middle_tail xs with
72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
78 false
79 end
80
81
82 (*****************************************************************************)
83 (* Error diagnostic *)
84 (*****************************************************************************)
85
86 let error_msg_tok tok =
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
89 then Common.error_message file (token_to_strpos tok)
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
93 let print_bad line_error (start_line, end_line) filelines =
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
97 for i = start_line to end_line do
98 let line = filelines.(i) in
99
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
103 done
104 end
105
106
107 (*****************************************************************************)
108 (* Stats on what was passed/commentized *)
109 (*****************************************************************************)
110
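(* commentized returns the position info of the passed (commentized) tokens
 * that are not considered a legitimate passing, so that they can be counted
 * in the statistics. !Flag_parsing_c.filter_passed_level controls what
 * counts as legitimate:
 *   0: nothing
 *   1: attributes (CppAttr) and identifiers starting with __
 *   2: level 1 plus CppPassingNormal
 *   3: level 2 plus cpp directives (CppDirective, CppIfDirective)
 *   4: level 2 plus passed macros (CppMacro)
 *   5: level 3 plus passed macros (CppMacro)
 * Any other value raises a failure. TCommentMisc and TAction tokens are
 * always counted. *)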
111 let commentized xs = xs +> Common.tail_map_filter (function
112 | Parser_c.TCommentCpp (cppkind, ii) ->
113 let s = Ast_c.str_of_info ii in
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
116 | 0 -> false
117 | 1 ->
118 List.mem cppkind [Token_c.CppAttr]
119 ||
120 (s =~ "__.*")
121 | 2 ->
122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
123 ||
124 (s =~ "__.*")
125 | 3 ->
126 (match cppkind with
127 Token_c.CppAttr | Token_c.CppPassingNormal
128 | Token_c.CppDirective | Token_c.CppIfDirective _ -> true
129 | _ -> false)
130 ||
131 (s =~ "__.*")
132 | 4 ->
133 List.mem cppkind
134 [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
135 ||
136 (s =~ "__.*")
137
138
139 | 5 ->
140 (match cppkind with
141 Token_c.CppAttr | Token_c.CppPassingNormal
142 | Token_c.CppDirective | Token_c.CppIfDirective _
143 | Token_c.CppMacro -> true
144 | _ -> false)
145 ||
146 (s =~ "__.*")
147
148
149
150
151 | _ -> failwith "not valid level passing number"
152 in
153 if legal_passing then None else Some (ii.Ast_c.pinfo)
154
155 (*
156 | Ast_c.CppOther ->
157 (match s with
158 | s when s =~ "KERN_.*" -> None
159 | s when s =~ "__.*" -> None
160 | _ ->
161 Some (ii.Ast_c.pinfo)
162 )
163 *)
164
165
166 | Parser_c.TCommentMisc ii
167 | Parser_c.TAction ii
168 ->
169 Some (ii.Ast_c.pinfo)
170 | _ ->
171 None
172 )
173
174 let count_lines_commentized xs =
175 let line = ref (-1) in
176 let count = ref 0 in
177 begin
178 commentized xs +>
179 List.iter
180 (function
181 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
182 let newline = pinfo.Common.line in
183 if newline <> !line
184 then begin
185 line := newline;
186 incr count
187 end
188 | _ -> ());
189 !count
190 end
191
192
193
194 let print_commentized xs =
195 let line = ref (-1) in
196 begin
197 let ys = commentized xs in
198 ys +>
199 List.iter
200 (function
201 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
202 let newline = pinfo.Common.line in
203 let s = pinfo.Common.str in
204 let s = Str.global_substitute
205 (Str.regexp "\n") (fun s -> "") s
206 in
207 if newline =|= !line
208 then prerr_string (s ^ " ")
209 else begin
210 if !line =|= -1
211 then pr2_no_nl "passed:"
212 else pr2_no_nl "\npassed:";
213 line := newline;
214 pr2_no_nl (s ^ " ");
215 end
216 | _ -> ());
217 if not (null ys) then pr2 "";
218 end
219
220
221
222
223 (*****************************************************************************)
224 (* Lexing only *)
225 (*****************************************************************************)
226
227 (* called by parse_print_error_heuristic *)
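(* tokens2 lexes the whole file. Common.full_charpos_to_pos_large first builds
 * a table mapping absolute character positions in the file to line/column
 * information, which complete_parse_info_large then uses (below) to fill in
 * the position of every OriginTok and ExpandedTok returned by the lexer. *)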
228 let tokens2 file =
229 let table = Common.full_charpos_to_pos_large file in
230
231 Common.with_open_infile file (fun chan ->
232 let lexbuf = Lexing.from_channel chan in
233 try
234 let rec tokens_aux acc =
235 let tok = Lexer_c.token lexbuf in
236 (* fill in the line and col information *)
237 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
238 { ii with Ast_c.pinfo=
239 (* could assert pinfo.filename = file ? *)
240 match Ast_c.pinfo_of_info ii with
241 Ast_c.OriginTok pi ->
242 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
243 | Ast_c.ExpandedTok (pi,vpi) ->
244 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
245 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
246 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
247 })
248 in
249
250 if TH.is_eof tok
251 then List.rev (tok::acc)
252 else tokens_aux (tok::acc)
253 in
254 tokens_aux []
255 with
256 | Lexer_c.Lexical s ->
257 failwith ("lexical error " ^ s ^ "\n =" ^
258 (Common.error_message file (lexbuf_to_strpos lexbuf)))
259 | e -> raise e
260 )
261
262 let time_lexing ?(profile=true) a =
263 if profile
264 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
265 else tokens2 a
266 let tokens ?profile a =
267 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
268
269
270 let tokens_of_string string =
271 let lexbuf = Lexing.from_string string in
272 try
273 let rec tokens_s_aux () =
274 let tok = Lexer_c.token lexbuf in
275 if TH.is_eof tok
276 then [tok]
277 else tok::(tokens_s_aux ())
278 in
279 tokens_s_aux ()
280 with
281 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
282 | e -> raise e
283
284
285 (*****************************************************************************)
286 (* Parsing, but very basic; no longer used *)
287 (*****************************************************************************)
288
289 (*
290 * !!! These functions use refs and are not reentrant !!! so take care.
291 * They use globals defined in Lexer_parser.
292 *
293 * update: because the lexer now returns comment tokens, these functions
294 * may not work anymore.
295 *)
296
297 let parse file =
298 let lexbuf = Lexing.from_channel (open_in file) in
299 let result = Parser_c.main Lexer_c.token lexbuf in
300 result
301
302
303 let parse_print_error file =
304 let chan = (open_in file) in
305 let lexbuf = Lexing.from_channel chan in
306
307 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
308 try
309 lexbuf +> Parser_c.main Lexer_c.token
310 with
311 | Lexer_c.Lexical s ->
312 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
313 | Parsing.Parse_error ->
314 failwith ("parse error \n = " ^ error_msg ())
315 | Semantic_c.Semantic (s, i) ->
316 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
317 | e -> raise e
318
319
320
321
322 (*****************************************************************************)
323 (* Parsing subelements, useful to debug parser *)
324 (*****************************************************************************)
325
326 (*
327 * !!! These functions use refs and are not reentrant !!! so take care.
328 * They use globals defined in Lexer_parser.
329 *)
330
331
332 (* old:
333 * let parse_gen parsefunc s =
334 * let lexbuf = Lexing.from_string s in
335 * let result = parsefunc Lexer_c.token lexbuf in
336 * result
337 *)
338
339 let parse_gen parsefunc s =
340 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
341
342
343 (* Why use this lexing scheme? Why not classically give the lexer function
344 * to the parser? Because I now keep comments in the lexer. I could
345 * just write a simple wrapper that asks for another token when it sees a
346 * comment, but it is maybe simpler to use the cur_tok technique.
347 *)
348 let all_tokens = ref toks in
349 let cur_tok = ref (List.hd !all_tokens) in
350
351 let lexer_function =
352 (fun _ ->
353 if TH.is_eof !cur_tok
354 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
355 else
356 let v = Common.pop2 all_tokens in
357 cur_tok := v;
358 !cur_tok
359 )
360 in
361 let lexbuf_fake = Lexing.from_function (fun buf n -> raise (Impossible 80)) in
362 let result = parsefunc lexer_function lexbuf_fake in
363 result
364
365
366 let type_of_string = parse_gen Parser_c.type_name
367 let statement_of_string = parse_gen Parser_c.statement
368 let expression_of_string = parse_gen Parser_c.expr
369
370 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
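(* More usage sketches (untested, for illustration only; the input strings
 * below are made up):
 * ex: type_of_string "struct us_data *"
 * ex: expression_of_string "a == NULL ? b : c"
 * As stated above, these helpers rely on global state in Lexer_parser and
 * are mainly meant for debugging the parser. *)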
371
372
373
374
375
376 (*****************************************************************************)
377 (* Parsing default define macros, usually in a standard.h file *)
378 (*****************************************************************************)
379
380 let extract_macros2 file =
381 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
382 Flag_parsing_c.verbose_lexing := false;
383 let toks = tokens ~profile:false file in
384 let toks = Parsing_hacks.fix_tokens_define toks in
385 Cpp_token_c.extract_macros toks
386 )
387
388 let extract_macros a =
389 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
390
391
392 (*****************************************************************************)
393 (* Helper for main entry point *)
394 (*****************************************************************************)
395
396
397 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
398 * error recovery possible. Indeed, they allow us to skip some tokens and
399 * still be able to call the ocamlyacc parser again. It is ugly code
400 * because we can't modify ocamllex and ocamlyacc. As we want some
401 * extended lexing tricks, we have to use such refs.
402 *
403 * Those refs are now also used for my lalr(k) technique. Indeed, they
404 * store the future and previous tokens that were parsed, and so
405 * provide enough context information for powerful lexing tricks.
406 *
407 * - passed_tokens_last_ckp stores the tokens passed since the last
408 * checkpoint. Used for NotParsedCorrectly and also to build the
409 * info_item attached to each program_element.
410 * - passed_tokens_clean is used for lookahead, in fact for lookback.
411 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
412 * contains some comments which would make pattern matching difficult
413 * in lookahead. Hence this variable. We would also like to get rid
414 * of cpp instructions because sometimes a cpp instruction is between
415 * two tokens and makes a pattern match fail. But lookahead also
416 * transforms some cpp instructions (into comments) so we can't remove them.
417 *
418 * So remaining_tokens and passed_tokens_last_ckp contain comment-tokens,
419 * whereas passed_tokens_clean and remaining_tokens_clean do not contain
420 * comment-tokens.
421 *
422 * Normally we have:
423 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
424 * after the call to pop2.
425 * toks = (reverse passed_tok) ++ remaining_tokens
426 * at the end of the lexer_function call.
427 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
428 * At the end of a lexer_function call, cur_tok overlaps with passed_tok.
429 *
430 * convention: I use "tr" for "tokens refs"
431 *
432 * I now also need this lexing trick because the lexer returns comment
433 * tokens.
434 *)
435
436 type tokens_state = {
437 mutable rest : Parser_c.token list;
438 mutable rest_clean : Parser_c.token list;
439 mutable current : Parser_c.token;
440 (* the tokens passed since the last "checkpoint", not since the beginning *)
441 mutable passed : Parser_c.token list;
442 mutable passed_clean : Parser_c.token list;
443 }
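(* A rough sketch, kept as a comment and not used by the code, of the
 * invariant described above in terms of the tokens_state fields. It only
 * holds when lexer_function has not rewritten or injected tokens (e.g. for
 * #define passing or #include expansion); the helper name is made up:
 *
 * let tokens_state_invariant ~original_toks tr =
 *   List.rev tr.passed ++ tr.rest = original_toks
 *   && (match tr.passed with
 *       | [] -> true
 *       | x :: _ -> x = tr.current (* cur_tok overlaps with passed *))
 *)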
444
445 let mk_tokens_state toks =
446 {
447 rest = toks;
448 rest_clean = (toks +> List.filter TH.is_not_comment);
449 current = (List.hd toks);
450 passed = [];
451 passed_clean = [];
452 }
453
454
455
456 let clone_tokens_state tr =
457 { rest = tr.rest;
458 rest_clean = tr.rest_clean;
459 current = tr.current;
460 passed = tr.passed;
461 passed_clean = tr.passed_clean;
462 }
463 let copy_tokens_state ~src ~dst =
464 dst.rest <- src.rest;
465 dst.rest_clean <- src.rest_clean;
466 dst.current <- src.current;
467 dst.passed <- src.passed;
468 dst.passed_clean <- src.passed_clean;
469 ()
470
471 (* todo? agglomerate the x##b ? *)
472 let rec filter_noise n xs =
473 match n, xs with
474 | _, [] -> []
475 | 0, xs -> xs
476 | n, x::xs ->
477 (match x with
478 | Parser_c.TMacroAttr _ ->
479 filter_noise (n-1) xs
480 | _ ->
481 x::filter_noise (n-1) xs
482 )
483
484 let clean_for_lookahead xs =
485 match xs with
486 | [] -> []
487 | [x] -> [x]
488 | x::xs ->
489 x::filter_noise 10 xs
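(* For example, on a list [x; attr1; y; attr2; z] where attr1 and attr2 are
 * TMacroAttr tokens, clean_for_lookahead keeps the head unconditionally and
 * filters the attributes out of (at most) the next 10 tokens, yielding
 * [x; y; z], so that the lookahead pattern matching stays simple. *)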
490
491
492
493 (* Hacked lex. This function uses refs passed by parse_print_error_heuristic;
494 * tr means token refs.
495 *)
496 let rec lexer_function ~pass tr = fun lexbuf ->
497 match tr.rest with
498 | [] -> pr2_err "ALREADY AT END"; tr.current
499 | v::xs ->
500 tr.rest <- xs;
501 tr.current <- v;
502
503 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
504
505 if TH.is_comment v
506 then begin
507 tr.passed <- v::tr.passed;
508 lexer_function ~pass tr lexbuf
509 end
510 else begin
511 let x = List.hd tr.rest_clean in
512 tr.rest_clean <- List.tl tr.rest_clean;
513 assert (x =*= v);
514
515 (match v with
516
517 (* fix_define1.
518 *
519 * Why not do the passing in the parsing_hacks lookahead, like
520 * I do for some ifdef directives? Because here I also sometimes need
521 * to generate some tokens, and so I need access to
522 * tr.passed, tr.rest, etc.
523 *)
524 | Parser_c.TDefine (tok) ->
525 if not (LP.current_context () =*= LP.InTopLevel) &&
526 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
527 then begin
528 incr Stat.nDefinePassing;
529 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
530 let v' =
531 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
532 in
533 tr.passed <- v'::tr.passed;
534 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
535 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
536 lexer_function ~pass tr lexbuf
537 end
538 else begin
539 tr.passed <- v::tr.passed;
540 tr.passed_clean <- v::tr.passed_clean;
541 v
542 end
543
544 | Parser_c.TUndef (tok) ->
545 if not (LP.current_context () =*= LP.InTopLevel) &&
546 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
547 then begin
548 incr Stat.nUndefPassing;
549 pr2_once ("CPP-UNDEF: inside function, I treat it as comment");
550 let v' =
551 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
552 in
553 tr.passed <- v'::tr.passed;
554 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
555 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
556 lexer_function ~pass tr lexbuf
557 end
558 else begin
559 tr.passed <- v::tr.passed;
560 tr.passed_clean <- v::tr.passed_clean;
561 v
562 end
563
564 | Parser_c.TInclude (includes, filename, inifdef, info) ->
565 if not (LP.current_context () =*= LP.InTopLevel) &&
566 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
567 then begin
568 incr Stat.nIncludePassing;
569 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
570 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
571 tr.passed <- v::tr.passed;
572 lexer_function ~pass tr lexbuf
573 end
574 else begin
575 let (v,new_tokens) =
576 Parsing_hacks.tokens_include(info, includes, filename, inifdef) in
577 let new_tokens_clean =
578 new_tokens +> List.filter TH.is_not_comment in
579
580 tr.passed <- v::tr.passed;
581 tr.passed_clean <- v::tr.passed_clean;
582 tr.rest <- new_tokens ++ tr.rest;
583 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
584 v
585 end
586
587 | _ ->
588
589 (* typedef_fix1 *)
590 let v = match v with
591 | Parser_c.TIdent (s, ii) ->
592 if
593 LP.is_typedef s &&
594 not (!Flag_parsing_c.disable_add_typedef) &&
595 pass =|= 1
596 then Parser_c.TypedefIdent (s, ii)
597 else Parser_c.TIdent (s, ii)
598 | x -> x
599 in
600
601 let v = Parsing_hacks.lookahead ~pass
602 (clean_for_lookahead (v::tr.rest_clean))
603 tr.passed_clean in
604
605 tr.passed <- v::tr.passed;
606
607 (* the lookahead may have changed the status of the token and
608 * considered it as a comment; for instance some #includes are
609 * turned into comments, hence this code. *)
610 match v with
611 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
612 | v ->
613 tr.passed_clean <- v::tr.passed_clean;
614 v
615 )
616 end
617
618
619 let max_pass = 4
620
621 let get_one_elem ~pass tr (file, filelines) =
622
623 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
624 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
625
626 (* normally we have to do that only when we come from an exception, in
627 * which case the dt() may not have been done
628 * TODO but what if we were in a nested scope ? we should keep only the
629 * last scope, so we would need to do a LP.lexer_reset_typedef ();
630 *)
631 LP.enable_typedef();
632 LP._lexer_hint := (LP.default_hint ());
633 LP.save_typedef_state();
634
635 tr.passed <- [];
636
637 let lexbuf_fake = Lexing.from_function (fun buf n -> raise (Impossible 81)) in
638
639 (try
640 (* -------------------------------------------------- *)
641 (* Call parser *)
642 (* -------------------------------------------------- *)
643 Common.profile_code_exclusif "YACC" (fun () ->
644 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
645 )
646 with e ->
647 LP.restore_typedef_state();
648
649 (* must keep here, before the code that adjusts the tr fields *)
650 let line_error = TH.line_of_tok tr.current in
651
652 let passed_before_error = tr.passed in
653 let current = tr.current in
654 (* error recovery, go to next synchro point *)
655 let (passed', rest') =
656 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
657 tr.rest <- rest';
658 tr.passed <- passed';
659
660 tr.current <- List.hd passed';
661 tr.passed_clean <- []; (* enough ? *)
662 (* with error recovery, rest and rest_clean may not be in sync *)
663 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
664
665
666 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
667 Right (info_of_bads, line_error,
668 tr.passed, passed_before_error,
669 current, e)
670 )
671
672
673
674 (* Macro problem recovery *)
675 (* used by the multi-pass error recovery expand-on-demand *)
676 (*
677 val candidate_macros_in_passed:
678 defs: (string, define_def) Hashtbl.t ->
679 Parser_c.token list -> (string * define_def) list
680 *)
681
682 let candidate_macros_in_passed2 ~defs passed =
683 let res = ref [] in
684 let res2 = ref [] in
685
686 passed +> List.iter (function
687 | Parser_c.TIdent (s,_)
688 (* bugfix: may have to undo some inferred things *)
689 | Parser_c.TMacroIterator (s,_)
690 | Parser_c.TypedefIdent (s,_)
691 ->
692 (match Common.hfind_option s defs with
693 | Some def ->
694 if s ==~ Parsing_hacks.regexp_macro
695 then
696 (* pr2 (spf "candidate: %s" s); *)
697 Common.push2 (s, def) res
698 else
699 Common.push2 (s, def) res2
700 | None -> ()
701 )
702
703 | _ -> ()
704 );
705 if null !res
706 then !res2
707 else !res
708
709 let candidate_macros_in_passed ~defs b =
710 Common.profile_code "MACRO management" (fun () ->
711 candidate_macros_in_passed2 ~defs b)
712
713
714
715
716
717 let find_optional_macro_to_expand2 ~defs toks =
718
719 let defs = Common.hash_of_list defs in
720
721 let toks = toks +> Common.tail_map (function
722
723 (* special cases to undo *)
724 | Parser_c.TMacroIterator (s, ii) ->
725 if Hashtbl.mem defs s
726 then Parser_c.TIdent (s, ii)
727 else Parser_c.TMacroIterator (s, ii)
728
729 | Parser_c.TypedefIdent (s, ii) ->
730 if Hashtbl.mem defs s
731 then Parser_c.TIdent (s, ii)
732 else Parser_c.TypedefIdent (s, ii)
733
734 | x -> x
735 ) in
736
737 let tokens = toks in
738 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
739
740 (* just calling apply_macro_defs and having a specialized version
741 * of the code in fix_tokens_cpp is not enough, as some work, such
742 * as the passing of attribute bodies in Parsing_hacks.find_macro_paren,
743 * would not get the chance to be run on the newly expanded tokens.
744 * Hence, even though it is expensive, it is currently better to
745 * just call fix_tokens_cpp directly again here.
746
747 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
748 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
749 let paren_grouped = TV.mk_parenthised cleaner in
750 Cpp_token_c.apply_macro_defs
751 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
752 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
753 defs paren_grouped;
754 (* because the before field is used by apply_macro_defs *)
755 tokens2 := TV.rebuild_tokens_extented !tokens2;
756 Parsing_hacks.insert_virtual_positions
757 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
758 *)
759 let find_optional_macro_to_expand ~defs a =
760 Common.profile_code "MACRO management" (fun () ->
761 find_optional_macro_to_expand2 ~defs a)
762
763
764
765
766
767 (*****************************************************************************)
768 (* Main entry points *)
769 (*****************************************************************************)
770
771 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
772 ref (Hashtbl.create 101)
773
774 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
775 ref (Hashtbl.create 101)
776
777
778 (* cannot be put in parsing_hack, because that would create a mutual
779 * recursion problem, as we also want to parse the standard.h file.
780 *)
781 let init_defs_macros std_h =
782 if not (Common.lfile_exists std_h)
783 then pr2 ("warning: Can't find default macro file: " ^ std_h)
784 else begin
785 pr2 ("init_defs: " ^ std_h);
786 _defs := Common.hash_of_list (extract_macros std_h);
787 end
788
789 let init_defs_builtins file_h =
790 if not (Common.lfile_exists file_h)
791 then pr2 ("warning: Can't find macro file: " ^ file_h)
792 else begin
793 pr2 ("init_defs_builtins: " ^ file_h);
794 _defs_builtins :=
795 Common.hash_of_list (extract_macros file_h);
796 end
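(* Hedged usage sketch (illustration only; the path below is made up):
 *
 * let _ = init_defs_macros "/usr/local/share/coccinelle/standard.h"
 *
 * Both functions simply fill the _defs / _defs_builtins tables with the
 * macros extracted from the given file, and warn if the file does not
 * exist. *)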
797
798
799
800 type info_item = string * Parser_c.token list
801
802 type program2 = toplevel2 list
803 and extended_program2 = toplevel2 list *
804 (string, Lexer_parser.identkind) Common.scoped_h_env (* type defs *) *
805 (string, Cpp_token_c.define_def) Hashtbl.t (* macro defs *)
806 and toplevel2 = Ast_c.toplevel * info_item
807
808 let program_of_program2 xs =
809 xs +> List.map fst
810
811 let with_program2 f program2 =
812 program2
813 +> Common.unzip
814 +> (fun (program, infos) ->
815 f program, infos
816 )
817 +> Common.uncurry Common.zip
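(* with_program2 lifts a transformation on the list of toplevel elements to a
 * transformation on a program2, keeping each element paired with its original
 * info_item (the unzip/zip assumes f preserves the number and order of the
 * elements). It is used below with Parsing_consistency_c.consistency_checking. *)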
818
819
820
821
822
823
824 (* note: as we now go in 2 passes, first come all the error messages of
825 * the lexer, and then the errors of the parser. They are no longer
826 * intertwined.
827 *
828 * !!! This function uses refs and is not reentrant !!! so take care.
829 * It uses globals defined in Lexer_parser and also the _defs global
830 * in parsing_hack.ml.
831 *
832 * This function internally uses some semi-globals in the
833 * tokens_stat record and parsing_stat record.
834 *)
835
836 let parse_print_error_heuristic2 saved_typedefs saved_macros file =
837
838 let filelines = Common.cat_array file in
839 let stat = Parsing_stat.default_stat file in
840
841 (* -------------------------------------------------- *)
842 (* call lexer and get all the tokens *)
843 (* -------------------------------------------------- *)
844
845 LP.lexer_reset_typedef saved_typedefs;
846 Parsing_hacks.ifdef_paren_cnt := 0;
847
848 let toks_orig = tokens file in
849 let toks = Parsing_hacks.fix_tokens_define toks_orig in
850 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
851
852 (* expand macros on demand trick, preparation phase *)
853 let macros =
854 Common.profile_code "MACRO mgmt prep 1" (fun () ->
855 let macros =
856 match saved_macros with None -> Hashtbl.copy !_defs | Some h -> h in
857 (* include also builtins as some macros may generate some builtins too
858 * like __decl_spec or __stdcall
859 *)
860 !_defs_builtins +> Hashtbl.iter (fun s def ->
861 Hashtbl.replace macros s def;
862 );
863 macros
864 )
865 in
866 Common.profile_code "MACRO mgmt prep 2" (fun () ->
867 let local_macros = extract_macros file in
868 local_macros +> List.iter (fun (s, def) ->
869 Hashtbl.replace macros s def;
870 );
871 );
872
873 let tr = mk_tokens_state toks in
874
875 let rec loop tr =
876
877 (* todo?: I am not sure that it represents current_line, cos maybe
878 * tr.current participated in the previous parsing phase, so maybe tr.current
879 * is not the first token of the next parsing phase. Same with checkpoint2.
880 * It would be better to record when we have a } or ; in parser.mly,
881 * cos we know that they are the last symbols of external_declaration2.
882 *
883 * bugfix: may not be equal to 'file' as after macro expansions we can
884 * start to parse a new entity from the body of a macro, for instance
885 * when parsing a define_machine() body, cf standard.h
886 *)
887 let checkpoint = TH.line_of_tok tr.current in
888 let checkpoint_file = TH.file_of_tok tr.current in
889
890 (* call the parser *)
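(* Multi-pass strategy used below (unless disable_multi_pass is set):
 * - pass 1: plain parse of the next element;
 * - pass 2: on error, replay the same tokens, but let lexer_function pass
 *   cpp directives inside functions as comments;
 * - pass 3: on error again, and only if the failed region is not a #define
 *   and some candidate macros were seen among the passed tokens, expand
 *   those macros (find_optional_macro_to_expand) and retry;
 * - pass 4: one last attempt after recomputing the candidate macros. *)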
891 let elem =
892 let pass1 =
893 Common.profile_code "Parsing: 1st pass" (fun () ->
894 get_one_elem ~pass:1 tr (file, filelines)
895 ) in
896 match pass1 with
897 | Left e -> Left e
898 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
899 if !Flag_parsing_c.disable_multi_pass
900 then pass1
901 else begin
902 Common.profile_code "Parsing: multi pass" (fun () ->
903
904 pr2_err "parsing pass2: try again";
905 let toks = List.rev passed ++ tr.rest in
906 let new_tr = mk_tokens_state toks in
907 copy_tokens_state ~src:new_tr ~dst:tr;
908 let passx = get_one_elem ~pass:2 tr (file, filelines) in
909
910 (match passx with
911 | Left e -> passx
912 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
913 let candidates =
914 candidate_macros_in_passed ~defs:macros passed
915 in
916
917
918 if is_define_passed passed || null candidates
919 then passx
920 else begin
921 (* todo factorize code *)
922
923 pr2_err "parsing pass3: try again";
924 let toks = List.rev passed ++ tr.rest in
925 let toks' =
926 find_optional_macro_to_expand ~defs:candidates toks in
927 let new_tr = mk_tokens_state toks' in
928 copy_tokens_state ~src:new_tr ~dst:tr;
929 let passx = get_one_elem ~pass:3 tr (file, filelines) in
930
931 (match passx with
932 | Left e -> passx
933 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
934 pr2_err "parsing pass4: try again";
935
936 let candidates =
937 candidate_macros_in_passed
938 ~defs:macros passed
939 in
940
941 let toks = List.rev passed ++ tr.rest in
942 let toks' =
943 find_optional_macro_to_expand ~defs:candidates toks in
944 let new_tr = mk_tokens_state toks' in
945 copy_tokens_state ~src:new_tr ~dst:tr;
946 let passx = get_one_elem ~pass:4 tr (file, filelines) in
947 passx
948 )
949 end
950 )
951 )
952 end
953 in
954
955
956 (* again not sure if checkpoint2 corresponds to end of bad region *)
957 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
958 let checkpoint2_file = TH.file_of_tok tr.current in
959
960 let diffline =
961 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
962 then (checkpoint2 - checkpoint)
963 else 0
964 (* TODO? what if the error comes in the middle of something, where the
965 * start token was from the original file but the synchro point was found
966 * in the body of a macro ? then the number-of-lines stat can be wrong.
967 * Maybe it is simpler to just look at tr.passed and count
968 * the lines of the tokens from the correct file ?
969 *)
970 in
971 let info = mk_info_item file (List.rev tr.passed) in
972
973 (* some stat updates *)
974 stat.Stat.commentized <-
975 stat.Stat.commentized + count_lines_commentized (snd info);
976
977 let elem =
978 match elem with
979 | Left e ->
980 stat.Stat.correct <- stat.Stat.correct + diffline;
981 e
982 | Right (info_of_bads, line_error, toks_of_bads,
983 _passed_before_error, cur, exn) ->
984
985 let was_define = is_define_passed tr.passed in
986
987 if was_define && !Flag_parsing_c.filter_msg_define_error
988 then ()
989 else begin
990
991 (match exn with
992 | Lexer_c.Lexical _
993 | Parsing.Parse_error
994 | Semantic_c.Semantic _ -> ()
995 | e -> raise e
996 );
997
998 if !Flag_parsing_c.show_parsing_error
999 then begin
1000 (match exn with
1001 (* Lexical is no longer raised, I think *)
1002 | Lexer_c.Lexical s ->
1003 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
1004 | Parsing.Parse_error ->
1005 pr2 ("parse error \n = " ^ error_msg_tok cur)
1006 | Semantic_c.Semantic (s, i) ->
1007 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
1008 | e -> raise (Impossible 82)
1009 );
1010 (* bugfix: *)
1011 if (checkpoint_file =$= checkpoint2_file) &&
1012 checkpoint_file =$= file
1013 then print_bad line_error (checkpoint, checkpoint2) filelines
1014 else pr2 "PB: bad: but on tokens not from original file"
1015 end;
1016
1017
1018 let pbline =
1019 toks_of_bads
1020 +> Common.filter (TH.is_same_line_or_close line_error)
1021 +> Common.filter TH.is_ident_like
1022 in
1023 let error_info =
1024 (pbline +> List.map TH.str_of_tok), line_error
1025 in
1026 stat.Stat.problematic_lines <-
1027 error_info::stat.Stat.problematic_lines;
1028
1029 end;
1030
1031 if was_define && !Flag_parsing_c.filter_define_error
1032 then stat.Stat.correct <- stat.Stat.correct + diffline
1033 else stat.Stat.bad <- stat.Stat.bad + diffline;
1034
1035 Ast_c.NotParsedCorrectly info_of_bads
1036 in
1037
1038 (match elem with
1039 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1040 | xs -> (xs, info):: loop tr (* recurse *)
1041 )
1042 in
1043 let v = loop tr in
1044 let v = with_program2 Parsing_consistency_c.consistency_checking v in
1045 let v =
1046 let new_td = ref (Common.clone_scoped_h_env !LP._typedef) in
1047 Common.clean_scope_h new_td;
1048 (v, !new_td, macros) in
1049 (v, stat)
1050
1051
1052 let time_total_parsing a b =
1053 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a b)
1054
1055 let parse_print_error_heuristic a b =
1056 Common.profile_code "C parsing" (fun () -> time_total_parsing a b)
1057
1058
1059 (* alias *)
1060 let parse_c_and_cpp a =
1061 let ((c,_,_),stat) = parse_print_error_heuristic None None a in (c,stat)
1062 let parse_c_and_cpp_keep_typedefs td macs a =
1063 parse_print_error_heuristic td macs a
1064
1065 (*****************************************************************************)
1066 (* Same, but faster because it memoizes results *)
1067 (*****************************************************************************)
1068 let parse_cache file =
1069 if not !Flag_parsing_c.use_cache
1070 then parse_print_error_heuristic None None file
1071 else
1072 let _ = pr2_once "TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
1073 let need_no_changed_files =
1074 (* should use Sys.argv.(0), would be safer. *)
1075
1076 [
1077 (* TOFIX
1078 Config.path ^ "/parsing_c/c_parser.cma";
1079 (* we may also depend now on the semantic patch because
1080 the SP may use macro and so we will disable some of the
1081 macro expansions from standard.h.
1082 *)
1083 !Config.std_h;
1084 *)
1085 ] in
1086 let need_no_changed_variables =
1087 (* could add some of the flags of flag_parsing_c.ml *)
1088 [] in
1089 Common.cache_computation_robust_in_dir
1090 !Flag_parsing_c.cache_prefix file ".ast_raw"
1091 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1092 (fun () ->
1093 (* check whether to clear the cache *)
1094 (match (!Flag_parsing_c.cache_limit,!Flag_parsing_c.cache_prefix) with
1095 (None,_) | (_,None) -> ()
1096 | (Some limit,Some prefix) ->
1097 let count =
1098 Common.cmd_to_list
1099 (Printf.sprintf "test -e %s && find %s -name \"*_raw\" | wc -l"
1100 prefix prefix) in
1101 match count with
1102 [c] ->
1103 if int_of_string c >= limit
1104 then
1105 let _ =
1106 Sys.command
1107 (Printf.sprintf
1108 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1109 prefix) in
1110 ()
1111 | _ -> ());
1112 (* recompute *)
1113 parse_print_error_heuristic None None file)
1114
1115
1116
1117 (*****************************************************************************)
1118 (* Some special cases *)
1119 (*****************************************************************************)
1120
1121 let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
1122 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1123 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1124 let program = parse_c_and_cpp tmpfile +> fst in
1125 program +> Common.find_some (fun (e,_) ->
1126 match e with
1127 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1128 | _ -> None
1129 )
1130
1131 let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
1132 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1133 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1134 let program = parse_c_and_cpp tmpfile +> fst in
1135 program +> Common.find_some (fun (e,_) ->
1136 match e with
1137 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
1138 (match compound with
1139 | [Ast_c.StmtElem st] ->
1140 (match Ast_c.unwrap_st st with
1141 | Ast_c.ExprStatement (Some e) -> Some e
1142 | _ -> None
1143 )
1144 | _ -> None
1145 )
1146 | _ -> None
1147 )
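(* Hedged usage sketches (illustration only; the input strings are made up):
 * ex: cstatement_of_string "if (x) return;"
 * ex: cexpression_of_string "a + b * 2"
 * Both wrap the string in a dummy function written to a temporary file, run
 * the full parser on it, and extract the single statement or expression. *)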