1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 module TH = Token_helpers
19 module LP = Lexer_parser
20
21 module Stat = Parsing_stat
22
23 (*****************************************************************************)
24 (* Wrappers *)
25 (*****************************************************************************)
26 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
27
28 (*****************************************************************************)
29 (* Helpers *)
30 (*****************************************************************************)
31
32 let lexbuf_to_strpos lexbuf =
33 (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
34
35 let token_to_strpos tok =
36 (TH.str_of_tok tok, TH.pos_of_tok tok)
37
38
39 let mk_info_item2 filename toks =
40 let buf = Buffer.create 100 in
41 let s =
42 (* old: get_slice_file filename (line1, line2) *)
43 begin
44 toks +> List.iter (fun tok ->
45 match TH.pinfo_of_tok tok with
46 | Ast_c.OriginTok _ ->
47 Buffer.add_string buf (TH.str_of_tok tok)
48 | Ast_c.AbstractLineTok _ ->
49 raise Impossible
50 | _ -> ()
51 );
52 Buffer.contents buf
53 end
54 in
55 (s, toks)
56
57 let mk_info_item a b =
58 Common.profile_code "C parsing.mk_info_item"
59 (fun () -> mk_info_item2 a b)
60
61
62 let info_same_line line xs =
63 xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
64
65
66 (* move in cpp_token_c ? *)
67 let is_define_passed passed =
68 let xs = passed +> List.rev +> List.filter TH.is_not_comment in
69 if List.length xs >= 2
70 then
71 (match Common.head_middle_tail xs with
72 | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
73 true
74 | _ -> false
75 )
76 else begin
77 pr2_err "WEIRD: length list of error recovery tokens < 2 ";
78 false
79 end
80
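(* Example (a sketch, grounded in its use further below): is_define_passed
 * decides whether the tokens recovered after a parse error were really a
 * #define body, so that failed macro definitions are not reported as plain
 * parse errors:
 *
 *   let was_define = is_define_passed tr.passed in
 *   if was_define && !Flag_parsing_c.filter_msg_define_error
 *   then () (* stay silent *)
 *   else ...
 *)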
81
82 (*****************************************************************************)
83 (* Error diagnostic *)
84 (*****************************************************************************)
85
86 let error_msg_tok tok =
87 let file = TH.file_of_tok tok in
88 if !Flag_parsing_c.verbose_parsing
89 then Common.error_message file (token_to_strpos tok)
90 else ("error in " ^ file ^ "; set verbose_parsing for more info")
91
92
93 let print_bad line_error (start_line, end_line) filelines =
94 begin
95 pr2 ("badcount: " ^ i_to_s (end_line - start_line));
96
97 for i = start_line to end_line do
98 let line = filelines.(i) in
99
100 if i =|= line_error
101 then pr2 ("BAD:!!!!!" ^ " " ^ line)
102 else pr2 ("bad:" ^ " " ^ line)
103 done
104 end
105
106
107 (*****************************************************************************)
108 (* Stats on what was passed/commentized *)
109 (*****************************************************************************)
110
111 let commentized xs = xs +> Common.tail_map_filter (function
112 | Parser_c.TCommentCpp (cppkind, ii) ->
113 let s = Ast_c.str_of_info ii in
114 let legal_passing =
115 match !Flag_parsing_c.filter_passed_level with
116 | 0 -> false
117 | 1 ->
118 List.mem cppkind [Token_c.CppAttr]
119 ||
120 (s =~ "__.*")
121 | 2 ->
122 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
123 ||
124 (s =~ "__.*")
125 | 3 ->
126 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
127 ||
128 (s =~ "__.*")
129 | 4 ->
130 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
131 ||
132 (s =~ "__.*")
133
134
135 | 5 ->
136 List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
137 ||
138 (s =~ "__.*")
139
140
141
142
143 | _ -> failwith "not valid level passing number"
144 in
145 if legal_passing then None else Some (ii.Ast_c.pinfo)
146
147 (*
148 | Ast_c.CppOther ->
149 (match s with
150 | s when s =~ "KERN_.*" -> None
151 | s when s =~ "__.*" -> None
152 | _ ->
153 Some (ii.Ast_c.pinfo)
154 )
155 *)
156
157
158 | Parser_c.TCommentMisc ii
159 | Parser_c.TAction ii
160 ->
161 Some (ii.Ast_c.pinfo)
162 | _ ->
163 None
164 )
165
166 let count_lines_commentized xs =
167 let line = ref (-1) in
168 let count = ref 0 in
169 begin
170 commentized xs +>
171 List.iter
172 (function
173 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
174 let newline = pinfo.Common.line in
175 if newline <> !line
176 then begin
177 line := newline;
178 incr count
179 end
180 | _ -> ());
181 !count
182 end
183
184
185
186 let print_commentized xs =
187 let line = ref (-1) in
188 begin
189 let ys = commentized xs in
190 ys +>
191 List.iter
192 (function
193 Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
194 let newline = pinfo.Common.line in
195 let s = pinfo.Common.str in
196 let s = Str.global_substitute
197 (Str.regexp "\n") (fun s -> "") s
198 in
199 if newline =|= !line
200 then prerr_string (s ^ " ")
201 else begin
202 if !line =|= -1
203 then pr2_no_nl "passed:"
204 else pr2_no_nl "\npassed:";
205 line := newline;
206 pr2_no_nl (s ^ " ");
207 end
208 | _ -> ());
209 if not (null ys) then pr2 "";
210 end
211
212
213
214
215 (*****************************************************************************)
216 (* Lexing only *)
217 (*****************************************************************************)
218
219 (* called by parse_print_error_heuristic *)
220 let tokens2 file =
221 let table = Common.full_charpos_to_pos_large file in
222
223 Common.with_open_infile file (fun chan ->
224 let lexbuf = Lexing.from_channel chan in
225 try
226 let rec tokens_aux acc =
227 let tok = Lexer_c.token lexbuf in
228 (* fill in the line and col information *)
229 let tok = tok +> TH.visitor_info_of_tok (fun ii ->
230 { ii with Ast_c.pinfo=
231 (* could assert pinfo.filename = file ? *)
232 match Ast_c.pinfo_of_info ii with
233 Ast_c.OriginTok pi ->
234 Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
235 | Ast_c.ExpandedTok (pi,vpi) ->
236 Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
237 | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
238 | Ast_c.AbstractLineTok pi -> failwith "should not occur"
239 })
240 in
241
242 if TH.is_eof tok
243 then List.rev (tok::acc)
244 else tokens_aux (tok::acc)
245 in
246 tokens_aux []
247 with
248 | Lexer_c.Lexical s ->
249 failwith ("lexical error " ^ s ^ "\n =" ^
250 (Common.error_message file (lexbuf_to_strpos lexbuf)))
251 | e -> raise e
252 )
253
254 let time_lexing ?(profile=true) a =
255 if profile
256 then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
257 else tokens2 a
258 let tokens ?profile a =
259 Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
260
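(* Example (a rough sketch, not in the original code): lexing a file and
 * reporting what would be passed/commentized. The path "/tmp/foo.c" is
 * purely illustrative.
 *
 *   let show_passed () =
 *     let toks = tokens "/tmp/foo.c" in
 *     pr2 (spf "%d tokens" (List.length toks));
 *     pr2 (spf "%d commentized lines" (count_lines_commentized toks));
 *     print_commentized toks
 *)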
261
262 let tokens_of_string string =
263 let lexbuf = Lexing.from_string string in
264 try
265 let rec tokens_s_aux () =
266 let tok = Lexer_c.token lexbuf in
267 if TH.is_eof tok
268 then [tok]
269 else tok::(tokens_s_aux ())
270 in
271 tokens_s_aux ()
272 with
273 | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
274 | e -> raise e
275
276
277 (*****************************************************************************)
(* Parsing, but very basic; no longer used *)
279 (*****************************************************************************)
280
(*
 * !!! These functions use refs and are not reentrant !!! so take care.
 * They use globals defined in Lexer_parser.
 *
 * update: because the lexer now returns comment tokens, these functions
 * may not work anymore.
 *)
288
289 let parse file =
290 let lexbuf = Lexing.from_channel (open_in file) in
291 let result = Parser_c.main Lexer_c.token lexbuf in
292 result
293
294
295 let parse_print_error file =
296 let chan = (open_in file) in
297 let lexbuf = Lexing.from_channel chan in
298
299 let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
300 try
301 lexbuf +> Parser_c.main Lexer_c.token
302 with
303 | Lexer_c.Lexical s ->
304 failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
305 | Parsing.Parse_error ->
306 failwith ("parse error \n = " ^ error_msg ())
307 | Semantic_c.Semantic (s, i) ->
308 failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
309 | e -> raise e
310
311
312
313
314 (*****************************************************************************)
315 (* Parsing subelements, useful to debug parser *)
316 (*****************************************************************************)
317
(*
 * !!! These functions use refs and are not reentrant !!! so take care.
 * They use globals defined in Lexer_parser.
 *)
322
323
324 (* old:
325 * let parse_gen parsefunc s =
326 * let lexbuf = Lexing.from_string s in
327 * let result = parsefunc Lexer_c.token lexbuf in
328 * result
329 *)
330
331 let parse_gen parsefunc s =
332 let toks = tokens_of_string s +> List.filter TH.is_not_comment in
333
334
  (* Why use this lexing scheme? Why not classically pass the lexer function
   * to the parser? Because the lexer now keeps comments. We could just
   * write a small wrapper that asks for another token whenever it sees a
   * comment, but it is maybe simpler to use the cur_tok technique.
   *)
340 let all_tokens = ref toks in
341 let cur_tok = ref (List.hd !all_tokens) in
342
343 let lexer_function =
344 (fun _ ->
345 if TH.is_eof !cur_tok
346 then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
347 else
348 let v = Common.pop2 all_tokens in
349 cur_tok := v;
350 !cur_tok
351 )
352 in
353 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
354 let result = parsefunc lexer_function lexbuf_fake in
355 result
356
357
358 let type_of_string = parse_gen Parser_c.type_name
359 let statement_of_string = parse_gen Parser_c.statement
360 let expression_of_string = parse_gen Parser_c.expr
361
362 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
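(* More examples (sketches; mind the reentrancy caveat above, since these
 * helpers share the Lexer_parser globals):
 *
 *   let t = type_of_string "struct us_data *"
 *   let e = expression_of_string "a + b * 3"
 *)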
363
364
365
366
367
368 (*****************************************************************************)
369 (* Parsing default define macros, usually in a standard.h file *)
370 (*****************************************************************************)
371
372 let extract_macros2 file =
373 Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
374 Flag_parsing_c.verbose_lexing := false;
375 let toks = tokens ~profile:false file in
376 let toks = Parsing_hacks.fix_tokens_define toks in
377 Cpp_token_c.extract_macros toks
378 )
379
380 let extract_macros a =
381 Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
382
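(* Example (sketch): collecting the #defines of a header into a hashtable,
 * much like init_defs_macros below does. The file name is illustrative.
 *
 *   let defs_of_header () =
 *     Common.hash_of_list (extract_macros "standard.h")
 *)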
383
384 (*****************************************************************************)
385 (* Helper for main entry point *)
386 (*****************************************************************************)
387
388
(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
 * error recovery possible. Indeed, they allow us to skip some tokens and
 * still be able to call the ocamlyacc parser again. It is ugly code
 * because we can't modify ocamllex and ocamlyacc. As we want some
 * extended lexing tricks, we have to use such refs.
 *
 * Those refs are now also used for my lalr(k) technique. Indeed they
 * store the future and previous tokens that were parsed, and so
 * provide enough context information for powerful lexing tricks.
 *
 * - passed_tokens_last_ckp stores the tokens passed since the last
 *   checkpoint. Used for NotParsedCorrectly and also to build the
 *   info_item attached to each program_element.
 * - passed_tokens_clean is used for lookahead, in fact for lookback.
 * - remaining_tokens_clean is used for lookahead. remaining_tokens now
 *   contains some comments, which would make pattern matching difficult
 *   in lookahead. Hence this variable. We would also like to get rid
 *   of cpp instructions, because sometimes a cpp instruction sits between
 *   two tokens and makes a pattern matching fail. But lookahead also
 *   transforms some cpp instructions (into comments), so we can't remove
 *   them.
 *
 * So remaining_tokens and passed_tokens_last_ckp contain comment tokens,
 * whereas passed_tokens_clean and remaining_tokens_clean do not contain
 * comment tokens.
 *
 * Normally we have:
 *   toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
 * after the call to pop2, and
 *   toks = (reverse passed_tok) ++ remaining_tokens
 * at the end of the lexer_function call.
 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
 * At the end of the lexer_function call, cur_tok overlaps with passed_tok.
 *
 * convention: I use "tr" for "tokens refs".
 *
 * I also need this lexing trick now because the lexer returns comment
 * tokens.
 *)
427
428 type tokens_state = {
429 mutable rest : Parser_c.token list;
430 mutable rest_clean : Parser_c.token list;
431 mutable current : Parser_c.token;
432 (* it's passed since last "checkpoint", not passed from the beginning *)
433 mutable passed : Parser_c.token list;
434 mutable passed_clean : Parser_c.token list;
435 }
436
437 let mk_tokens_state toks =
438 {
439 rest = toks;
440 rest_clean = (toks +> List.filter TH.is_not_comment);
441 current = (List.hd toks);
442 passed = [];
443 passed_clean = [];
444 }
445
446
447
448 let clone_tokens_state tr =
449 { rest = tr.rest;
450 rest_clean = tr.rest_clean;
451 current = tr.current;
452 passed = tr.passed;
453 passed_clean = tr.passed_clean;
454 }
455 let copy_tokens_state ~src ~dst =
456 dst.rest <- src.rest;
457 dst.rest_clean <- src.rest_clean;
458 dst.current <- src.current;
459 dst.passed <- src.passed;
460 dst.passed_clean <- src.passed_clean;
461 ()
462
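(* Example (a hypothetical helper, not used below): the clone/copy pair allows
 * snapshotting the token stream, attempting something, and restoring it
 * afterwards; the multi-pass recovery below uses copy_tokens_state to install
 * a freshly built state into the shared tr.
 *
 *   let with_snapshot tr f =
 *     let saved = clone_tokens_state tr in
 *     let res = f tr in
 *     copy_tokens_state ~src:saved ~dst:tr;
 *     res
 *)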
463 (* todo? agglomerate the x##b ? *)
464 let rec filter_noise n xs =
465 match n, xs with
466 | _, [] -> []
467 | 0, xs -> xs
468 | n, x::xs ->
469 (match x with
470 | Parser_c.TMacroAttr _ ->
471 filter_noise (n-1) xs
472 | _ ->
473 x::filter_noise (n-1) xs
474 )
475
476 let clean_for_lookahead xs =
477 match xs with
478 | [] -> []
479 | [x] -> [x]
480 | x::xs ->
481 x::filter_noise 10 xs
482
483
484
(* Hacked lex. This function uses the refs passed by parse_print_error_heuristic.
 * tr means "token refs".
 *)
488 let rec lexer_function ~pass tr = fun lexbuf ->
489 match tr.rest with
490 | [] -> pr2_err "ALREADY AT END"; tr.current
491 | v::xs ->
492 tr.rest <- xs;
493 tr.current <- v;
494
495 if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
496
497 if TH.is_comment v
498 then begin
499 tr.passed <- v::tr.passed;
500 lexer_function ~pass tr lexbuf
501 end
502 else begin
503 let x = List.hd tr.rest_clean in
504 tr.rest_clean <- List.tl tr.rest_clean;
505 assert (x =*= v);
506
507 (match v with
508
      (* fix_define1.
       *
       * Why not handle this in parsing_hacks' lookahead and do the passing
       * like I do for some ifdef directives? Because here I also sometimes
       * need to generate some tokens, and so I need access to
       * tr.passed, tr.rest, etc.
       *)
516 | Parser_c.TDefine (tok) ->
517 if not (LP.current_context () =*= LP.InTopLevel) &&
518 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
519 then begin
520 incr Stat.nDefinePassing;
521 pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
522 let v' =
523 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
524 in
525 tr.passed <- v'::tr.passed;
526 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
527 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
528 lexer_function ~pass tr lexbuf
529 end
530 else begin
531 tr.passed <- v::tr.passed;
532 tr.passed_clean <- v::tr.passed_clean;
533 v
534 end
535
536 | Parser_c.TUndef (tok) ->
537 if not (LP.current_context () =*= LP.InTopLevel) &&
538 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
539 then begin
540 incr Stat.nUndefPassing;
541 pr2_once ("CPP-UNDEF: inside function, I treat it as comment");
542 let v' =
543 Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
544 in
545 tr.passed <- v'::tr.passed;
546 tr.rest <- Parsing_hacks.comment_until_defeol tr.rest;
547 tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
548 lexer_function ~pass tr lexbuf
549 end
550 else begin
551 tr.passed <- v::tr.passed;
552 tr.passed_clean <- v::tr.passed_clean;
553 v
554 end
555
556 | Parser_c.TInclude (includes, filename, inifdef, info) ->
557 if not (LP.current_context () =*= LP.InTopLevel) &&
558 (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
559 then begin
560 incr Stat.nIncludePassing;
561 pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
562 let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
563 tr.passed <- v::tr.passed;
564 lexer_function ~pass tr lexbuf
565 end
566 else begin
567 let (v,new_tokens) =
568 Parsing_hacks.tokens_include(info, includes, filename, inifdef) in
569 let new_tokens_clean =
570 new_tokens +> List.filter TH.is_not_comment in
571
572 tr.passed <- v::tr.passed;
573 tr.passed_clean <- v::tr.passed_clean;
574 tr.rest <- new_tokens ++ tr.rest;
575 tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
576 v
577 end
578
579 | _ ->
580
581 (* typedef_fix1 *)
582 let v = match v with
583 | Parser_c.TIdent (s, ii) ->
584 if
585 LP.is_typedef s &&
586 not (!Flag_parsing_c.disable_add_typedef) &&
587 pass =|= 1
588 then Parser_c.TypedefIdent (s, ii)
589 else Parser_c.TIdent (s, ii)
590 | x -> x
591 in
592
593 let v = Parsing_hacks.lookahead ~pass
594 (clean_for_lookahead (v::tr.rest_clean))
595 tr.passed_clean in
596
597 tr.passed <- v::tr.passed;
598
          (* the lookahead may have changed the status of the token and
           * considered it as a comment; for instance some #includes are
           * turned into comments, hence this code. *)
602 match v with
603 | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
604 | v ->
605 tr.passed_clean <- v::tr.passed_clean;
606 v
607 )
608 end
609
610
611 let max_pass = 4
612
613
614 let get_one_elem ~pass tr (file, filelines) =
615
616 if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
617 then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
618
  (* normally we have to do this only when we come from an exception, in
   * which case the dt() may not have been done.
   * TODO but what if we were in a nested scope? we should keep only the last
   * scope, so we would need to do a LP.lexer_reset_typedef ();
   *)
624 LP.enable_typedef();
625 LP._lexer_hint := (LP.default_hint ());
626 LP.save_typedef_state();
627
628 tr.passed <- [];
629
630 let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
631
632 (try
633 (* -------------------------------------------------- *)
634 (* Call parser *)
635 (* -------------------------------------------------- *)
636 Common.profile_code_exclusif "YACC" (fun () ->
637 Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
638 )
639 with e ->
640 LP.restore_typedef_state();
641
642 (* must keep here, before the code that adjusts the tr fields *)
643 let line_error = TH.line_of_tok tr.current in
644
645 let passed_before_error = tr.passed in
646 let current = tr.current in
647 (* error recovery, go to next synchro point *)
648 let (passed', rest') =
649 Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
650 tr.rest <- rest';
651 tr.passed <- passed';
652
653 tr.current <- List.hd passed';
654 tr.passed_clean <- []; (* enough ? *)
655 (* with error recovery, rest and rest_clean may not be in sync *)
656 tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
657
658
659 let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
660 Right (info_of_bads, line_error,
661 tr.passed, passed_before_error,
662 current, e)
663 )
664
665
666
667 (* Macro problem recovery *)
668 (* used by the multi-pass error recovery expand-on-demand *)
669 (*
670 val candidate_macros_in_passed:
671 defs: (string, define_def) Hashtbl.t ->
672 Parser_c.token list -> (string * define_def) list
673 *)
674
675 let candidate_macros_in_passed2 ~defs passed =
676 let res = ref [] in
677 let res2 = ref [] in
678
679 passed +> List.iter (function
680 | Parser_c.TIdent (s,_)
    (* bugfix: may have to undo some inferred things *)
682 | Parser_c.TMacroIterator (s,_)
683 | Parser_c.TypedefIdent (s,_)
684 ->
685 (match Common.hfind_option s defs with
686 | Some def ->
687 if s ==~ Parsing_hacks.regexp_macro
688 then
689 (* pr2 (spf "candidate: %s" s); *)
690 Common.push2 (s, def) res
691 else
692 Common.push2 (s, def) res2
693 | None -> ()
694 )
695
696 | _ -> ()
697 );
698 if null !res
699 then !res2
700 else !res
701
702 let candidate_macros_in_passed ~defs b =
703 Common.profile_code "MACRO managment" (fun () ->
704 candidate_macros_in_passed2 ~defs b)
705
706
707
708
709
710 let find_optional_macro_to_expand2 ~defs toks =
711
712 let defs = Common.hash_of_list defs in
713
714 let toks = toks +> Common.tail_map (function
715
716 (* special cases to undo *)
717 | Parser_c.TMacroIterator (s, ii) ->
718 if Hashtbl.mem defs s
719 then Parser_c.TIdent (s, ii)
720 else Parser_c.TMacroIterator (s, ii)
721
722 | Parser_c.TypedefIdent (s, ii) ->
723 if Hashtbl.mem defs s
724 then Parser_c.TIdent (s, ii)
725 else Parser_c.TypedefIdent (s, ii)
726
727 | x -> x
728 ) in
729
730 let tokens = toks in
731 Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
732
(* Just calling apply_macro_defs and having a specialized version
 * of the code in fix_tokens_cpp is not enough, as some work, such as
 * the passing of attribute bodies in Parsing_hacks.find_macro_paren,
 * would not get the chance to be run on the newly expanded tokens.
 * Hence, even if it is expensive, it is currently better to
 * just call fix_tokens_cpp directly again here.
739
740 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
741 let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
742 let paren_grouped = TV.mk_parenthised cleaner in
743 Cpp_token_c.apply_macro_defs
744 ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
745 ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
746 defs paren_grouped;
747 (* because the before field is used by apply_macro_defs *)
748 tokens2 := TV.rebuild_tokens_extented !tokens2;
749 Parsing_hacks.insert_virtual_positions
750 (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
751 *)
752 let find_optional_macro_to_expand ~defs a =
753 Common.profile_code "MACRO managment" (fun () ->
754 find_optional_macro_to_expand2 ~defs a)
755
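(* Example (sketch of the expand-on-demand recovery step, as performed in
 * parse_print_error_heuristic2 below): given the macro table and the tokens
 * passed before a parse error, pick candidate macros and re-run the cpp
 * fixes with only those expanded.
 *
 *   let retry_with_macros macros passed rest =
 *     let candidates = candidate_macros_in_passed ~defs:macros passed in
 *     let toks = List.rev passed ++ rest in
 *     find_optional_macro_to_expand ~defs:candidates toks
 *)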
756
757
758
759
760 (*****************************************************************************)
761 (* Main entry points *)
762 (*****************************************************************************)
763
764 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
765 ref (Hashtbl.create 101)
766
767 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref) =
768 ref (Hashtbl.create 101)
769
770
(* cannot be put in parsing_hacks, because then there would be a mutual
 * recursion problem, as we also want to parse the standard.h file.
 *)
774 let init_defs_macros std_h =
775 if not (Common.lfile_exists std_h)
776 then pr2 ("warning: Can't find default macro file: " ^ std_h)
777 else begin
778 pr2 ("init_defs: " ^ std_h);
779 _defs := Common.hash_of_list (extract_macros std_h);
780 end
781
782 let init_defs_builtins file_h =
783 if not (Common.lfile_exists file_h)
784 then pr2 ("warning: Can't find macro file: " ^ file_h)
785 else begin
786 pr2 ("init_defs_builtins: " ^ file_h);
787 _defs_builtins :=
788 Common.hash_of_list (extract_macros file_h);
789 end
790
791
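(* Example (sketch): typical initialization before parsing; the path is
 * hypothetical and depends on the installation.
 *
 *   let () = init_defs_macros "/usr/local/lib/coccinelle/standard.h"
 *)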
792
793 type info_item = string * Parser_c.token list
794
795 type program2 = toplevel2 list
796 and extended_program2 = toplevel2 list *
797 (string, Lexer_parser.identkind) Common.scoped_h_env (* type defs *) *
798 (string, Cpp_token_c.define_def) Hashtbl.t (* macro defs *)
799 and toplevel2 = Ast_c.toplevel * info_item
800
801 let program_of_program2 xs =
802 xs +> List.map fst
803
804 let with_program2 f program2 =
805 program2
806 +> Common.unzip
807 +> (fun (program, infos) ->
808 f program, infos
809 )
810 +> Common.uncurry Common.zip
811
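(* Example: with_program2 lets a pass rewrite the bare Ast_c.program while
 * keeping each toplevel's info_item attached, as done below:
 *
 *   let v = with_program2 Parsing_consistency_c.consistency_checking v
 *)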
812
813
814
815
816
(* note: as we now go in 2 passes, we first get all the error messages of
 * the lexer, and then the errors of the parser. They are no longer
 * intertwined.
 *
 * !!! This function uses refs and is not reentrant !!! so take care.
 * It uses globals defined in Lexer_parser, and also the _defs global
 * defined above in this file.
 *
 * This function internally uses some semi-globals in the
 * tokens_stat record and parsing_stat record.
 *)
828
829 let parse_print_error_heuristic2 saved_typedefs saved_macros file =
830
831 let filelines = Common.cat_array file in
832 let stat = Parsing_stat.default_stat file in
833
834 (* -------------------------------------------------- *)
835 (* call lexer and get all the tokens *)
836 (* -------------------------------------------------- *)
837
838 LP.lexer_reset_typedef saved_typedefs;
839 Parsing_hacks.ifdef_paren_cnt := 0;
840
841 let toks_orig = tokens file in
842 let toks = Parsing_hacks.fix_tokens_define toks_orig in
843 let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
844
845 (* expand macros on demand trick, preparation phase *)
846 let macros =
847 Common.profile_code "MACRO mgmt prep 1" (fun () ->
848 let macros =
849 match saved_macros with None -> Hashtbl.copy !_defs | Some h -> h in
      (* also include builtins, as some macros may generate some builtins too,
       * like __decl_spec or __stdcall
       *)
853 !_defs_builtins +> Hashtbl.iter (fun s def ->
854 Hashtbl.replace macros s def;
855 );
856 macros
857 )
858 in
859 Common.profile_code "MACRO mgmt prep 2" (fun () ->
860 let local_macros = extract_macros file in
861 local_macros +> List.iter (fun (s, def) ->
862 Hashtbl.replace macros s def;
863 );
864 );
865
866 let tr = mk_tokens_state toks in
867
868 let rec loop tr =
869
    (* todo?: I am not sure that it represents current_line, because maybe
     * tr.current participated in the previous parsing phase, so maybe
     * tr.current is not the first token of the next parsing phase. Same with
     * checkpoint2. It would be better to record when we have a } or ; in
     * parser.mly, because we know that they are the last symbols of
     * external_declaration2.
     *
     * bugfix: may not be equal to 'file', as after macro expansions we can
     * start to parse a new entity from the body of a macro, for instance
     * when parsing a define_machine() body, cf standard.h
     *)
880 let checkpoint = TH.line_of_tok tr.current in
881 let checkpoint_file = TH.file_of_tok tr.current in
882
883 (* call the parser *)
884 let elem =
885 let pass1 =
886 Common.profile_code "Parsing: 1st pass" (fun () ->
887 get_one_elem ~pass:1 tr (file, filelines)
888 ) in
889 match pass1 with
890 | Left e -> Left e
891 | Right (info,line_err, passed, passed_before_error, cur, exn) ->
892 if !Flag_parsing_c.disable_multi_pass
893 then pass1
894 else begin
895 Common.profile_code "Parsing: multi pass" (fun () ->
896
897 pr2_err "parsing pass2: try again";
898 let toks = List.rev passed ++ tr.rest in
899 let new_tr = mk_tokens_state toks in
900 copy_tokens_state ~src:new_tr ~dst:tr;
901 let passx = get_one_elem ~pass:2 tr (file, filelines) in
902
903 (match passx with
904 | Left e -> passx
905 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
906 let candidates =
907 candidate_macros_in_passed ~defs:macros passed
908 in
909
910
911 if is_define_passed passed || null candidates
912 then passx
913 else begin
914 (* todo factorize code *)
915
916 pr2_err "parsing pass3: try again";
917 let toks = List.rev passed ++ tr.rest in
918 let toks' =
919 find_optional_macro_to_expand ~defs:candidates toks in
920 let new_tr = mk_tokens_state toks' in
921 copy_tokens_state ~src:new_tr ~dst:tr;
922 let passx = get_one_elem ~pass:3 tr (file, filelines) in
923
924 (match passx with
925 | Left e -> passx
926 | Right (info,line_err,passed,passed_before_error,cur,exn) ->
927 pr2_err "parsing pass4: try again";
928
929 let candidates =
930 candidate_macros_in_passed
931 ~defs:macros passed
932 in
933
934 let toks = List.rev passed ++ tr.rest in
935 let toks' =
936 find_optional_macro_to_expand ~defs:candidates toks in
937 let new_tr = mk_tokens_state toks' in
938 copy_tokens_state ~src:new_tr ~dst:tr;
939 let passx = get_one_elem ~pass:4 tr (file, filelines) in
940 passx
941 )
942 end
943 )
944 )
945 end
946 in
947
948
949 (* again not sure if checkpoint2 corresponds to end of bad region *)
950 let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
951 let checkpoint2_file = TH.file_of_tok tr.current in
952
953 let diffline =
954 if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
955 then (checkpoint2 - checkpoint)
956 else 0
        (* TODO? what if the error comes in the middle of something, where the
         * start token was from the original file but the synchro point was
         * found in the body of a macro? Then the line-count stats can be
         * wrong. Maybe it is simpler to just look at tr.passed and count
         * the lines in the tokens from the correct file?
         *)
963 in
964 let info = mk_info_item file (List.rev tr.passed) in
965
966 (* some stat updates *)
967 stat.Stat.commentized <-
968 stat.Stat.commentized + count_lines_commentized (snd info);
969
970 let elem =
971 match elem with
972 | Left e ->
973 stat.Stat.correct <- stat.Stat.correct + diffline;
974 e
975 | Right (info_of_bads, line_error, toks_of_bads,
976 _passed_before_error, cur, exn) ->
977
978 let was_define = is_define_passed tr.passed in
979
980 if was_define && !Flag_parsing_c.filter_msg_define_error
981 then ()
982 else begin
983
984 (match exn with
985 | Lexer_c.Lexical _
986 | Parsing.Parse_error
987 | Semantic_c.Semantic _ -> ()
988 | e -> raise e
989 );
990
991 if !Flag_parsing_c.show_parsing_error
992 then begin
993 (match exn with
            (* Lexical is no longer raised, I think *)
995 | Lexer_c.Lexical s ->
996 pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
997 | Parsing.Parse_error ->
998 pr2 ("parse error \n = " ^ error_msg_tok cur)
999 | Semantic_c.Semantic (s, i) ->
1000 pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
1001 | e -> raise Impossible
1002 );
1003 (* bugfix: *)
1004 if (checkpoint_file =$= checkpoint2_file) &&
1005 checkpoint_file =$= file
1006 then print_bad line_error (checkpoint, checkpoint2) filelines
1007 else pr2 "PB: bad: but on tokens not from original file"
1008 end;
1009
1010
1011 let pbline =
1012 toks_of_bads
1013 +> Common.filter (TH.is_same_line_or_close line_error)
1014 +> Common.filter TH.is_ident_like
1015 in
1016 let error_info =
1017 (pbline +> List.map TH.str_of_tok), line_error
1018 in
1019 stat.Stat.problematic_lines <-
1020 error_info::stat.Stat.problematic_lines;
1021
1022 end;
1023
1024 if was_define && !Flag_parsing_c.filter_define_error
1025 then stat.Stat.correct <- stat.Stat.correct + diffline
1026 else stat.Stat.bad <- stat.Stat.bad + diffline;
1027
1028 Ast_c.NotParsedCorrectly info_of_bads
1029 in
1030
1031 (match elem with
1032 | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1033 | xs -> (xs, info):: loop tr (* recurse *)
1034 )
1035 in
1036 let v = loop tr in
1037 let v = with_program2 Parsing_consistency_c.consistency_checking v in
1038 let v =
1039 let new_td = ref (Common.clone_scoped_h_env !LP._typedef) in
1040 Common.clean_scope_h new_td;
1041 (v, !new_td, macros) in
1042 (v, stat)
1043
1044
1045 let time_total_parsing a b =
1046 Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a b)
1047
1048 let parse_print_error_heuristic a b =
1049 Common.profile_code "C parsing" (fun () -> time_total_parsing a b)
1050
1051
1052 (* alias *)
1053 let parse_c_and_cpp a =
1054 let ((c,_,_),stat) = parse_print_error_heuristic None None a in (c,stat)
1055 let parse_c_and_cpp_keep_typedefs td macs a =
1056 parse_print_error_heuristic td macs a
1057
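(* Example (a sketch, with an illustrative path): counting the function
 * definitions in a file and reporting how many lines failed to parse.
 *
 *   let count_defs () =
 *     let (program2, stat) = parse_c_and_cpp "/tmp/foo.c" in
 *     pr2 (spf "bad lines: %d" stat.Stat.bad);
 *     program2 +> program_of_program2
 *       +> List.filter (function Ast_c.Definition _ -> true | _ -> false)
 *       +> List.length
 *)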
1058 (*****************************************************************************)
(* Same, but faster because it memoizes results *)
1060 (*****************************************************************************)
1061 let parse_cache file =
1062 if not !Flag_parsing_c.use_cache
1063 then parse_print_error_heuristic None None file
1064 else
1065 let _ = pr2_once "TOFIX: use_cache is not sensitive to changes in the considered macros, include files, etc" in
1066 let need_no_changed_files =
1067 (* should use Sys.argv.(0), would be safer. *)
1068
1069 [
1070 (* TOFIX
1071 Config.path ^ "/parsing_c/c_parser.cma";
1072 (* we may also depend now on the semantic patch because
1073 the SP may use macro and so we will disable some of the
1074 macro expansions from standard.h.
1075 *)
1076 !Config.std_h;
1077 *)
1078 ] in
1079 let need_no_changed_variables =
1080 (* could add some of the flags of flag_parsing_c.ml *)
1081 [] in
1082 Common.cache_computation_robust_in_dir
1083 !Flag_parsing_c.cache_prefix file ".ast_raw"
1084 (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1085 (fun () ->
1086 (* check whether to clear the cache *)
1087 (match (!Flag_parsing_c.cache_limit,!Flag_parsing_c.cache_prefix) with
1088 (None,_) | (_,None) -> ()
1089 | (Some limit,Some prefix) ->
1090 let count =
1091 Common.cmd_to_list
1092 (Printf.sprintf "test -e %s && find %s -name \"*_raw\" | wc -l"
1093 prefix prefix) in
1094 match count with
1095 [c] ->
1096 if int_of_string c >= limit
1097 then
1098 let _ =
1099 Sys.command
1100 (Printf.sprintf
1101 "find %s -name \"*_raw\" -exec /bin/rm {} \\;"
1102 prefix) in
1103 ()
1104 | _ -> ());
1105 (* recompute *)
1106 parse_print_error_heuristic None None file)
1107
1108
1109
1110 (*****************************************************************************)
1111 (* Some special cases *)
1112 (*****************************************************************************)
1113
1114 let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
1115 let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1116 Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1117 let program = parse_c_and_cpp tmpfile +> fst in
1118 program +> Common.find_some (fun (e,_) ->
1119 match e with
1120 | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1121 | _ -> None
1122 )
1123
1124 let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
1125 let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1126 Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1127 let program = parse_c_and_cpp tmpfile +> fst in
1128 program +> Common.find_some (fun (e,_) ->
1129 match e with
1130 | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
1131 (match compound with
1132 | [Ast_c.StmtElem st] ->
1133 (match Ast_c.unwrap_st st with
1134 | Ast_c.ExprStatement (Some e) -> Some e
1135 | _ -> None
1136 )
1137 | _ -> None
1138 )
1139 | _ -> None
1140 )
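(* Examples (sketches; both helpers go through a temporary file and the full
 * parser, so they inherit its non-reentrancy caveats):
 *
 *   let st = cstatement_of_string "if (x) return 1;"
 *   let e  = cexpression_of_string "a ? b : c"
 *)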