parsing_c/parse_c.ml

   1 (* Yoann Padioleau
   2  *
   3  * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License (GPL)
   7  * version 2 as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * file license.txt for more details.
  13  *)
  14
  15 open Common
  16
  17 module TH = Token_helpers
  18 module LP = Lexer_parser
  19
  20 module Stat = Parsing_stat
  21
  22 (*****************************************************************************)
  23 (* Wrappers *)
  24 (*****************************************************************************)
  25 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
  26
  27 (*****************************************************************************)
  28 (* Helpers *)
  29 (*****************************************************************************)
  30
  31 let lexbuf_to_strpos lexbuf     =
  32   (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
  33
  34 let token_to_strpos tok =
  35   (TH.str_of_tok tok, TH.pos_of_tok tok)
  36
  37
  38 let mk_info_item2 filename toks =
  39   let buf = Buffer.create 100 in
  40   let s =
  41     (* old: get_slice_file filename (line1, line2) *)
  42     begin
  43       toks +> List.iter (fun tok ->
  44         match TH.pinfo_of_tok tok with
  45         | Ast_c.OriginTok _ ->
  46             Buffer.add_string buf (TH.str_of_tok tok)
  47         | Ast_c.AbstractLineTok _ ->
  48             raise Impossible
  49         | _ -> ()
  50       );
  51       Buffer.contents buf
  52     end
  53   in
  54   (s, toks)
  55
  56 let mk_info_item a b =
  57   Common.profile_code "C parsing.mk_info_item"
  58     (fun () -> mk_info_item2 a b)
  59
  60
  61 let info_same_line line xs =
  62   xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
  63
  64
  65 (* move in cpp_token_c ? *)
  66 let is_define_passed passed =
  67   let xs = passed +> List.rev +> List.filter TH.is_not_comment in
  68   if List.length xs >= 2
  69   then
  70     (match Common.head_middle_tail xs with
  71     | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
  72         true
  73     | _ -> false
  74     )
  75   else begin
  76     pr2_err "WEIRD: length list of error recovery tokens < 2 ";
  77     false
  78   end
  79
  80
  81 (*****************************************************************************)
  82 (* Error diagnostic  *)
  83 (*****************************************************************************)
  84
  85 let error_msg_tok tok =
  86   let file = TH.file_of_tok tok in
  87   if !Flag_parsing_c.verbose_parsing
  88   then Common.error_message file (token_to_strpos tok)
  89   else ("error in " ^ file  ^ "; set verbose_parsing for more info")
  90
  91
  92 let print_bad line_error (start_line, end_line) filelines  =
  93   begin
  94     pr2 ("badcount: " ^ i_to_s (end_line - start_line));
  95
  96     for i = start_line to end_line do
  97       let line = filelines.(i) in
  98
  99       if i =|= line_error
 100       then  pr2 ("BAD:!!!!!" ^ " " ^ line)
 101       else  pr2 ("bad:" ^ " " ^      line)
 102     done
 103   end
 104
 105
 106 (*****************************************************************************)
 107 (* Stats on what was passed/commentized  *)
 108 (*****************************************************************************)
 109
 110 let commentized xs = xs +> Common.map_filter (function
 111   | Parser_c.TCommentCpp (cppkind, ii) ->
 112       let s = Ast_c.str_of_info ii in
 113       let legal_passing =
 114         match !Flag_parsing_c.filter_passed_level with
 115         | 0 -> false
 116         | 1 ->
 117             List.mem cppkind [Token_c.CppAttr]
 118             ||
 119             (s =~ "__.*")
 120         | 2 ->
 121             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
 122             ||
 123             (s =~ "__.*")
 124         | 3 ->
 125             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
 126             ||
 127             (s =~ "__.*")
 128         | 4 ->
 129             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
 130             ||
 131             (s =~ "__.*")
 132
 133
 134         | 5 ->
 135             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
 136             ||
 137             (s =~ "__.*")
 138
 139
 140
 141
 142         | _ -> failwith "not valid level passing number"
 143       in
 144       if legal_passing then None else Some (ii.Ast_c.pinfo)
 145
 146         (*
 147         | Ast_c.CppOther ->
 148             (match s with
 149             | s when s =~ "KERN_.*" -> None
 150             | s when s =~ "__.*" -> None
 151             | _ ->
 152                 Some (ii.Ast_c.pinfo)
 153             )
 154         *)
 155
 156
 157   | Parser_c.TCommentMisc ii
 158   | Parser_c.TAction ii
 159     ->
 160       Some (ii.Ast_c.pinfo)
 161   | _ ->
 162       None
 163  )
 164
 165 let count_lines_commentized xs =
 166   let line = ref (-1) in
 167   let count = ref 0 in
 168   begin
 169     commentized xs +>
 170     List.iter
 171       (function
 172           Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
 173             let newline = pinfo.Common.line in
 174             if newline <> !line
 175             then begin
 176               line := newline;
 177               incr count
 178             end
 179         | _ -> ());
 180     !count
 181   end
 182
 183
 184
 185 let print_commentized xs =
 186   let line = ref (-1) in
 187   begin
 188     let ys = commentized xs in
 189     ys +>
 190     List.iter
 191       (function
 192           Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
 193             let newline = pinfo.Common.line in
 194             let s = pinfo.Common.str in
 195             let s = Str.global_substitute
 196                 (Str.regexp "\n") (fun s -> "") s
 197             in
 198             if newline =|= !line
 199             then prerr_string (s ^ " ")
 200             else begin
 201               if !line =|= -1
 202               then pr2_no_nl "passed:"
 203               else pr2_no_nl "\npassed:";
 204               line := newline;
 205               pr2_no_nl (s ^ " ");
 206             end
 207         | _ -> ());
 208     if not (null ys) then pr2 "";
 209   end
 210
 211
 212
 213
 214 (*****************************************************************************)
 215 (* Lexing only *)
 216 (*****************************************************************************)
 217
 218 (* called by parse_print_error_heuristic *)
 219 let tokens2 file =
 220  let table     = Common.full_charpos_to_pos_large file in
 221
 222  Common.with_open_infile file (fun chan ->
 223   let lexbuf = Lexing.from_channel chan in
 224   try
 225     let rec tokens_aux acc =
 226       let tok = Lexer_c.token lexbuf in
 227       (* fill in the line and col information *)
 228       let tok = tok +> TH.visitor_info_of_tok (fun ii ->
 229         { ii with Ast_c.pinfo=
 230           (* could assert pinfo.filename = file ? *)
 231           match Ast_c.pinfo_of_info ii with
 232             Ast_c.OriginTok pi ->
 233               Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
 234           | Ast_c.ExpandedTok (pi,vpi) ->
 235               Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
 236           | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
 237           | Ast_c.AbstractLineTok pi -> failwith "should not occur"
 238       })
 239       in
 240
 241       if TH.is_eof tok
 242       then List.rev (tok::acc)
 243       else tokens_aux (tok::acc)
 244     in
 245     tokens_aux []
 246   with
 247     | Lexer_c.Lexical s ->
 248         failwith ("lexical error " ^ s ^ "\n =" ^
 249                   (Common.error_message file (lexbuf_to_strpos lexbuf)))
 250     | e -> raise e
 251  )
 252
 253 let time_lexing ?(profile=true) a =
 254   if profile
 255   then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
 256   else tokens2 a
 257 let tokens ?profile a =
 258   Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
 259
 260
 261 let tokens_of_string string =
 262   let lexbuf = Lexing.from_string string in
 263   try
 264     let rec tokens_s_aux () =
 265       let tok = Lexer_c.token lexbuf in
 266       if TH.is_eof tok
 267       then [tok]
 268       else tok::(tokens_s_aux ())
 269     in
 270     tokens_s_aux ()
 271   with
 272     | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
 273     | e -> raise e
 274
 275
 276 (*****************************************************************************)
 277 (* Parsing, but very basic, no more used *)
 278 (*****************************************************************************)
 279
 280 (*
 281  * !!!Those function use refs, and are not reentrant !!! so take care.
 282  * It use globals defined in Lexer_parser.
 283  *
 284  * update: because now lexer return comments tokens, those functions
 285  * may not work anymore.
 286  *)
 287
 288 let parse file =
 289   let lexbuf = Lexing.from_channel (open_in file) in
 290   let result = Parser_c.main Lexer_c.token lexbuf in
 291   result
 292
 293
 294 let parse_print_error file =
 295   let chan = (open_in file) in
 296   let lexbuf = Lexing.from_channel chan in
 297
 298   let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
 299   try
 300     lexbuf +> Parser_c.main Lexer_c.token
 301   with
 302   | Lexer_c.Lexical s ->
 303       failwith ("lexical error " ^s^ "\n =" ^  error_msg ())
 304   | Parsing.Parse_error ->
 305       failwith ("parse error \n = " ^ error_msg ())
 306   | Semantic_c.Semantic (s, i) ->
 307       failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
 308   | e -> raise e
 309
 310
 311
 312
 313 (*****************************************************************************)
 314 (* Parsing subelements, useful to debug parser *)
 315 (*****************************************************************************)
 316
 317 (*
 318  * !!!Those function use refs, and are not reentrant !!! so take care.
 319  * It use globals defined in Lexer_parser.
 320  *)
 321
 322
 323 (* old:
 324  *   let parse_gen parsefunc s =
 325  *     let lexbuf = Lexing.from_string s in
 326  *     let result = parsefunc Lexer_c.token lexbuf in
 327  *     result
 328  *)
 329
 330 let parse_gen parsefunc s =
 331   let toks = tokens_of_string s +> List.filter TH.is_not_comment in
 332
 333
 334   (* Why use this lexing scheme ? Why not classically give lexer func
 335    * to parser ? Because I now keep comments in lexer. Could
 336    * just do a simple wrapper that when comment ask again for a token,
 337    * but maybe simpler to use cur_tok technique.
 338    *)
 339   let all_tokens = ref toks in
 340   let cur_tok    = ref (List.hd !all_tokens) in
 341
 342   let lexer_function =
 343     (fun _ ->
 344       if TH.is_eof !cur_tok
 345       then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
 346       else
 347         let v = Common.pop2 all_tokens in
 348         cur_tok := v;
 349         !cur_tok
 350     )
 351   in
 352   let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
 353   let result = parsefunc lexer_function lexbuf_fake in
 354   result
 355
 356
 357 let type_of_string       = parse_gen Parser_c.type_name
 358 let statement_of_string  = parse_gen Parser_c.statement
 359 let expression_of_string = parse_gen Parser_c.expr
 360
 361 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
 362
 363
 364
 365
 366
 367 (*****************************************************************************)
 368 (* Parsing default define macros, usually in a standard.h file *)
 369 (*****************************************************************************)
 370
 371 let extract_macros2 file =
 372   Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
 373     Flag_parsing_c.verbose_lexing := false;
 374     let toks = tokens ~profile:false file in
 375     let toks = Parsing_hacks.fix_tokens_define toks in
 376     Cpp_token_c.extract_macros toks
 377   )
 378
 379 let extract_macros a =
 380   Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
 381
 382
 383 (*****************************************************************************)
 384 (* Helper for main entry point *)
 385 (*****************************************************************************)
 386
 387
 388 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
 389  * possible error recovery. Indeed, they allow to skip some tokens and
 390  * still be able to call again the ocamlyacc parser. It is ugly code
 391  * because we cant modify ocamllex and ocamlyacc. As we want some
 392  * extended lexing tricks, we have to use such refs.
 393  *
 394  * Those refs are now also used for my lalr(k) technique. Indeed They
 395  * store the futur and previous tokens that were parsed, and so
 396  * provide enough context information for powerful lex trick.
 397  *
 398  * - passed_tokens_last_ckp stores the passed tokens since last
 399  *   checkpoint. Used for NotParsedCorrectly and also to build the
 400  *   info_item attached to each program_element.
 401  * - passed_tokens_clean is used for lookahead, in fact for lookback.
 402  * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
 403  *   contain some comments and so would make pattern matching difficult
 404  *   in lookahead. Hence this variable. We would like also to get rid
 405  *   of cpp instruction because sometimes a cpp instruction is between
 406  *   two tokens and makes a pattern matching fail. But lookahead also
 407  *   transform some cpp instruction (in comment) so can't remove them.
 408  *
 409  * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
 410  * whereas passed_tokens_clean and remaining_tokens_clean does not contain
 411  * comment-tokens.
 412  *
 413  * Normally we have:
 414  * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
 415  *    after the call to pop2.
 416  * toks = (reverse passed_tok) ++ remaining_tokens
 417  *     at the and of the lexer_function call.
 418  * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
 419  * At the end of lexer_function call,  cur_tok  overlap  with passed_tok.
 420  *
 421  * convention: I use "tr"  for "tokens refs"
 422  *
 423  * I now also need this lexing trick because the lexer return comment
 424  * tokens.
 425  *)
 426
 427 type tokens_state = {
 428   mutable rest :         Parser_c.token list;
 429   mutable rest_clean :   Parser_c.token list;
 430   mutable current :      Parser_c.token;
 431   (* it's passed since last "checkpoint", not passed from the beginning *)
 432   mutable passed :       Parser_c.token list;
 433   mutable passed_clean : Parser_c.token list;
 434 }
 435
 436 let mk_tokens_state toks =
 437   {
 438     rest       = toks;
 439     rest_clean = (toks +> List.filter TH.is_not_comment);
 440     current    = (List.hd toks);
 441     passed = [];
 442     passed_clean = [];
 443   }
 444
 445
 446
 447 let clone_tokens_state tr =
 448   { rest = tr.rest;
 449     rest_clean = tr.rest_clean;
 450     current = tr.current;
 451     passed = tr.passed;
 452     passed_clean = tr.passed_clean;
 453   }
 454 let copy_tokens_state ~src ~dst =
 455   dst.rest <- src.rest;
 456   dst.rest_clean <- src.rest_clean;
 457   dst.current <- src.current;
 458   dst.passed <- src.passed;
 459   dst.passed_clean <-  src.passed_clean;
 460   ()
 461
 462 (* todo? agglomerate the x##b ? *)
 463 let rec filter_noise n xs =
 464   match n, xs with
 465   | _, [] -> []
 466   | 0, xs -> xs
 467   | n, x::xs ->
 468       (match x with
 469       | Parser_c.TMacroAttr _ ->
 470           filter_noise (n-1) xs
 471       | _ ->
 472           x::filter_noise (n-1) xs
 473       )
 474
 475 let clean_for_lookahead xs =
 476   match xs with
 477   | [] -> []
 478   | [x] -> [x]
 479   | x::xs ->
 480       x::filter_noise 10 xs
 481
 482
 483
 484 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
 485  * tr means token refs.
 486  *)
 487 let rec lexer_function ~pass tr = fun lexbuf ->
 488   match tr.rest with
 489   | [] -> pr2_err "ALREADY AT END"; tr.current
 490   | v::xs ->
 491     tr.rest <- xs;
 492     tr.current <- v;
 493
 494     if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
 495
 496     if TH.is_comment v
 497     then begin
 498       tr.passed <- v::tr.passed;
 499       lexer_function ~pass tr lexbuf
 500     end
 501     else begin
 502       let x = List.hd tr.rest_clean  in
 503       tr.rest_clean <- List.tl tr.rest_clean;
 504       assert (x =*= v);
 505
 506       (match v with
 507
 508       (* fix_define1.
 509        *
 510        * Why not in parsing_hacks lookahead and do passing like
 511        * I do for some ifdef directives ? Because here I also need to
 512        * generate some tokens sometimes and so I need access to the
 513        * tr.passed, tr.rest, etc.
 514        *)
 515       | Parser_c.TDefine (tok) ->
 516           if not (LP.current_context () =*= LP.InTopLevel) &&
 517             (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
 518           then begin
 519             incr Stat.nDefinePassing;
 520             pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
 521             let v' = Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
 522             in
 523             tr.passed <- v'::tr.passed;
 524             tr.rest       <- Parsing_hacks.comment_until_defeol tr.rest;
 525             tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
 526             lexer_function ~pass tr lexbuf
 527           end
 528           else begin
 529             tr.passed <- v::tr.passed;
 530             tr.passed_clean <- v::tr.passed_clean;
 531             v
 532           end
 533
 534       | Parser_c.TInclude (includes, filename, inifdef, info) ->
 535           if not (LP.current_context () =*= LP.InTopLevel)  &&
 536             (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
 537           then begin
 538             incr Stat.nIncludePassing;
 539             pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
 540             let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
 541             tr.passed <- v::tr.passed;
 542             lexer_function ~pass tr lexbuf
 543           end
 544           else begin
 545             let (v,new_tokens) =
 546               Parsing_hacks.tokens_include (info, includes, filename, inifdef) in
 547             let new_tokens_clean =
 548               new_tokens +> List.filter TH.is_not_comment  in
 549
 550             tr.passed <- v::tr.passed;
 551             tr.passed_clean <- v::tr.passed_clean;
 552             tr.rest <- new_tokens ++ tr.rest;
 553             tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
 554             v
 555           end
 556
 557       | _ ->
 558
 559           (* typedef_fix1 *)
 560           let v = match v with
 561             | Parser_c.TIdent (s, ii) ->
 562                 if
 563                   LP.is_typedef s &&
 564                     not (!Flag_parsing_c.disable_add_typedef) &&
 565                     pass =|= 1
 566                 then Parser_c.TypedefIdent (s, ii)
 567                 else Parser_c.TIdent (s, ii)
 568             | x -> x
 569           in
 570
 571           let v = Parsing_hacks.lookahead ~pass
 572             (clean_for_lookahead (v::tr.rest_clean))
 573             tr.passed_clean in
 574
 575           tr.passed <- v::tr.passed;
 576
 577           (* the lookahead may have changed the status of the token and
 578            * consider it as a comment, for instance some #include are
 579            * turned into comments, hence this code. *)
 580           match v with
 581           | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
 582           | v ->
 583               tr.passed_clean <- v::tr.passed_clean;
 584               v
 585       )
 586     end
 587
 588
 589 let max_pass = 4
 590
 591
 592 let get_one_elem ~pass tr (file, filelines) =
 593
 594   if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
 595   then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
 596
 597   (* normally have to do that only when come from an exception in which
 598    * case the dt() may not have been done
 599    * TODO but if was in scoped scope ? have to let only the last scope
 600    * so need do a LP.lexer_reset_typedef ();
 601    *)
 602   LP.enable_typedef();
 603   LP._lexer_hint := (LP.default_hint ());
 604   LP.save_typedef_state();
 605
 606   tr.passed <- [];
 607
 608   let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
 609
 610   (try
 611       (* -------------------------------------------------- *)
 612       (* Call parser *)
 613       (* -------------------------------------------------- *)
 614       Common.profile_code_exclusif "YACC" (fun () ->
 615         Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
 616       )
 617     with e ->
 618       LP.restore_typedef_state();
 619
 620       (* must keep here, before the code that adjusts the tr fields *)
 621       let line_error = TH.line_of_tok tr.current in
 622
 623       let passed_before_error = tr.passed in
 624       let current = tr.current in
 625
 626       (*  error recovery, go to next synchro point *)
 627       let (passed', rest') =
 628         Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
 629       tr.rest <- rest';
 630       tr.passed <- passed';
 631
 632       tr.current <- List.hd passed';
 633       tr.passed_clean <- [];           (* enough ? *)
 634       (* with error recovery, rest and rest_clean may not be in sync *)
 635       tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
 636
 637
 638       let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
 639       Right (info_of_bads,  line_error,
 640             tr.passed, passed_before_error,
 641             current, e)
 642   )
 643
 644
 645
 646 (* Macro problem recovery *)
 647 (* used by the multi-pass error recovery expand-on-demand *)
 648 (*
 649 val candidate_macros_in_passed:
 650   defs: (string, define_def) Hashtbl.t ->
 651   Parser_c.token list -> (string * define_def) list
 652 *)
 653
 654 let candidate_macros_in_passed2 ~defs passed  =
 655   let res = ref [] in
 656   let res2 = ref [] in
 657
 658   passed +> List.iter (function
 659   | Parser_c.TIdent (s,_)
 660    (* bugfix: may have to undo some infered things *)
 661   | Parser_c.TMacroIterator (s,_)
 662   | Parser_c.TypedefIdent (s,_)
 663     ->
 664       (match Common.hfind_option s defs with
 665       | Some def ->
 666           if s ==~ Parsing_hacks.regexp_macro
 667           then
 668             (* pr2 (spf "candidate: %s" s); *)
 669             Common.push2 (s, def) res
 670           else
 671             Common.push2 (s, def) res2
 672         | None -> ()
 673         )
 674
 675   | _ -> ()
 676   );
 677   if null !res
 678   then !res2
 679   else !res
 680
 681 let candidate_macros_in_passed ~defs b =
 682   Common.profile_code "MACRO managment" (fun () ->
 683     candidate_macros_in_passed2 ~defs b)
 684
 685
 686
 687
 688
 689 let find_optional_macro_to_expand2 ~defs toks =
 690
 691   let defs = Common.hash_of_list defs in
 692
 693   let toks = toks +> Common.map (function
 694
 695     (* special cases to undo *)
 696     | Parser_c.TMacroIterator (s, ii) ->
 697         if Hashtbl.mem defs s
 698         then Parser_c.TIdent (s, ii)
 699         else Parser_c.TMacroIterator (s, ii)
 700
 701     | Parser_c.TypedefIdent (s, ii) ->
 702         if Hashtbl.mem defs s
 703         then Parser_c.TIdent (s, ii)
 704         else Parser_c.TypedefIdent (s, ii)
 705
 706     | x -> x
 707   ) in
 708
 709   let tokens = toks in
 710   Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
 711
 712   (* just calling apply_macro_defs and having a specialized version
 713    * of the code in fix_tokens_cpp is not enough as some work such
 714    * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
 715    * will not get the chance to be run on the new expanded tokens.
 716    * Hence even if it's expensive, it's currently better to
 717    * just call directly fix_tokens_cpp again here.
 718
 719   let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
 720   let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
 721   let paren_grouped = TV.mk_parenthised  cleaner in
 722   Cpp_token_c.apply_macro_defs
 723     ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
 724     ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
 725     defs paren_grouped;
 726   (* because the before field is used by apply_macro_defs *)
 727   tokens2 := TV.rebuild_tokens_extented !tokens2;
 728   Parsing_hacks.insert_virtual_positions
 729     (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
 730   *)
 731 let find_optional_macro_to_expand ~defs a =
 732     Common.profile_code "MACRO managment" (fun () ->
 733       find_optional_macro_to_expand2 ~defs a)
 734
 735
 736
 737
 738
 739 (*****************************************************************************)
 740 (* Main entry points *)
 741 (*****************************************************************************)
 742
 743 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref)  =
 744   ref (Hashtbl.create 101)
 745
 746 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref)  =
 747   ref (Hashtbl.create 101)
 748
 749
 750 (* can not be put in parsing_hack, cos then mutually recursive problem as
 751  * we also want to parse the standard.h file.
 752  *)
 753 let init_defs_macros std_h =
 754   if not (Common.lfile_exists std_h)
 755   then pr2 ("warning: Can't find default macro file: " ^ std_h)
 756   else begin
 757     pr2 ("init_defs: " ^ std_h);
 758     _defs := Common.hash_of_list (extract_macros std_h);
 759   end
 760
 761 let init_defs_builtins file_h =
 762   if not (Common.lfile_exists file_h)
 763   then pr2 ("warning: Can't find macro file: " ^ file_h)
 764   else begin
 765     pr2 ("init_defs_builtins: " ^ file_h);
 766     _defs_builtins :=
 767       Common.hash_of_list (extract_macros file_h);
 768   end
 769
 770
 771
 772 type info_item =  string * Parser_c.token list
 773
 774 type program2 = toplevel2 list
 775      and toplevel2 = Ast_c.toplevel * info_item
 776
 777 let program_of_program2 xs =
 778   xs +> List.map fst
 779
 780 let with_program2 f program2 =
 781   program2
 782   +> Common.unzip
 783   +> (fun (program, infos) ->
 784     f program, infos
 785   )
 786   +> Common.uncurry Common.zip
 787
 788
 789
 790
 791
 792
 793 (* note: as now we go in 2 passes, there is first all the error message of
 794  * the lexer, and then the error of the parser. It is not anymore
 795  * interwinded.
 796  *
 797  * !!!This function use refs, and is not reentrant !!! so take care.
 798  * It use globals defined in Lexer_parser and also the _defs global
 799  * in parsing_hack.ml.
 800  *
 801  * This function uses internally some semi globals in the
 802  * tokens_stat record and parsing_stat record.
 803  *)
 804
 805 let parse_print_error_heuristic2 file =
 806
 807   let filelines = Common.cat_array file in
 808   let stat = Parsing_stat.default_stat file in
 809
 810   (* -------------------------------------------------- *)
 811   (* call lexer and get all the tokens *)
 812   (* -------------------------------------------------- *)
 813   LP.lexer_reset_typedef();
 814   Parsing_hacks.ifdef_paren_cnt := 0;
 815
 816   let toks_orig = tokens file in
 817   let toks = Parsing_hacks.fix_tokens_define toks_orig in
 818   let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
 819
 820   (* expand macros on demand trick, preparation phase *)
 821   let macros =
 822     Common.profile_code "MACRO mgmt prep 1" (fun () ->
 823       let macros = Hashtbl.copy !_defs in
 824       (* include also builtins as some macros may generate some builtins too
 825        * like __decl_spec or __stdcall
 826        *)
 827       !_defs_builtins +> Hashtbl.iter (fun s def ->
 828         Hashtbl.replace macros   s def;
 829       );
 830       macros
 831     )
 832   in
 833   Common.profile_code "MACRO mgmt prep 2" (fun () ->
 834     let local_macros = extract_macros file in
 835     local_macros +> List.iter (fun (s, def) ->
 836       Hashtbl.replace macros   s def;
 837     );
 838   );
 839
 840   let tr = mk_tokens_state toks in
 841
 842   let rec loop tr =
 843
 844     (* todo?: I am not sure that it represents current_line, cos maybe
 845      * tr.current partipated in the previous parsing phase, so maybe tr.current
 846      * is not the first token of the next parsing phase. Same with checkpoint2.
 847      * It would be better to record when we have a } or ; in parser.mly,
 848      *  cos we know that they are the last symbols of external_declaration2.
 849      *
 850      * bugfix: may not be equal to 'file' as after macro expansions we can
 851      * start to parse a new entity from the body of a macro, for instance
 852      * when parsing a define_machine() body, cf standard.h
 853      *)
 854     let checkpoint = TH.line_of_tok tr.current in
 855     let checkpoint_file = TH.file_of_tok tr.current in
 856
 857     (* call the parser *)
 858     let elem =
 859       let pass1 =
 860         Common.profile_code "Parsing: 1st pass" (fun () ->
 861           get_one_elem ~pass:1 tr (file, filelines)
 862         ) in
 863       match pass1 with
 864       | Left e -> Left e
 865       | Right (info,line_err, passed, passed_before_error, cur, exn) ->
 866           if !Flag_parsing_c.disable_multi_pass
 867           then pass1
 868           else begin
 869             Common.profile_code "Parsing: multi pass" (fun () ->
 870
 871             pr2_err "parsing pass2: try again";
 872             let toks = List.rev passed ++ tr.rest in
 873             let new_tr = mk_tokens_state toks in
 874             copy_tokens_state ~src:new_tr ~dst:tr;
 875             let passx = get_one_elem ~pass:2 tr (file, filelines) in
 876
 877             (match passx with
 878             | Left e -> passx
 879             | Right (info,line_err,passed,passed_before_error,cur,exn) ->
 880                 let candidates =
 881                   candidate_macros_in_passed ~defs:macros passed
 882                 in
 883
 884
 885                 if is_define_passed passed || null candidates
 886                 then passx
 887                 else begin
 888                   (* todo factorize code *)
 889
 890                   pr2_err "parsing pass3: try again";
 891                   let toks = List.rev passed ++ tr.rest in
 892                   let toks' =
 893                     find_optional_macro_to_expand ~defs:candidates toks in
 894                   let new_tr = mk_tokens_state toks' in
 895                   copy_tokens_state ~src:new_tr ~dst:tr;
 896                   let passx = get_one_elem ~pass:3 tr (file, filelines) in
 897
 898                   (match passx with
 899                   | Left e -> passx
 900                   | Right (info,line_err,passed,passed_before_error,cur,exn) ->
 901                       pr2_err "parsing pass4: try again";
 902
 903                       let candidates =
 904                         candidate_macros_in_passed
 905                           ~defs:macros passed
 906                       in
 907
 908                       let toks = List.rev passed ++ tr.rest in
 909                       let toks' =
 910                       find_optional_macro_to_expand ~defs:candidates toks in
 911                       let new_tr = mk_tokens_state toks' in
 912                       copy_tokens_state ~src:new_tr ~dst:tr;
 913                       let passx = get_one_elem ~pass:4 tr (file, filelines) in
 914                       passx
 915                   )
 916                  end
 917             )
 918             )
 919           end
 920     in
 921
 922
 923     (* again not sure if checkpoint2 corresponds to end of bad region *)
 924     let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
 925     let checkpoint2_file = TH.file_of_tok tr.current in
 926
 927     let diffline =
 928       if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
 929       then (checkpoint2 - checkpoint)
 930       else 0
 931         (* TODO? so if error come in middle of something ? where the
 932          * start token was from original file but synchro found in body
 933          * of macro ? then can have wrong number of lines stat.
 934          * Maybe simpler just to look at tr.passed and count
 935          * the lines in the token from the correct file ?
 936          *)
 937     in
 938     let info = mk_info_item file (List.rev tr.passed) in
 939
 940     (* some stat updates *)
 941     stat.Stat.commentized <-
 942       stat.Stat.commentized + count_lines_commentized (snd info);
 943
 944     let elem =
 945       match elem with
 946       | Left e ->
 947           stat.Stat.correct <- stat.Stat.correct + diffline;
 948           e
 949       | Right (info_of_bads, line_error, toks_of_bads,
 950               _passed_before_error, cur, exn) ->
 951
 952           let was_define = is_define_passed tr.passed in
 953
 954           if was_define && !Flag_parsing_c.filter_msg_define_error
 955           then ()
 956           else begin
 957
 958             (match exn with
 959             | Lexer_c.Lexical _
 960             | Parsing.Parse_error
 961             | Semantic_c.Semantic _ -> ()
 962             | e -> raise e
 963             );
 964
 965             if !Flag_parsing_c.show_parsing_error
 966             then begin
 967               (match exn with
 968               (* Lexical is not anymore launched I think *)
 969               | Lexer_c.Lexical s ->
 970                   pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
 971               | Parsing.Parse_error ->
 972                   pr2 ("parse error \n = " ^ error_msg_tok cur)
 973               | Semantic_c.Semantic (s, i) ->
 974                   pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
 975               | e -> raise Impossible
 976               );
 977               (* bugfix: *)
 978               if (checkpoint_file =$= checkpoint2_file) &&
 979                 checkpoint_file =$= file
 980               then print_bad line_error (checkpoint, checkpoint2) filelines
 981               else pr2 "PB: bad: but on tokens not from original file"
 982             end;
 983
 984
 985             let pbline =
 986               toks_of_bads
 987               +> Common.filter (TH.is_same_line_or_close line_error)
 988               +> Common.filter TH.is_ident_like
 989             in
 990             let error_info =
 991               (pbline +> List.map TH.str_of_tok), line_error
 992             in
 993             stat.Stat.problematic_lines <-
 994               error_info::stat.Stat.problematic_lines;
 995
 996           end;
 997
 998           if was_define && !Flag_parsing_c.filter_define_error
 999           then stat.Stat.correct <- stat.Stat.correct + diffline
1000           else stat.Stat.bad     <- stat.Stat.bad     + diffline;
1001
1002           Ast_c.NotParsedCorrectly info_of_bads
1003     in
1004
1005     (match elem with
1006     | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1007     | xs -> (xs, info):: loop tr (* recurse *)
1008     )
1009   in
1010   let v = loop tr in
1011   let v = with_program2 Parsing_consistency_c.consistency_checking v in
1012   (v, stat)
1013
1014
1015 let time_total_parsing a  =
1016   Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a)
1017
1018 let parse_print_error_heuristic a  =
1019   Common.profile_code "C parsing" (fun () -> time_total_parsing a)
1020
1021
1022 (* alias *)
1023 let parse_c_and_cpp a = parse_print_error_heuristic a
1024
1025 (*****************************************************************************)
1026 (* Same but faster cos memoize stuff *)
1027 (*****************************************************************************)
1028 let parse_cache file =
1029   if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file
1030   else
1031   let _ = pr2 "TOFIX" in
1032   let need_no_changed_files =
1033     (* should use Sys.argv.(0), would be safer. *)
1034
1035     [
1036       (* TOFIX
1037       Config.path ^ "/parsing_c/c_parser.cma";
1038       (* we may also depend now on the semantic patch because
1039          the SP may use macro and so we will disable some of the
1040          macro expansions from standard.h.
1041       *)
1042       !Config.std_h;
1043       *)
1044     ]
1045   in
1046   let need_no_changed_variables =
1047     (* could add some of the flags of flag_parsing_c.ml *)
1048     []
1049   in
1050   Common.cache_computation_robust
1051     file ".ast_raw"
1052     (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1053     (fun () -> parse_print_error_heuristic file)
1054
1055
1056
1057 (*****************************************************************************)
1058 (* Some special cases *)
1059 (*****************************************************************************)
1060
1061 let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
1062   let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1063   Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1064   let program = parse_c_and_cpp tmpfile +> fst in
1065   program +> Common.find_some (fun (e,_) ->
1066     match e with
1067     | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1068     | _ -> None
1069   )
1070
1071 let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
1072   let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1073   Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1074   let program = parse_c_and_cpp tmpfile +> fst in
1075   program +> Common.find_some (fun (e,_) ->
1076     match e with
1077     | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
1078         (match compound with
1079         | [Ast_c.StmtElem st] ->
1080             (match Ast_c.unwrap_st st with
1081             | Ast_c.ExprStatement (Some e) -> Some e
1082             | _ -> None
1083             )
1084         | _ -> None
1085         )
1086     | _ -> None
1087   )