parsing_c/parse_c.ml

   1 (* Yoann Padioleau
   2  *
   3  * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
   4  * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License (GPL)
   8  * version 2 as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * file license.txt for more details.
  14  *)
  15
  16 open Common
  17
  18 module TH = Token_helpers
  19 module LP = Lexer_parser
  20
  21 module Stat = Parsing_stat
  22
  23 (*****************************************************************************)
  24 (* Wrappers *)
  25 (*****************************************************************************)
  26 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
  27
  28 (*****************************************************************************)
  29 (* Helpers *)
  30 (*****************************************************************************)
  31
  32 let lexbuf_to_strpos lexbuf     =
  33   (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
  34
  35 let token_to_strpos tok =
  36   (TH.str_of_tok tok, TH.pos_of_tok tok)
  37
  38
  39 let mk_info_item2 filename toks =
  40   let buf = Buffer.create 100 in
  41   let s =
  42     (* old: get_slice_file filename (line1, line2) *)
  43     begin
  44       toks +> List.iter (fun tok ->
  45         match TH.pinfo_of_tok tok with
  46         | Ast_c.OriginTok _ ->
  47             Buffer.add_string buf (TH.str_of_tok tok)
  48         | Ast_c.AbstractLineTok _ ->
  49             raise Impossible
  50         | _ -> ()
  51       );
  52       Buffer.contents buf
  53     end
  54   in
  55   (s, toks)
  56
  57 let mk_info_item a b =
  58   Common.profile_code "C parsing.mk_info_item"
  59     (fun () -> mk_info_item2 a b)
  60
  61
  62 let info_same_line line xs =
  63   xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
  64
  65
  66 (* move in cpp_token_c ? *)
  67 let is_define_passed passed =
  68   let xs = passed +> List.rev +> List.filter TH.is_not_comment in
  69   if List.length xs >= 2
  70   then
  71     (match Common.head_middle_tail xs with
  72     | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
  73         true
  74     | _ -> false
  75     )
  76   else begin
  77     pr2_err "WEIRD: length list of error recovery tokens < 2 ";
  78     false
  79   end
  80
  81
  82 (*****************************************************************************)
  83 (* Error diagnostic  *)
  84 (*****************************************************************************)
  85
  86 let error_msg_tok tok =
  87   let file = TH.file_of_tok tok in
  88   if !Flag_parsing_c.verbose_parsing
  89   then Common.error_message file (token_to_strpos tok)
  90   else ("error in " ^ file  ^ "; set verbose_parsing for more info")
  91
  92
  93 let print_bad line_error (start_line, end_line) filelines  =
  94   begin
  95     pr2 ("badcount: " ^ i_to_s (end_line - start_line));
  96
  97     for i = start_line to end_line do
  98       let line = filelines.(i) in
  99
 100       if i =|= line_error
 101       then  pr2 ("BAD:!!!!!" ^ " " ^ line)
 102       else  pr2 ("bad:" ^ " " ^      line)
 103     done
 104   end
 105
 106
 107 (*****************************************************************************)
 108 (* Stats on what was passed/commentized  *)
 109 (*****************************************************************************)
 110
 111 let commentized xs = xs +> Common.tail_map_filter (function
 112   | Parser_c.TCommentCpp (cppkind, ii) ->
 113       let s = Ast_c.str_of_info ii in
 114       let legal_passing =
 115         match !Flag_parsing_c.filter_passed_level with
 116         | 0 -> false
 117         | 1 ->
 118             List.mem cppkind [Token_c.CppAttr]
 119             ||
 120             (s =~ "__.*")
 121         | 2 ->
 122             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
 123             ||
 124             (s =~ "__.*")
 125         | 3 ->
 126             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
 127             ||
 128             (s =~ "__.*")
 129         | 4 ->
 130             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
 131             ||
 132             (s =~ "__.*")
 133
 134
 135         | 5 ->
 136             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
 137             ||
 138             (s =~ "__.*")
 139
 140
 141
 142
 143         | _ -> failwith "not valid level passing number"
 144       in
 145       if legal_passing then None else Some (ii.Ast_c.pinfo)
 146
 147         (*
 148         | Ast_c.CppOther ->
 149             (match s with
 150             | s when s =~ "KERN_.*" -> None
 151             | s when s =~ "__.*" -> None
 152             | _ ->
 153                 Some (ii.Ast_c.pinfo)
 154             )
 155         *)
 156
 157
 158   | Parser_c.TCommentMisc ii
 159   | Parser_c.TAction ii
 160     ->
 161       Some (ii.Ast_c.pinfo)
 162   | _ ->
 163       None
 164  )
 165
 166 let count_lines_commentized xs =
 167   let line = ref (-1) in
 168   let count = ref 0 in
 169   begin
 170     commentized xs +>
 171     List.iter
 172       (function
 173           Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
 174             let newline = pinfo.Common.line in
 175             if newline <> !line
 176             then begin
 177               line := newline;
 178               incr count
 179             end
 180         | _ -> ());
 181     !count
 182   end
 183
 184
 185
 186 let print_commentized xs =
 187   let line = ref (-1) in
 188   begin
 189     let ys = commentized xs in
 190     ys +>
 191     List.iter
 192       (function
 193           Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
 194             let newline = pinfo.Common.line in
 195             let s = pinfo.Common.str in
 196             let s = Str.global_substitute
 197                 (Str.regexp "\n") (fun s -> "") s
 198             in
 199             if newline =|= !line
 200             then prerr_string (s ^ " ")
 201             else begin
 202               if !line =|= -1
 203               then pr2_no_nl "passed:"
 204               else pr2_no_nl "\npassed:";
 205               line := newline;
 206               pr2_no_nl (s ^ " ");
 207             end
 208         | _ -> ());
 209     if not (null ys) then pr2 "";
 210   end
 211
 212
 213
 214
 215 (*****************************************************************************)
 216 (* Lexing only *)
 217 (*****************************************************************************)
 218
 219 (* called by parse_print_error_heuristic *)
 220 let tokens2 file =
 221  let table     = Common.full_charpos_to_pos_large file in
 222
 223  Common.with_open_infile file (fun chan ->
 224   let lexbuf = Lexing.from_channel chan in
 225   try
 226     let rec tokens_aux acc =
 227       let tok = Lexer_c.token lexbuf in
 228       (* fill in the line and col information *)
 229       let tok = tok +> TH.visitor_info_of_tok (fun ii ->
 230         { ii with Ast_c.pinfo=
 231           (* could assert pinfo.filename = file ? *)
 232           match Ast_c.pinfo_of_info ii with
 233             Ast_c.OriginTok pi ->
 234               Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
 235           | Ast_c.ExpandedTok (pi,vpi) ->
 236               Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
 237           | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
 238           | Ast_c.AbstractLineTok pi -> failwith "should not occur"
 239       })
 240       in
 241
 242       if TH.is_eof tok
 243       then List.rev (tok::acc)
 244       else tokens_aux (tok::acc)
 245     in
 246     tokens_aux []
 247   with
 248     | Lexer_c.Lexical s ->
 249         failwith ("lexical error " ^ s ^ "\n =" ^
 250                   (Common.error_message file (lexbuf_to_strpos lexbuf)))
 251     | e -> raise e
 252  )
 253
 254 let time_lexing ?(profile=true) a =
 255   if profile
 256   then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
 257   else tokens2 a
 258 let tokens ?profile a =
 259   Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
 260
 261
 262 let tokens_of_string string =
 263   let lexbuf = Lexing.from_string string in
 264   try
 265     let rec tokens_s_aux () =
 266       let tok = Lexer_c.token lexbuf in
 267       if TH.is_eof tok
 268       then [tok]
 269       else tok::(tokens_s_aux ())
 270     in
 271     tokens_s_aux ()
 272   with
 273     | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
 274     | e -> raise e
 275
 276
 277 (*****************************************************************************)
 278 (* Parsing, but very basic, no more used *)
 279 (*****************************************************************************)
 280
(*
 * !!! These functions use refs and are not reentrant !!! So take care.
 * They use globals defined in Lexer_parser.
 *
 * update: because the lexer now returns comment tokens, these functions
 * may not work anymore.
 *)
 288
 289 let parse file =
 290   let lexbuf = Lexing.from_channel (open_in file) in
 291   let result = Parser_c.main Lexer_c.token lexbuf in
 292   result
 293
 294
 295 let parse_print_error file =
 296   let chan = (open_in file) in
 297   let lexbuf = Lexing.from_channel chan in
 298
 299   let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
 300   try
 301     lexbuf +> Parser_c.main Lexer_c.token
 302   with
 303   | Lexer_c.Lexical s ->
 304       failwith ("lexical error " ^s^ "\n =" ^  error_msg ())
 305   | Parsing.Parse_error ->
 306       failwith ("parse error \n = " ^ error_msg ())
 307   | Semantic_c.Semantic (s, i) ->
 308       failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
 309   | e -> raise e
 310
 311
 312
 313
 314 (*****************************************************************************)
 315 (* Parsing subelements, useful to debug parser *)
 316 (*****************************************************************************)
 317
(*
 * !!! These functions use refs and are not reentrant !!! So take care.
 * They use globals defined in Lexer_parser.
 *)
 322
 323
 324 (* old:
 325  *   let parse_gen parsefunc s =
 326  *     let lexbuf = Lexing.from_string s in
 327  *     let result = parsefunc Lexer_c.token lexbuf in
 328  *     result
 329  *)
 330
 331 let parse_gen parsefunc s =
 332   let toks = tokens_of_string s +> List.filter TH.is_not_comment in
 333
 334
 335   (* Why use this lexing scheme ? Why not classically give lexer func
 336    * to parser ? Because I now keep comments in lexer. Could
 337    * just do a simple wrapper that when comment ask again for a token,
 338    * but maybe simpler to use cur_tok technique.
 339    *)
 340   let all_tokens = ref toks in
 341   let cur_tok    = ref (List.hd !all_tokens) in
 342
 343   let lexer_function =
 344     (fun _ ->
 345       if TH.is_eof !cur_tok
 346       then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
 347       else
 348         let v = Common.pop2 all_tokens in
 349         cur_tok := v;
 350         !cur_tok
 351     )
 352   in
 353   let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
 354   let result = parsefunc lexer_function lexbuf_fake in
 355   result
 356
 357
 358 let type_of_string       = parse_gen Parser_c.type_name
 359 let statement_of_string  = parse_gen Parser_c.statement
 360 let expression_of_string = parse_gen Parser_c.expr
 361
 362 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
 363
 364
 365
 366
 367
 368 (*****************************************************************************)
 369 (* Parsing default define macros, usually in a standard.h file *)
 370 (*****************************************************************************)
 371
 372 let extract_macros2 file =
 373   Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
 374     Flag_parsing_c.verbose_lexing := false;
 375     let toks = tokens ~profile:false file in
 376     let toks = Parsing_hacks.fix_tokens_define toks in
 377     Cpp_token_c.extract_macros toks
 378   )
 379
 380 let extract_macros a =
 381   Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
 382
 383
 384 (*****************************************************************************)
 385 (* Helper for main entry point *)
 386 (*****************************************************************************)
 387
 388
 389 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
 390  * possible error recovery. Indeed, they allow to skip some tokens and
 391  * still be able to call again the ocamlyacc parser. It is ugly code
 392  * because we cant modify ocamllex and ocamlyacc. As we want some
 393  * extended lexing tricks, we have to use such refs.
 394  *
 * Those refs are now also used for my lalr(k) technique. Indeed, they
 * store the future and previous tokens that were parsed, and so
 * provide enough context information for powerful lexing tricks.
 398  *
 399  * - passed_tokens_last_ckp stores the passed tokens since last
 400  *   checkpoint. Used for NotParsedCorrectly and also to build the
 401  *   info_item attached to each program_element.
 402  * - passed_tokens_clean is used for lookahead, in fact for lookback.
 403  * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
 404  *   contain some comments and so would make pattern matching difficult
 405  *   in lookahead. Hence this variable. We would like also to get rid
 406  *   of cpp instruction because sometimes a cpp instruction is between
 407  *   two tokens and makes a pattern matching fail. But lookahead also
 408  *   transform some cpp instruction (in comment) so can't remove them.
 409  *
 410  * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
 411  * whereas passed_tokens_clean and remaining_tokens_clean does not contain
 412  * comment-tokens.
 413  *
 414  * Normally we have:
 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
 *    after the call to pop2.
 * toks = (reverse passed_tok) ++ remaining_tokens
 *     at the end of the lexer_function call.
 419  * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
 420  * At the end of lexer_function call,  cur_tok  overlap  with passed_tok.
 421  *
 422  * convention: I use "tr"  for "tokens refs"
 423  *
 424  * I now also need this lexing trick because the lexer return comment
 425  * tokens.
 426  *)
 427
(* Mutable token state shared by lexer_function and get_one_elem while
 * parsing one toplevel element. *)
type tokens_state = {
  (* tokens not yet consumed by lexer_function, comment tokens included *)
  mutable rest :         Parser_c.token list;
  (* built in mk_tokens_state as [rest] filtered with TH.is_not_comment;
   * lexer_function pops it in step with [rest] for non-comment tokens *)
  mutable rest_clean :   Parser_c.token list;
  (* token most recently popped from [rest] by lexer_function *)
  mutable current :      Parser_c.token;
  (* it's passed since last "checkpoint", not passed from the beginning *)
  mutable passed :       Parser_c.token list;
  (* tokens that lexer_function actually returned to its caller, most
   * recent first; given to Parsing_hacks.lookahead as context *)
  mutable passed_clean : Parser_c.token list;
}
 436
 437 let mk_tokens_state toks =
 438   {
 439     rest       = toks;
 440     rest_clean = (toks +> List.filter TH.is_not_comment);
 441     current    = (List.hd toks);
 442     passed = [];
 443     passed_clean = [];
 444   }
 445
 446
 447
 448 let clone_tokens_state tr =
 449   { rest = tr.rest;
 450     rest_clean = tr.rest_clean;
 451     current = tr.current;
 452     passed = tr.passed;
 453     passed_clean = tr.passed_clean;
 454   }
 455 let copy_tokens_state ~src ~dst =
 456   dst.rest <- src.rest;
 457   dst.rest_clean <- src.rest_clean;
 458   dst.current <- src.current;
 459   dst.passed <- src.passed;
 460   dst.passed_clean <-  src.passed_clean;
 461   ()
 462
 463 (* todo? agglomerate the x##b ? *)
 464 let rec filter_noise n xs =
 465   match n, xs with
 466   | _, [] -> []
 467   | 0, xs -> xs
 468   | n, x::xs ->
 469       (match x with
 470       | Parser_c.TMacroAttr _ ->
 471           filter_noise (n-1) xs
 472       | _ ->
 473           x::filter_noise (n-1) xs
 474       )
 475
 476 let clean_for_lookahead xs =
 477   match xs with
 478   | [] -> []
 479   | [x] -> [x]
 480   | x::xs ->
 481       x::filter_noise 10 xs
 482
 483
 484
 485 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
 486  * tr means token refs.
 487  *)
(* Token supplier handed to the yacc parser in place of a real lexer (see
 * get_one_elem). [lexbuf] is only passed along to the recursive calls; the
 * tokens come from the mutable state [tr].
 *
 * Each call pops the head of tr.rest into tr.current, then:
 *  - a comment token is pushed on tr.passed and the next token is tried;
 *  - a TDefine or TInclude met outside of the toplevel context is turned
 *    into a TCommentCpp and skipped when cpp_directive_passing is set or
 *    pass >= 2;
 *  - any other token goes through the typedef fix and
 *    Parsing_hacks.lookahead, which may itself turn it into a TCommentCpp,
 *    in which case it is skipped too.
 * A token that is returned is pushed on both tr.passed and tr.passed_clean.
 * When tr.rest is empty, "ALREADY AT END" is logged and tr.current is
 * returned again. *)
let rec lexer_function ~pass tr = fun lexbuf ->
  match tr.rest with
  | [] -> pr2_err "ALREADY AT END"; tr.current
  | v::xs ->
    tr.rest <- xs;
    tr.current <- v;

    if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;

    if TH.is_comment v
    then begin
      tr.passed <- v::tr.passed;
      lexer_function ~pass tr lexbuf
    end
    else begin
      (* v is not a comment, so it must also be the head of rest_clean *)
      let x = List.hd tr.rest_clean  in
      tr.rest_clean <- List.tl tr.rest_clean;
      assert (x =*= v);

      (match v with

      (* fix_define1.
       *
       * Why not in parsing_hacks lookahead and do passing like
       * I do for some ifdef directives ? Because here I also need to
       * generate some tokens sometimes and so I need access to the
       * tr.passed, tr.rest, etc.
       *)
      | Parser_c.TDefine (tok) ->
          if not (LP.current_context () =*= LP.InTopLevel) &&
            (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
          then begin
            incr Stat.nDefinePassing;
            pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
            let v' = Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
            in
            tr.passed <- v'::tr.passed;
            (* the rest of the directive, up to its end-of-line token, is
             * handled by these two Parsing_hacks helpers *)
            tr.rest       <- Parsing_hacks.comment_until_defeol tr.rest;
            tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
            lexer_function ~pass tr lexbuf
          end
          else begin
            tr.passed <- v::tr.passed;
            tr.passed_clean <- v::tr.passed_clean;
            v
          end

      | Parser_c.TInclude (includes, filename, inifdef, info) ->
          if not (LP.current_context () =*= LP.InTopLevel)  &&
            (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
          then begin
            incr Stat.nIncludePassing;
            pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
            let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
            tr.passed <- v::tr.passed;
            lexer_function ~pass tr lexbuf
          end
          else begin
            (* the include is replaced by the token v, and new_tokens are
             * put in front of what remains to be lexed *)
            let (v,new_tokens) =
              Parsing_hacks.tokens_include (info, includes, filename, inifdef) in
            let new_tokens_clean =
              new_tokens +> List.filter TH.is_not_comment  in

            tr.passed <- v::tr.passed;
            tr.passed_clean <- v::tr.passed_clean;
            tr.rest <- new_tokens ++ tr.rest;
            tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
            v
          end

      | _ ->

          (* typedef_fix1 *)
          (* only done in the first pass, and only if not disabled *)
          let v = match v with
            | Parser_c.TIdent (s, ii) ->
                if
                  LP.is_typedef s &&
                    not (!Flag_parsing_c.disable_add_typedef) &&
                    pass =|= 1
                then Parser_c.TypedefIdent (s, ii)
                else Parser_c.TIdent (s, ii)
            | x -> x
          in

          let v = Parsing_hacks.lookahead ~pass
            (clean_for_lookahead (v::tr.rest_clean))
            tr.passed_clean in

          tr.passed <- v::tr.passed;

          (* the lookahead may have changed the status of the token and
           * consider it as a comment, for instance some #include are
           * turned into comments, hence this code. *)
          match v with
          | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
          | v ->
              tr.passed_clean <- v::tr.passed_clean;
              v
      )
    end
 588
 589
(* Highest pass number. NOTE(review): not referenced in the part of the file
 * visible here, where get_one_elem is called with ~pass:1 up to ~pass:4 -
 * keep in sync with those calls. *)
let max_pass = 4
 591
 592
(* Parses one element with Parser_c.celem, fed by lexer_function over the
 * token state [tr]. tr.passed is reset first, so that afterwards it only
 * holds the tokens seen during this call.
 *
 * Returns [Left elem] when the parser succeeds. On any exception [e] the
 * typedef state is restored, [tr] is moved to the next synchronization
 * point found by Parsing_recovery_c.find_next_synchro, and the result is
 *   Right (info_of_bads, line_error, passed, passed_before_error,
 *          current, e)
 * where line_error and current are the line and the token that were
 * current when the exception was raised, passed_before_error is tr.passed
 * at that point, passed is tr.passed after the resynchronization and
 * info_of_bads are the infos of those passed tokens.
 *
 * The (file, filelines) argument is not used by the body. *)
let get_one_elem ~pass tr (file, filelines) =

  if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
  then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";

  (* normally have to do that only when come from an exception in which
   * case the dt() may not have been done
   * TODO but if was in scoped scope ? have to let only the last scope
   * so need do a LP.lexer_reset_typedef ();
   *)
  LP.enable_typedef();
  LP._lexer_hint := (LP.default_hint ());
  LP.save_typedef_state();

  tr.passed <- [];

  (* dummy lexbuf: the tokens come from tr, this buffer must never be read *)
  let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in

  (try
      (* -------------------------------------------------- *)
      (* Call parser *)
      (* -------------------------------------------------- *)
      Common.profile_code_exclusif "YACC" (fun () ->
        Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
      )
    with e ->
      (* undo the typedef changes made since save_typedef_state above *)
      LP.restore_typedef_state();

      (* must keep here, before the code that adjusts the tr fields *)
      let line_error = TH.line_of_tok tr.current in

      let passed_before_error = tr.passed in
      let current = tr.current in
      (*  error recovery, go to next synchro point *)
      let (passed', rest') =
        Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
      tr.rest <- rest';
      tr.passed <- passed';

      (* List.hd fails here if find_next_synchro returns an empty passed' *)
      tr.current <- List.hd passed';
      tr.passed_clean <- [];           (* enough ? *)
      (* with error recovery, rest and rest_clean may not be in sync *)
      tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);


      let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
      Right (info_of_bads,  line_error,
            tr.passed, passed_before_error,
            current, e)
  )
 643
 644
 645
 646 (* Macro problem recovery *)
 647 (* used by the multi-pass error recovery expand-on-demand *)
 648 (*
 649 val candidate_macros_in_passed:
 650   defs: (string, define_def) Hashtbl.t ->
 651   Parser_c.token list -> (string * define_def) list
 652 *)
 653
 654 let candidate_macros_in_passed2 ~defs passed  =
 655   let res = ref [] in
 656   let res2 = ref [] in
 657
 658   passed +> List.iter (function
 659   | Parser_c.TIdent (s,_)
 660    (* bugfix: may have to undo some infered things *)
 661   | Parser_c.TMacroIterator (s,_)
 662   | Parser_c.TypedefIdent (s,_)
 663     ->
 664       (match Common.hfind_option s defs with
 665       | Some def ->
 666           if s ==~ Parsing_hacks.regexp_macro
 667           then
 668             (* pr2 (spf "candidate: %s" s); *)
 669             Common.push2 (s, def) res
 670           else
 671             Common.push2 (s, def) res2
 672         | None -> ()
 673         )
 674
 675   | _ -> ()
 676   );
 677   if null !res
 678   then !res2
 679   else !res
 680
 681 let candidate_macros_in_passed ~defs b =
 682   Common.profile_code "MACRO managment" (fun () ->
 683     candidate_macros_in_passed2 ~defs b)
 684
 685
 686
 687
 688
 689 let find_optional_macro_to_expand2 ~defs toks =
 690
 691   let defs = Common.hash_of_list defs in
 692
 693   let toks = toks +> Common.tail_map (function
 694
 695     (* special cases to undo *)
 696     | Parser_c.TMacroIterator (s, ii) ->
 697         if Hashtbl.mem defs s
 698         then Parser_c.TIdent (s, ii)
 699         else Parser_c.TMacroIterator (s, ii)
 700
 701     | Parser_c.TypedefIdent (s, ii) ->
 702         if Hashtbl.mem defs s
 703         then Parser_c.TIdent (s, ii)
 704         else Parser_c.TypedefIdent (s, ii)
 705
 706     | x -> x
 707   ) in
 708
 709   let tokens = toks in
 710   Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
 711
 712   (* just calling apply_macro_defs and having a specialized version
 713    * of the code in fix_tokens_cpp is not enough as some work such
 714    * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
 715    * will not get the chance to be run on the new expanded tokens.
 716    * Hence even if it's expensive, it's currently better to
 717    * just call directly fix_tokens_cpp again here.
 718
 719   let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
 720   let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
 721   let paren_grouped = TV.mk_parenthised  cleaner in
 722   Cpp_token_c.apply_macro_defs
 723     ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
 724     ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
 725     defs paren_grouped;
 726   (* because the before field is used by apply_macro_defs *)
 727   tokens2 := TV.rebuild_tokens_extented !tokens2;
 728   Parsing_hacks.insert_virtual_positions
 729     (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
 730   *)
 731 let find_optional_macro_to_expand ~defs a =
 732     Common.profile_code "MACRO managment" (fun () ->
 733       find_optional_macro_to_expand2 ~defs a)
 734
 735
 736
 737
 738
 739 (*****************************************************************************)
 740 (* Main entry points *)
 741 (*****************************************************************************)
 742
 743 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref)  =
 744   ref (Hashtbl.create 101)
 745
 746 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref)  =
 747   ref (Hashtbl.create 101)
 748
 749
 750 (* can not be put in parsing_hack, cos then mutually recursive problem as
 751  * we also want to parse the standard.h file.
 752  *)
 753 let init_defs_macros std_h =
 754   if not (Common.lfile_exists std_h)
 755   then pr2 ("warning: Can't find default macro file: " ^ std_h)
 756   else begin
 757     pr2 ("init_defs: " ^ std_h);
 758     _defs := Common.hash_of_list (extract_macros std_h);
 759   end
 760
 761 let init_defs_builtins file_h =
 762   if not (Common.lfile_exists file_h)
 763   then pr2 ("warning: Can't find macro file: " ^ file_h)
 764   else begin
 765     pr2 ("init_defs_builtins: " ^ file_h);
 766     _defs_builtins :=
 767       Common.hash_of_list (extract_macros file_h);
 768   end
 769
 770
 771
(* Text of a parsed element together with its tokens, as built by
 * mk_info_item2. *)
type info_item =  string * Parser_c.token list

(* A program where each toplevel element comes with its info_item. *)
type program2 = toplevel2 list
     and toplevel2 = Ast_c.toplevel * info_item
 776
 777 let program_of_program2 xs =
 778   xs +> List.map fst
 779
 780 let with_program2 f program2 =
 781   program2
 782   +> Common.unzip
 783   +> (fun (program, infos) ->
 784     f program, infos
 785   )
 786   +> Common.uncurry Common.zip
 787
 788
 789
 790
 791
 792
(* note: as we now go in 2 passes, all the error messages of the lexer
 * come first, and then the errors of the parser. They are no longer
 * intertwined.
 *
 * !!! This function uses refs and is not reentrant !!! So take care.
 * It uses globals defined in Lexer_parser and also the _defs and
 * _defs_builtins globals defined above.
 *
 * This function internally uses some semi-globals in the
 * tokens_state record and parsing_stat record.
 *)
 804
 805 let parse_print_error_heuristic2 file =
 806
 807   let filelines = Common.cat_array file in
 808   let stat = Parsing_stat.default_stat file in
 809
 810   (* -------------------------------------------------- *)
 811   (* call lexer and get all the tokens *)
 812   (* -------------------------------------------------- *)
 813   LP.lexer_reset_typedef();
 814   Parsing_hacks.ifdef_paren_cnt := 0;
 815
 816   let toks_orig = tokens file in
 817   let toks = Parsing_hacks.fix_tokens_define toks_orig in
 818   let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
 819
 820   (* expand macros on demand trick, preparation phase *)
 821   let macros =
 822     Common.profile_code "MACRO mgmt prep 1" (fun () ->
 823       let macros = Hashtbl.copy !_defs in
 824       (* include also builtins as some macros may generate some builtins too
 825        * like __decl_spec or __stdcall
 826        *)
 827       !_defs_builtins +> Hashtbl.iter (fun s def ->
 828         Hashtbl.replace macros   s def;
 829       );
 830       macros
 831     )
 832   in
 833   Common.profile_code "MACRO mgmt prep 2" (fun () ->
 834     let local_macros = extract_macros file in
 835     local_macros +> List.iter (fun (s, def) ->
 836       Hashtbl.replace macros   s def;
 837     );
 838   );
 839
 840   let tr = mk_tokens_state toks in
 841
 842   let rec loop tr =
 843
 844     (* todo?: I am not sure that it represents current_line, cos maybe
 845      * tr.current partipated in the previous parsing phase, so maybe tr.current
 846      * is not the first token of the next parsing phase. Same with checkpoint2.
 847      * It would be better to record when we have a } or ; in parser.mly,
 848      *  cos we know that they are the last symbols of external_declaration2.
 849      *
 850      * bugfix: may not be equal to 'file' as after macro expansions we can
 851      * start to parse a new entity from the body of a macro, for instance
 852      * when parsing a define_machine() body, cf standard.h
 853      *)
 854     let checkpoint = TH.line_of_tok tr.current in
 855     let checkpoint_file = TH.file_of_tok tr.current in
 856
 857     (* call the parser *)
 858     let elem =
 859       let pass1 =
 860         Common.profile_code "Parsing: 1st pass" (fun () ->
 861           get_one_elem ~pass:1 tr (file, filelines)
 862         ) in
 863       match pass1 with
 864       | Left e -> Left e
 865       | Right (info,line_err, passed, passed_before_error, cur, exn) ->
 866           if !Flag_parsing_c.disable_multi_pass
 867           then pass1
 868           else begin
 869             Common.profile_code "Parsing: multi pass" (fun () ->
 870
 871             pr2_err "parsing pass2: try again";
 872             let toks = List.rev passed ++ tr.rest in
 873             let new_tr = mk_tokens_state toks in
 874             copy_tokens_state ~src:new_tr ~dst:tr;
 875             let passx = get_one_elem ~pass:2 tr (file, filelines) in
 876
 877             (match passx with
 878             | Left e -> passx
 879             | Right (info,line_err,passed,passed_before_error,cur,exn) ->
 880                 let candidates =
 881                   candidate_macros_in_passed ~defs:macros passed
 882                 in
 883
 884
 885                 if is_define_passed passed || null candidates
 886                 then passx
 887                 else begin
 888                   (* todo factorize code *)
 889
 890                   pr2_err "parsing pass3: try again";
 891                   let toks = List.rev passed ++ tr.rest in
 892                   let toks' =
 893                     find_optional_macro_to_expand ~defs:candidates toks in
 894                   let new_tr = mk_tokens_state toks' in
 895                   copy_tokens_state ~src:new_tr ~dst:tr;
 896                   let passx = get_one_elem ~pass:3 tr (file, filelines) in
 897
 898                   (match passx with
 899                   | Left e -> passx
 900                   | Right (info,line_err,passed,passed_before_error,cur,exn) ->
 901                       pr2_err "parsing pass4: try again";
 902
 903                       let candidates =
 904                         candidate_macros_in_passed
 905                           ~defs:macros passed
 906                       in
 907
 908                       let toks = List.rev passed ++ tr.rest in
 909                       let toks' =
 910                       find_optional_macro_to_expand ~defs:candidates toks in
 911                       let new_tr = mk_tokens_state toks' in
 912                       copy_tokens_state ~src:new_tr ~dst:tr;
 913                       let passx = get_one_elem ~pass:4 tr (file, filelines) in
 914                       passx
 915                   )
 916                  end
 917             )
 918             )
 919           end
 920     in
 921
 922
 923     (* again not sure if checkpoint2 corresponds to end of bad region *)
 924     let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
 925     let checkpoint2_file = TH.file_of_tok tr.current in
 926
 927     let diffline =
 928       if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
 929       then (checkpoint2 - checkpoint)
 930       else 0
 931         (* TODO? so if error come in middle of something ? where the
 932          * start token was from original file but synchro found in body
 933          * of macro ? then can have wrong number of lines stat.
 934          * Maybe simpler just to look at tr.passed and count
 935          * the lines in the token from the correct file ?
 936          *)
 937     in
 938     let info = mk_info_item file (List.rev tr.passed) in
 939
 940     (* some stat updates *)
 941     stat.Stat.commentized <-
 942       stat.Stat.commentized + count_lines_commentized (snd info);
 943
 944     let elem =
 945       match elem with
 946       | Left e ->
 947           stat.Stat.correct <- stat.Stat.correct + diffline;
 948           e
 949       | Right (info_of_bads, line_error, toks_of_bads,
 950               _passed_before_error, cur, exn) ->
 951
 952           let was_define = is_define_passed tr.passed in
 953
 954           if was_define && !Flag_parsing_c.filter_msg_define_error
 955           then ()
 956           else begin
 957
 958             (match exn with
 959             | Lexer_c.Lexical _
 960             | Parsing.Parse_error
 961             | Semantic_c.Semantic _ -> ()
 962             | e -> raise e
 963             );
 964
 965             if !Flag_parsing_c.show_parsing_error
 966             then begin
 967               (match exn with
 968               (* Lexical is not anymore launched I think *)
 969               | Lexer_c.Lexical s ->
 970                   pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
 971               | Parsing.Parse_error ->
 972                   pr2 ("parse error \n = " ^ error_msg_tok cur)
 973               | Semantic_c.Semantic (s, i) ->
 974                   pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
 975               | e -> raise Impossible
 976               );
 977               (* bugfix: *)
 978               if (checkpoint_file =$= checkpoint2_file) &&
 979                 checkpoint_file =$= file
 980               then print_bad line_error (checkpoint, checkpoint2) filelines
 981               else pr2 "PB: bad: but on tokens not from original file"
 982             end;
 983
 984
 985             let pbline =
 986               toks_of_bads
 987               +> Common.filter (TH.is_same_line_or_close line_error)
 988               +> Common.filter TH.is_ident_like
 989             in
 990             let error_info =
 991               (pbline +> List.map TH.str_of_tok), line_error
 992             in
 993             stat.Stat.problematic_lines <-
 994               error_info::stat.Stat.problematic_lines;
 995
 996           end;
 997
 998           if was_define && !Flag_parsing_c.filter_define_error
 999           then stat.Stat.correct <- stat.Stat.correct + diffline
1000           else stat.Stat.bad     <- stat.Stat.bad     + diffline;
1001
1002           Ast_c.NotParsedCorrectly info_of_bads
1003     in
1004
1005     (match elem with
1006     | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1007     | xs -> (xs, info):: loop tr (* recurse *)
1008     )
1009   in
1010   let v = loop tr in
1011   let v = with_program2 Parsing_consistency_c.consistency_checking v in
1012   (v, stat)
1013
1014
(* Run [parse_print_error_heuristic2] on [a], with the whole call wrapped
 * in Common.profile_code under the "TOTAL" label.
 *)
let time_total_parsing a =
  let parse () = parse_print_error_heuristic2 a in
  Common.profile_code "TOTAL" parse
1017
(* Run [time_total_parsing] on [a], with the call wrapped in
 * Common.profile_code under the "C parsing" label. The result of the
 * wrapped call is returned unchanged.
 *)
let parse_print_error_heuristic a =
  let timed_parse () = time_total_parsing a in
  Common.profile_code "C parsing" timed_parse
1020
1021
(* alias: [parse_c_and_cpp] is just another name for
 * [parse_print_error_heuristic]; same argument, same result.
 *)
let parse_c_and_cpp = parse_print_error_heuristic
1024
1025 (*****************************************************************************)
1026 (* Same but faster cos memoize stuff *)
1027 (*****************************************************************************)
(* Parse [file], going through Common.cache_computation_robust when
 * Flag_parsing_c.use_cache is set, and calling
 * [parse_print_error_heuristic] directly otherwise.
 *
 * The cache uses the ".ast_raw" and ".depend_raw" extensions.
 *
 * TOFIX: both dependency lists handed to the cache are currently empty.
 * Candidate entries for the list of files that must not have changed:
 *   - Config.path ^ "/parsing_c/c_parser.cma"
 *     (should use Sys.argv.(0), would be safer);
 *   - !Config.std_h: we may also depend on the semantic patch, because
 *     the SP may use macros and so disable some of the macro expansions
 *     from standard.h.
 * The list of variables that must not have changed could hold some of
 * the flags of flag_parsing_c.ml.
 *)
let parse_cache file =
  let parse_uncached () = parse_print_error_heuristic file in
  match !Flag_parsing_c.use_cache with
  | false -> parse_uncached ()
  | true ->
      pr2 "TOFIX";
      (* (need_no_changed_files, need_no_changed_variables) *)
      let no_dependencies = ([], []) in
      Common.cache_computation_robust
        file ".ast_raw" no_dependencies ".depend_raw" parse_uncached
1054
1055
1056
1057 (*****************************************************************************)
1058 (* Some special cases *)
1059 (*****************************************************************************)
1060
(* Parse the C statement text [s] into an Ast_c.statement.
 *
 * [s] is written to a fresh temporary file as the body of a dummy
 * "void main()" function, and that file is parsed with [parse_c_and_cpp].
 * The result is taken from the first parsed element that is a definition
 * whose body is exactly one statement; what happens when there is no
 * such element is decided by Common.find_some.
 *)
let cstatement_of_string (s : string) : Ast_c.statement =
  let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
  let wrapped = "void main() { \n" ^ s ^ "\n}" in
  Common.write_file tmpfile wrapped;
  let (toplevels, _stat) = parse_c_and_cpp tmpfile in
  let sole_statement = function
    | (Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]}, _), _) ->
        Some st
    | _ -> None
  in
  Common.find_some sole_statement toplevels
1070
(* Parse the C expression text [s] into an Ast_c.expression.
 *
 * [s] followed by a semicolon is written to a fresh temporary file as the
 * body of a dummy "void main()" function, and that file is parsed with
 * [parse_c_and_cpp]. The result is taken from the first parsed element
 * that is a definition whose body is exactly one statement, that
 * statement being an expression statement carrying an expression; what
 * happens when there is no such element is decided by Common.find_some.
 *)
let cexpression_of_string (s : string) : Ast_c.expression =
  let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
  let wrapped = "void main() { \n" ^ s ^ ";\n}" in
  Common.write_file tmpfile wrapped;
  let (toplevels, _stat) = parse_c_and_cpp tmpfile in
  let sole_expression (toplevel, _info) =
    match toplevel with
    | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]}, _) ->
        (* only a lone expression statement qualifies *)
        (match Ast_c.unwrap_st st with
         | Ast_c.ExprStatement (Some e) -> Some e
         | _ -> None)
    | _ -> None
  in
  Common.find_some sole_expression toplevels