parsing_c/parse_c.ml

   1 (* Yoann Padioleau
   2  *
   3  * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
   4  * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License (GPL)
   8  * version 2 as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * file license.txt for more details.
  14  *)
  15
  16 open Common
  17
  18 module TH = Token_helpers
  19 module LP = Lexer_parser
  20
  21 module Stat = Parsing_stat
  22
  23 (*****************************************************************************)
  24 (* Wrappers *)
  25 (*****************************************************************************)
  26 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
  27
  28 (*****************************************************************************)
  29 (* Helpers *)
  30 (*****************************************************************************)
  31
  32 let lexbuf_to_strpos lexbuf     =
  33   (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
  34
  35 let token_to_strpos tok =
  36   (TH.str_of_tok tok, TH.pos_of_tok tok)
  37
  38
  39 let mk_info_item2 filename toks =
  40   let buf = Buffer.create 100 in
  41   let s =
  42     (* old: get_slice_file filename (line1, line2) *)
  43     begin
  44       toks +> List.iter (fun tok ->
  45         match TH.pinfo_of_tok tok with
  46         | Ast_c.OriginTok _ ->
  47             Buffer.add_string buf (TH.str_of_tok tok)
  48         | Ast_c.AbstractLineTok _ ->
  49             raise Impossible
  50         | _ -> ()
  51       );
  52       Buffer.contents buf
  53     end
  54   in
  55   (s, toks)
  56
  57 let mk_info_item a b =
  58   Common.profile_code "C parsing.mk_info_item"
  59     (fun () -> mk_info_item2 a b)
  60
  61
  62 let info_same_line line xs =
  63   xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
  64
  65
  66 (* move in cpp_token_c ? *)
  67 let is_define_passed passed =
  68   let xs = passed +> List.rev +> List.filter TH.is_not_comment in
  69   if List.length xs >= 2
  70   then
  71     (match Common.head_middle_tail xs with
  72     | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
  73         true
  74     | _ -> false
  75     )
  76   else begin
  77     pr2_err "WEIRD: length list of error recovery tokens < 2 ";
  78     false
  79   end
  80
  81
  82 (*****************************************************************************)
  83 (* Error diagnostic  *)
  84 (*****************************************************************************)
  85
  86 let error_msg_tok tok =
  87   let file = TH.file_of_tok tok in
  88   if !Flag_parsing_c.verbose_parsing
  89   then Common.error_message file (token_to_strpos tok)
  90   else ("error in " ^ file  ^ "; set verbose_parsing for more info")
  91
  92
  93 let print_bad line_error (start_line, end_line) filelines  =
  94   begin
  95     pr2 ("badcount: " ^ i_to_s (end_line - start_line));
  96
  97     for i = start_line to end_line do
  98       let line = filelines.(i) in
  99
 100       if i =|= line_error
 101       then  pr2 ("BAD:!!!!!" ^ " " ^ line)
 102       else  pr2 ("bad:" ^ " " ^      line)
 103     done
 104   end
 105
 106
 107 (*****************************************************************************)
 108 (* Stats on what was passed/commentized  *)
 109 (*****************************************************************************)
 110
 111 let commentized xs = xs +> Common.tail_map_filter (function
 112   | Parser_c.TCommentCpp (cppkind, ii) ->
 113       let s = Ast_c.str_of_info ii in
 114       let legal_passing =
 115         match !Flag_parsing_c.filter_passed_level with
 116         | 0 -> false
 117         | 1 ->
 118             List.mem cppkind [Token_c.CppAttr]
 119             ||
 120             (s =~ "__.*")
 121         | 2 ->
 122             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
 123             ||
 124             (s =~ "__.*")
 125         | 3 ->
 126             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
 127             ||
 128             (s =~ "__.*")
 129         | 4 ->
 130             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
 131             ||
 132             (s =~ "__.*")
 133
 134
 135         | 5 ->
 136             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
 137             ||
 138             (s =~ "__.*")
 139
 140
 141
 142
 143         | _ -> failwith "not valid level passing number"
 144       in
 145       if legal_passing then None else Some (ii.Ast_c.pinfo)
 146
 147         (*
 148         | Ast_c.CppOther ->
 149             (match s with
 150             | s when s =~ "KERN_.*" -> None
 151             | s when s =~ "__.*" -> None
 152             | _ ->
 153                 Some (ii.Ast_c.pinfo)
 154             )
 155         *)
 156
 157
 158   | Parser_c.TCommentMisc ii
 159   | Parser_c.TAction ii
 160     ->
 161       Some (ii.Ast_c.pinfo)
 162   | _ ->
 163       None
 164  )
 165
 166 let count_lines_commentized xs =
 167   let line = ref (-1) in
 168   let count = ref 0 in
 169   begin
 170     commentized xs +>
 171     List.iter
 172       (function
 173           Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
 174             let newline = pinfo.Common.line in
 175             if newline <> !line
 176             then begin
 177               line := newline;
 178               incr count
 179             end
 180         | _ -> ());
 181     !count
 182   end
 183
 184
 185
 186 let print_commentized xs =
 187   let line = ref (-1) in
 188   begin
 189     let ys = commentized xs in
 190     ys +>
 191     List.iter
 192       (function
 193           Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
 194             let newline = pinfo.Common.line in
 195             let s = pinfo.Common.str in
 196             let s = Str.global_substitute
 197                 (Str.regexp "\n") (fun s -> "") s
 198             in
 199             if newline =|= !line
 200             then prerr_string (s ^ " ")
 201             else begin
 202               if !line =|= -1
 203               then pr2_no_nl "passed:"
 204               else pr2_no_nl "\npassed:";
 205               line := newline;
 206               pr2_no_nl (s ^ " ");
 207             end
 208         | _ -> ());
 209     if not (null ys) then pr2 "";
 210   end
 211
 212
 213
 214
 215 (*****************************************************************************)
 216 (* Lexing only *)
 217 (*****************************************************************************)
 218
 219 (* called by parse_print_error_heuristic *)
 220 let tokens2 file =
 221  let table     = Common.full_charpos_to_pos_large file in
 222
 223  Common.with_open_infile file (fun chan ->
 224   let lexbuf = Lexing.from_channel chan in
 225   try
 226     let rec tokens_aux acc =
 227       let tok = Lexer_c.token lexbuf in
 228       (* fill in the line and col information *)
 229       let tok = tok +> TH.visitor_info_of_tok (fun ii ->
 230         { ii with Ast_c.pinfo=
 231           (* could assert pinfo.filename = file ? *)
 232           match Ast_c.pinfo_of_info ii with
 233             Ast_c.OriginTok pi ->
 234               Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
 235           | Ast_c.ExpandedTok (pi,vpi) ->
 236               Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
 237           | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
 238           | Ast_c.AbstractLineTok pi -> failwith "should not occur"
 239       })
 240       in
 241
 242       if TH.is_eof tok
 243       then List.rev (tok::acc)
 244       else tokens_aux (tok::acc)
 245     in
 246     tokens_aux []
 247   with
 248     | Lexer_c.Lexical s ->
 249         failwith ("lexical error " ^ s ^ "\n =" ^
 250                   (Common.error_message file (lexbuf_to_strpos lexbuf)))
 251     | e -> raise e
 252  )
 253
 254 let time_lexing ?(profile=true) a =
 255   if profile
 256   then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
 257   else tokens2 a
 258 let tokens ?profile a =
 259   Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
 260
 261
 262 let tokens_of_string string =
 263   let lexbuf = Lexing.from_string string in
 264   try
 265     let rec tokens_s_aux () =
 266       let tok = Lexer_c.token lexbuf in
 267       if TH.is_eof tok
 268       then [tok]
 269       else tok::(tokens_s_aux ())
 270     in
 271     tokens_s_aux ()
 272   with
 273     | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
 274     | e -> raise e
 275
 276
 277 (*****************************************************************************)
 278 (* Parsing, but very basic, no more used *)
 279 (*****************************************************************************)
 280
 281 (*
 282  * !!!Those function use refs, and are not reentrant !!! so take care.
 283  * It use globals defined in Lexer_parser.
 284  *
 285  * update: because now lexer return comments tokens, those functions
 286  * may not work anymore.
 287  *)
 288
 289 let parse file =
 290   let lexbuf = Lexing.from_channel (open_in file) in
 291   let result = Parser_c.main Lexer_c.token lexbuf in
 292   result
 293
 294
 295 let parse_print_error file =
 296   let chan = (open_in file) in
 297   let lexbuf = Lexing.from_channel chan in
 298
 299   let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
 300   try
 301     lexbuf +> Parser_c.main Lexer_c.token
 302   with
 303   | Lexer_c.Lexical s ->
 304       failwith ("lexical error " ^s^ "\n =" ^  error_msg ())
 305   | Parsing.Parse_error ->
 306       failwith ("parse error \n = " ^ error_msg ())
 307   | Semantic_c.Semantic (s, i) ->
 308       failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
 309   | e -> raise e
 310
 311
 312
 313
 314 (*****************************************************************************)
 315 (* Parsing subelements, useful to debug parser *)
 316 (*****************************************************************************)
 317
 318 (*
 319  * !!!Those function use refs, and are not reentrant !!! so take care.
 320  * It use globals defined in Lexer_parser.
 321  *)
 322
 323
 324 (* old:
 325  *   let parse_gen parsefunc s =
 326  *     let lexbuf = Lexing.from_string s in
 327  *     let result = parsefunc Lexer_c.token lexbuf in
 328  *     result
 329  *)
 330
 331 let parse_gen parsefunc s =
 332   let toks = tokens_of_string s +> List.filter TH.is_not_comment in
 333
 334
 335   (* Why use this lexing scheme ? Why not classically give lexer func
 336    * to parser ? Because I now keep comments in lexer. Could
 337    * just do a simple wrapper that when comment ask again for a token,
 338    * but maybe simpler to use cur_tok technique.
 339    *)
 340   let all_tokens = ref toks in
 341   let cur_tok    = ref (List.hd !all_tokens) in
 342
 343   let lexer_function =
 344     (fun _ ->
 345       if TH.is_eof !cur_tok
 346       then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
 347       else
 348         let v = Common.pop2 all_tokens in
 349         cur_tok := v;
 350         !cur_tok
 351     )
 352   in
 353   let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
 354   let result = parsefunc lexer_function lexbuf_fake in
 355   result
 356
 357
 358 let type_of_string       = parse_gen Parser_c.type_name
 359 let statement_of_string  = parse_gen Parser_c.statement
 360 let expression_of_string = parse_gen Parser_c.expr
 361
 362 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
 363
 364
 365
 366
 367
 368 (*****************************************************************************)
 369 (* Parsing default define macros, usually in a standard.h file *)
 370 (*****************************************************************************)
 371
 372 let extract_macros2 file =
 373   Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
 374     Flag_parsing_c.verbose_lexing := false;
 375     let toks = tokens ~profile:false file in
 376     let toks = Parsing_hacks.fix_tokens_define toks in
 377     Cpp_token_c.extract_macros toks
 378   )
 379
 380 let extract_macros a =
 381   Common.profile_code_exclusif "HACK" (fun () -> extract_macros2 a)
 382
 383
 384 (*****************************************************************************)
 385 (* Helper for main entry point *)
 386 (*****************************************************************************)
 387
 388
 389 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
 390  * possible error recovery. Indeed, they allow to skip some tokens and
 391  * still be able to call again the ocamlyacc parser. It is ugly code
 392  * because we cant modify ocamllex and ocamlyacc. As we want some
 393  * extended lexing tricks, we have to use such refs.
 394  *
 395  * Those refs are now also used for my lalr(k) technique. Indeed They
 396  * store the futur and previous tokens that were parsed, and so
 397  * provide enough context information for powerful lex trick.
 398  *
 399  * - passed_tokens_last_ckp stores the passed tokens since last
 400  *   checkpoint. Used for NotParsedCorrectly and also to build the
 401  *   info_item attached to each program_element.
 402  * - passed_tokens_clean is used for lookahead, in fact for lookback.
 403  * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
 404  *   contain some comments and so would make pattern matching difficult
 405  *   in lookahead. Hence this variable. We would like also to get rid
 406  *   of cpp instruction because sometimes a cpp instruction is between
 407  *   two tokens and makes a pattern matching fail. But lookahead also
 408  *   transform some cpp instruction (in comment) so can't remove them.
 409  *
 410  * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
 411  * whereas passed_tokens_clean and remaining_tokens_clean does not contain
 412  * comment-tokens.
 413  *
 414  * Normally we have:
 415  * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
 416  *    after the call to pop2.
 417  * toks = (reverse passed_tok) ++ remaining_tokens
 418  *     at the and of the lexer_function call.
 419  * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
 420  * At the end of lexer_function call,  cur_tok  overlap  with passed_tok.
 421  *
 422  * convention: I use "tr"  for "tokens refs"
 423  *
 424  * I now also need this lexing trick because the lexer return comment
 425  * tokens.
 426  *)
 427
 428 type tokens_state = {
 429   mutable rest :         Parser_c.token list;
 430   mutable rest_clean :   Parser_c.token list;
 431   mutable current :      Parser_c.token;
 432   (* it's passed since last "checkpoint", not passed from the beginning *)
 433   mutable passed :       Parser_c.token list;
 434   mutable passed_clean : Parser_c.token list;
 435 }
 436
 437 let mk_tokens_state toks =
 438   {
 439     rest       = toks;
 440     rest_clean = (toks +> List.filter TH.is_not_comment);
 441     current    = (List.hd toks);
 442     passed = [];
 443     passed_clean = [];
 444   }
 445
 446
 447
 448 let clone_tokens_state tr =
 449   { rest = tr.rest;
 450     rest_clean = tr.rest_clean;
 451     current = tr.current;
 452     passed = tr.passed;
 453     passed_clean = tr.passed_clean;
 454   }
 455 let copy_tokens_state ~src ~dst =
 456   dst.rest <- src.rest;
 457   dst.rest_clean <- src.rest_clean;
 458   dst.current <- src.current;
 459   dst.passed <- src.passed;
 460   dst.passed_clean <-  src.passed_clean;
 461   ()
 462
 463 (* todo? agglomerate the x##b ? *)
 464 let rec filter_noise n xs =
 465   match n, xs with
 466   | _, [] -> []
 467   | 0, xs -> xs
 468   | n, x::xs ->
 469       (match x with
 470       | Parser_c.TMacroAttr _ ->
 471           filter_noise (n-1) xs
 472       | _ ->
 473           x::filter_noise (n-1) xs
 474       )
 475
 476 let clean_for_lookahead xs =
 477   match xs with
 478   | [] -> []
 479   | [x] -> [x]
 480   | x::xs ->
 481       x::filter_noise 10 xs
 482
 483
 484
 485 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
 486  * tr means token refs.
 487  *)
 488 let rec lexer_function ~pass tr = fun lexbuf ->
 489   match tr.rest with
 490   | [] -> pr2_err "ALREADY AT END"; tr.current
 491   | v::xs ->
 492     tr.rest <- xs;
 493     tr.current <- v;
 494
 495     if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
 496
 497     if TH.is_comment v
 498     then begin
 499       tr.passed <- v::tr.passed;
 500       lexer_function ~pass tr lexbuf
 501     end
 502     else begin
 503       let x = List.hd tr.rest_clean  in
 504       tr.rest_clean <- List.tl tr.rest_clean;
 505       assert (x =*= v);
 506
 507       (match v with
 508
 509       (* fix_define1.
 510        *
 511        * Why not in parsing_hacks lookahead and do passing like
 512        * I do for some ifdef directives ? Because here I also need to
 513        * generate some tokens sometimes and so I need access to the
 514        * tr.passed, tr.rest, etc.
 515        *)
 516       | Parser_c.TDefine (tok) ->
 517           if not (LP.current_context () =*= LP.InTopLevel) &&
 518             (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
 519           then begin
 520             incr Stat.nDefinePassing;
 521             pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
 522             let v' = Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
 523             in
 524             tr.passed <- v'::tr.passed;
 525             tr.rest       <- Parsing_hacks.comment_until_defeol tr.rest;
 526             tr.rest_clean <- Parsing_hacks.drop_until_defeol tr.rest_clean;
 527             lexer_function ~pass tr lexbuf
 528           end
 529           else begin
 530             tr.passed <- v::tr.passed;
 531             tr.passed_clean <- v::tr.passed_clean;
 532             v
 533           end
 534
 535       | Parser_c.TInclude (includes, filename, inifdef, info) ->
 536           if not (LP.current_context () =*= LP.InTopLevel)  &&
 537             (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
 538           then begin
 539             incr Stat.nIncludePassing;
 540             pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
 541             let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
 542             tr.passed <- v::tr.passed;
 543             lexer_function ~pass tr lexbuf
 544           end
 545           else begin
 546             let (v,new_tokens) =
 547               Parsing_hacks.tokens_include (info, includes, filename, inifdef) in
 548             let new_tokens_clean =
 549               new_tokens +> List.filter TH.is_not_comment  in
 550
 551             tr.passed <- v::tr.passed;
 552             tr.passed_clean <- v::tr.passed_clean;
 553             tr.rest <- new_tokens ++ tr.rest;
 554             tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
 555             v
 556           end
 557
 558       | _ ->
 559
 560           (* typedef_fix1 *)
 561           let v = match v with
 562             | Parser_c.TIdent (s, ii) ->
 563                 if
 564                   LP.is_typedef s &&
 565                     not (!Flag_parsing_c.disable_add_typedef) &&
 566                     pass =|= 1
 567                 then Parser_c.TypedefIdent (s, ii)
 568                 else Parser_c.TIdent (s, ii)
 569             | x -> x
 570           in
 571
 572           let v = Parsing_hacks.lookahead ~pass
 573             (clean_for_lookahead (v::tr.rest_clean))
 574             tr.passed_clean in
 575
 576           tr.passed <- v::tr.passed;
 577
 578           (* the lookahead may have changed the status of the token and
 579            * consider it as a comment, for instance some #include are
 580            * turned into comments, hence this code. *)
 581           match v with
 582           | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
 583           | v ->
 584               tr.passed_clean <- v::tr.passed_clean;
 585               v
 586       )
 587     end
 588
 589
 590 let max_pass = 4
 591
 592
 593 let get_one_elem ~pass tr (file, filelines) =
 594
 595   if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
 596   then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
 597
 598   (* normally have to do that only when come from an exception in which
 599    * case the dt() may not have been done
 600    * TODO but if was in scoped scope ? have to let only the last scope
 601    * so need do a LP.lexer_reset_typedef ();
 602    *)
 603   LP.enable_typedef();
 604   LP._lexer_hint := (LP.default_hint ());
 605   LP.save_typedef_state();
 606
 607   tr.passed <- [];
 608
 609   let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
 610
 611   (try
 612       (* -------------------------------------------------- *)
 613       (* Call parser *)
 614       (* -------------------------------------------------- *)
 615       Common.profile_code_exclusif "YACC" (fun () ->
 616         Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
 617       )
 618     with e ->
 619       LP.restore_typedef_state();
 620
 621       (* must keep here, before the code that adjusts the tr fields *)
 622       let line_error = TH.line_of_tok tr.current in
 623
 624       let passed_before_error = tr.passed in
 625       let current = tr.current in
 626
 627       (*  error recovery, go to next synchro point *)
 628       let (passed', rest') =
 629         Parsing_recovery_c.find_next_synchro tr.rest tr.passed in
 630       tr.rest <- rest';
 631       tr.passed <- passed';
 632
 633       tr.current <- List.hd passed';
 634       tr.passed_clean <- [];           (* enough ? *)
 635       (* with error recovery, rest and rest_clean may not be in sync *)
 636       tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
 637
 638
 639       let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
 640       Right (info_of_bads,  line_error,
 641             tr.passed, passed_before_error,
 642             current, e)
 643   )
 644
 645
 646
 647 (* Macro problem recovery *)
 648 (* used by the multi-pass error recovery expand-on-demand *)
 649 (*
 650 val candidate_macros_in_passed:
 651   defs: (string, define_def) Hashtbl.t ->
 652   Parser_c.token list -> (string * define_def) list
 653 *)
 654
 655 let candidate_macros_in_passed2 ~defs passed  =
 656   let res = ref [] in
 657   let res2 = ref [] in
 658
 659   passed +> List.iter (function
 660   | Parser_c.TIdent (s,_)
 661    (* bugfix: may have to undo some infered things *)
 662   | Parser_c.TMacroIterator (s,_)
 663   | Parser_c.TypedefIdent (s,_)
 664     ->
 665       (match Common.hfind_option s defs with
 666       | Some def ->
 667           if s ==~ Parsing_hacks.regexp_macro
 668           then
 669             (* pr2 (spf "candidate: %s" s); *)
 670             Common.push2 (s, def) res
 671           else
 672             Common.push2 (s, def) res2
 673         | None -> ()
 674         )
 675
 676   | _ -> ()
 677   );
 678   if null !res
 679   then !res2
 680   else !res
 681
 682 let candidate_macros_in_passed ~defs b =
 683   Common.profile_code "MACRO managment" (fun () ->
 684     candidate_macros_in_passed2 ~defs b)
 685
 686
 687
 688
 689
 690 let find_optional_macro_to_expand2 ~defs toks =
 691
 692   let defs = Common.hash_of_list defs in
 693
 694   let toks = toks +> Common.tail_map (function
 695
 696     (* special cases to undo *)
 697     | Parser_c.TMacroIterator (s, ii) ->
 698         if Hashtbl.mem defs s
 699         then Parser_c.TIdent (s, ii)
 700         else Parser_c.TMacroIterator (s, ii)
 701
 702     | Parser_c.TypedefIdent (s, ii) ->
 703         if Hashtbl.mem defs s
 704         then Parser_c.TIdent (s, ii)
 705         else Parser_c.TypedefIdent (s, ii)
 706
 707     | x -> x
 708   ) in
 709
 710   let tokens = toks in
 711   Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
 712
 713   (* just calling apply_macro_defs and having a specialized version
 714    * of the code in fix_tokens_cpp is not enough as some work such
 715    * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
 716    * will not get the chance to be run on the new expanded tokens.
 717    * Hence even if it's expensive, it's currently better to
 718    * just call directly fix_tokens_cpp again here.
 719
 720   let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
 721   let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
 722   let paren_grouped = TV.mk_parenthised  cleaner in
 723   Cpp_token_c.apply_macro_defs
 724     ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
 725     ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
 726     defs paren_grouped;
 727   (* because the before field is used by apply_macro_defs *)
 728   tokens2 := TV.rebuild_tokens_extented !tokens2;
 729   Parsing_hacks.insert_virtual_positions
 730     (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
 731   *)
 732 let find_optional_macro_to_expand ~defs a =
 733     Common.profile_code "MACRO managment" (fun () ->
 734       find_optional_macro_to_expand2 ~defs a)
 735
 736
 737
 738
 739
 740 (*****************************************************************************)
 741 (* Main entry points *)
 742 (*****************************************************************************)
 743
 744 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref)  =
 745   ref (Hashtbl.create 101)
 746
 747 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref)  =
 748   ref (Hashtbl.create 101)
 749
 750
 751 (* can not be put in parsing_hack, cos then mutually recursive problem as
 752  * we also want to parse the standard.h file.
 753  *)
 754 let init_defs_macros std_h =
 755   if not (Common.lfile_exists std_h)
 756   then pr2 ("warning: Can't find default macro file: " ^ std_h)
 757   else begin
 758     pr2 ("init_defs: " ^ std_h);
 759     _defs := Common.hash_of_list (extract_macros std_h);
 760   end
 761
 762 let init_defs_builtins file_h =
 763   if not (Common.lfile_exists file_h)
 764   then pr2 ("warning: Can't find macro file: " ^ file_h)
 765   else begin
 766     pr2 ("init_defs_builtins: " ^ file_h);
 767     _defs_builtins :=
 768       Common.hash_of_list (extract_macros file_h);
 769   end
 770
 771
 772
 773 type info_item =  string * Parser_c.token list
 774
 775 type program2 = toplevel2 list
 776      and toplevel2 = Ast_c.toplevel * info_item
 777
 778 let program_of_program2 xs =
 779   xs +> List.map fst
 780
 781 let with_program2 f program2 =
 782   program2
 783   +> Common.unzip
 784   +> (fun (program, infos) ->
 785     f program, infos
 786   )
 787   +> Common.uncurry Common.zip
 788
 789
 790
 791
 792
 793
 794 (* note: as now we go in 2 passes, there is first all the error message of
 795  * the lexer, and then the error of the parser. It is not anymore
 796  * interwinded.
 797  *
 798  * !!!This function use refs, and is not reentrant !!! so take care.
 799  * It use globals defined in Lexer_parser and also the _defs global
 800  * in parsing_hack.ml.
 801  *
 802  * This function uses internally some semi globals in the
 803  * tokens_stat record and parsing_stat record.
 804  *)
 805
 806 let parse_print_error_heuristic2 file =
 807
 808   let filelines = Common.cat_array file in
 809   let stat = Parsing_stat.default_stat file in
 810
 811   (* -------------------------------------------------- *)
 812   (* call lexer and get all the tokens *)
 813   (* -------------------------------------------------- *)
 814   LP.lexer_reset_typedef();
 815   Parsing_hacks.ifdef_paren_cnt := 0;
 816
 817   let toks_orig = tokens file in
 818   let toks = Parsing_hacks.fix_tokens_define toks_orig in
 819   let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
 820
 821   (* expand macros on demand trick, preparation phase *)
 822   let macros =
 823     Common.profile_code "MACRO mgmt prep 1" (fun () ->
 824       let macros = Hashtbl.copy !_defs in
 825       (* include also builtins as some macros may generate some builtins too
 826        * like __decl_spec or __stdcall
 827        *)
 828       !_defs_builtins +> Hashtbl.iter (fun s def ->
 829         Hashtbl.replace macros   s def;
 830       );
 831       macros
 832     )
 833   in
 834   Common.profile_code "MACRO mgmt prep 2" (fun () ->
 835     let local_macros = extract_macros file in
 836     local_macros +> List.iter (fun (s, def) ->
 837       Hashtbl.replace macros   s def;
 838     );
 839   );
 840
 841   let tr = mk_tokens_state toks in
 842
 843   let rec loop tr =
 844
 845     (* todo?: I am not sure that it represents current_line, cos maybe
 846      * tr.current partipated in the previous parsing phase, so maybe tr.current
 847      * is not the first token of the next parsing phase. Same with checkpoint2.
 848      * It would be better to record when we have a } or ; in parser.mly,
 849      *  cos we know that they are the last symbols of external_declaration2.
 850      *
 851      * bugfix: may not be equal to 'file' as after macro expansions we can
 852      * start to parse a new entity from the body of a macro, for instance
 853      * when parsing a define_machine() body, cf standard.h
 854      *)
 855     let checkpoint = TH.line_of_tok tr.current in
 856     let checkpoint_file = TH.file_of_tok tr.current in
 857
 858     (* call the parser *)
 859     let elem =
 860       let pass1 =
 861         Common.profile_code "Parsing: 1st pass" (fun () ->
 862           get_one_elem ~pass:1 tr (file, filelines)
 863         ) in
 864       match pass1 with
 865       | Left e -> Left e
 866       | Right (info,line_err, passed, passed_before_error, cur, exn) ->
 867           if !Flag_parsing_c.disable_multi_pass
 868           then pass1
 869           else begin
 870             Common.profile_code "Parsing: multi pass" (fun () ->
 871
 872             pr2_err "parsing pass2: try again";
 873             let toks = List.rev passed ++ tr.rest in
 874             let new_tr = mk_tokens_state toks in
 875             copy_tokens_state ~src:new_tr ~dst:tr;
 876             let passx = get_one_elem ~pass:2 tr (file, filelines) in
 877
 878             (match passx with
 879             | Left e -> passx
 880             | Right (info,line_err,passed,passed_before_error,cur,exn) ->
 881                 let candidates =
 882                   candidate_macros_in_passed ~defs:macros passed
 883                 in
 884
 885
 886                 if is_define_passed passed || null candidates
 887                 then passx
 888                 else begin
 889                   (* todo factorize code *)
 890
 891                   pr2_err "parsing pass3: try again";
 892                   let toks = List.rev passed ++ tr.rest in
 893                   let toks' =
 894                     find_optional_macro_to_expand ~defs:candidates toks in
 895                   let new_tr = mk_tokens_state toks' in
 896                   copy_tokens_state ~src:new_tr ~dst:tr;
 897                   let passx = get_one_elem ~pass:3 tr (file, filelines) in
 898
 899                   (match passx with
 900                   | Left e -> passx
 901                   | Right (info,line_err,passed,passed_before_error,cur,exn) ->
 902                       pr2_err "parsing pass4: try again";
 903
 904                       let candidates =
 905                         candidate_macros_in_passed
 906                           ~defs:macros passed
 907                       in
 908
 909                       let toks = List.rev passed ++ tr.rest in
 910                       let toks' =
 911                       find_optional_macro_to_expand ~defs:candidates toks in
 912                       let new_tr = mk_tokens_state toks' in
 913                       copy_tokens_state ~src:new_tr ~dst:tr;
 914                       let passx = get_one_elem ~pass:4 tr (file, filelines) in
 915                       passx
 916                   )
 917                  end
 918             )
 919             )
 920           end
 921     in
 922
 923
 924     (* again not sure if checkpoint2 corresponds to end of bad region *)
 925     let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
 926     let checkpoint2_file = TH.file_of_tok tr.current in
 927
 928     let diffline =
 929       if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
 930       then (checkpoint2 - checkpoint)
 931       else 0
 932         (* TODO? so if error come in middle of something ? where the
 933          * start token was from original file but synchro found in body
 934          * of macro ? then can have wrong number of lines stat.
 935          * Maybe simpler just to look at tr.passed and count
 936          * the lines in the token from the correct file ?
 937          *)
 938     in
 939     let info = mk_info_item file (List.rev tr.passed) in
 940
 941     (* some stat updates *)
 942     stat.Stat.commentized <-
 943       stat.Stat.commentized + count_lines_commentized (snd info);
 944
 945     let elem =
 946       match elem with
 947       | Left e ->
 948           stat.Stat.correct <- stat.Stat.correct + diffline;
 949           e
 950       | Right (info_of_bads, line_error, toks_of_bads,
 951               _passed_before_error, cur, exn) ->
 952
 953           let was_define = is_define_passed tr.passed in
 954
 955           if was_define && !Flag_parsing_c.filter_msg_define_error
 956           then ()
 957           else begin
 958
 959             (match exn with
 960             | Lexer_c.Lexical _
 961             | Parsing.Parse_error
 962             | Semantic_c.Semantic _ -> ()
 963             | e -> raise e
 964             );
 965
 966             if !Flag_parsing_c.show_parsing_error
 967             then begin
 968               (match exn with
 969               (* Lexical is not anymore launched I think *)
 970               | Lexer_c.Lexical s ->
 971                   pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
 972               | Parsing.Parse_error ->
 973                   pr2 ("parse error \n = " ^ error_msg_tok cur)
 974               | Semantic_c.Semantic (s, i) ->
 975                   pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
 976               | e -> raise Impossible
 977               );
 978               (* bugfix: *)
 979               if (checkpoint_file =$= checkpoint2_file) &&
 980                 checkpoint_file =$= file
 981               then print_bad line_error (checkpoint, checkpoint2) filelines
 982               else pr2 "PB: bad: but on tokens not from original file"
 983             end;
 984
 985
 986             let pbline =
 987               toks_of_bads
 988               +> Common.filter (TH.is_same_line_or_close line_error)
 989               +> Common.filter TH.is_ident_like
 990             in
 991             let error_info =
 992               (pbline +> List.map TH.str_of_tok), line_error
 993             in
 994             stat.Stat.problematic_lines <-
 995               error_info::stat.Stat.problematic_lines;
 996
 997           end;
 998
 999           if was_define && !Flag_parsing_c.filter_define_error
1000           then stat.Stat.correct <- stat.Stat.correct + diffline
1001           else stat.Stat.bad     <- stat.Stat.bad     + diffline;
1002
1003           Ast_c.NotParsedCorrectly info_of_bads
1004     in
1005
1006     (match elem with
1007     | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1008     | xs -> (xs, info):: loop tr (* recurse *)
1009     )
1010   in
1011   let v = loop tr in
1012   let v = with_program2 Parsing_consistency_c.consistency_checking v in
1013   (v, stat)
1014
1015
1016 let time_total_parsing a  =
1017   Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a)
1018
1019 let parse_print_error_heuristic a  =
1020   Common.profile_code "C parsing" (fun () -> time_total_parsing a)
1021
1022
1023 (* alias *)
1024 let parse_c_and_cpp a = parse_print_error_heuristic a
1025
1026 (*****************************************************************************)
1027 (* Same but faster cos memoize stuff *)
1028 (*****************************************************************************)
1029 let parse_cache file =
1030   if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file
1031   else
1032   let _ = pr2 "TOFIX" in
1033   let need_no_changed_files =
1034     (* should use Sys.argv.(0), would be safer. *)
1035
1036     [
1037       (* TOFIX
1038       Config.path ^ "/parsing_c/c_parser.cma";
1039       (* we may also depend now on the semantic patch because
1040          the SP may use macro and so we will disable some of the
1041          macro expansions from standard.h.
1042       *)
1043       !Config.std_h;
1044       *)
1045     ]
1046   in
1047   let need_no_changed_variables =
1048     (* could add some of the flags of flag_parsing_c.ml *)
1049     []
1050   in
1051   Common.cache_computation_robust
1052     file ".ast_raw"
1053     (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1054     (fun () -> parse_print_error_heuristic file)
1055
1056
1057
1058 (*****************************************************************************)
1059 (* Some special cases *)
1060 (*****************************************************************************)
1061
1062 let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
1063   let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1064   Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1065   let program = parse_c_and_cpp tmpfile +> fst in
1066   program +> Common.find_some (fun (e,_) ->
1067     match e with
1068     | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1069     | _ -> None
1070   )
1071
1072 let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
1073   let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1074   Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1075   let program = parse_c_and_cpp tmpfile +> fst in
1076   program +> Common.find_some (fun (e,_) ->
1077     match e with
1078     | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
1079         (match compound with
1080         | [Ast_c.StmtElem st] ->
1081             (match Ast_c.unwrap_st st with
1082             | Ast_c.ExprStatement (Some e) -> Some e
1083             | _ -> None
1084             )
1085         | _ -> None
1086         )
1087     | _ -> None
1088   )