parsing_c/parse_c.ml

   1 (* Yoann Padioleau
   2  *
   3  * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License (GPL)
   7  * version 2 as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * file license.txt for more details.
  13  *)
  14
  15 open Common
  16
  17 module TH = Token_helpers
  18 module LP = Lexer_parser
  19
  20 module Stat = Parsing_stat
  21
  22 (*****************************************************************************)
  23 (* Wrappers *)
  24 (*****************************************************************************)
  25 let pr2_err, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
  26
  27 (*****************************************************************************)
  28 (* Helpers *)
  29 (*****************************************************************************)
  30
  31 let lexbuf_to_strpos lexbuf     =
  32   (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)
  33
  34 let token_to_strpos tok =
  35   (TH.str_of_tok tok, TH.pos_of_tok tok)
  36
  37
  38 let error_msg_tok tok =
  39   let file = TH.file_of_tok tok in
  40   if !Flag_parsing_c.verbose_parsing
  41   then Common.error_message file (token_to_strpos tok)
  42   else ("error in " ^ file  ^ "; set verbose_parsing for more info")
  43
  44
  45 let print_bad line_error (start_line, end_line) filelines  =
  46   begin
  47     pr2 ("badcount: " ^ i_to_s (end_line - start_line));
  48
  49     for i = start_line to end_line do
  50       let line = filelines.(i) in
  51
  52       if i =|= line_error
  53       then  pr2 ("BAD:!!!!!" ^ " " ^ line)
  54       else  pr2 ("bad:" ^ " " ^      line)
  55     done
  56   end
  57
  58
  59
  60 let mk_info_item2 filename toks =
  61   let buf = Buffer.create 100 in
  62   let s =
  63     (* old: get_slice_file filename (line1, line2) *)
  64     begin
  65       toks +> List.iter (fun tok ->
  66         match TH.pinfo_of_tok tok with
  67         | Ast_c.OriginTok _ ->
  68             Buffer.add_string buf (TH.str_of_tok tok)
  69         | Ast_c.AbstractLineTok _ ->
  70             raise Impossible
  71         | _ -> ()
  72       );
  73       Buffer.contents buf
  74     end
  75   in
  76   (s, toks)
  77
  78 let mk_info_item a b =
  79   Common.profile_code "C parsing.mk_info_item"
  80     (fun () -> mk_info_item2 a b)
  81
  82
  83 let info_same_line line xs =
  84   xs +> List.filter (fun info -> Ast_c.line_of_info info =|= line)
  85
  86
  87 (*****************************************************************************)
  88 (* Stats on what was passed/commentized  *)
  89 (*****************************************************************************)
  90
  91 let commentized xs = xs +> Common.map_filter (function
  92   | Parser_c.TCommentCpp (cppkind, ii) ->
  93       let s = Ast_c.str_of_info ii in
  94       let legal_passing =
  95         match !Flag_parsing_c.filter_passed_level with
  96         | 0 -> false
  97         | 1 ->
  98             List.mem cppkind [Token_c.CppAttr]
  99             ||
 100             (s =~ "__.*")
 101         | 2 ->
 102             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal]
 103             ||
 104             (s =~ "__.*")
 105         | 3 ->
 106             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective]
 107             ||
 108             (s =~ "__.*")
 109         | 4 ->
 110             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppMacro]
 111             ||
 112             (s =~ "__.*")
 113
 114
 115         | 5 ->
 116             List.mem cppkind [Token_c.CppAttr;Token_c.CppPassingNormal;Token_c.CppDirective;Token_c.CppMacro]
 117             ||
 118             (s =~ "__.*")
 119
 120
 121
 122
 123         | _ -> failwith "not valid level passing number"
 124       in
 125       if legal_passing then None else Some (ii.Ast_c.pinfo)
 126
 127         (*
 128         | Ast_c.CppOther ->
 129             (match s with
 130             | s when s =~ "KERN_.*" -> None
 131             | s when s =~ "__.*" -> None
 132             | _ ->
 133                 Some (ii.Ast_c.pinfo)
 134             )
 135         *)
 136
 137
 138   | Parser_c.TCommentMisc ii
 139   | Parser_c.TAction ii
 140     ->
 141       Some (ii.Ast_c.pinfo)
 142   | _ ->
 143       None
 144  )
 145
 146 let count_lines_commentized xs =
 147   let line = ref (-1) in
 148   let count = ref 0 in
 149   begin
 150     commentized xs +>
 151     List.iter
 152       (function
 153           Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
 154             let newline = pinfo.Common.line in
 155             if newline <> !line
 156             then begin
 157               line := newline;
 158               incr count
 159             end
 160         | _ -> ());
 161     !count
 162   end
 163
 164
 165
 166 let print_commentized xs =
 167   let line = ref (-1) in
 168   begin
 169     let ys = commentized xs in
 170     ys +>
 171     List.iter
 172       (function
 173           Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
 174             let newline = pinfo.Common.line in
 175             let s = pinfo.Common.str in
 176             let s = Str.global_substitute
 177                 (Str.regexp "\n") (fun s -> "") s
 178             in
 179             if newline =|= !line
 180             then prerr_string (s ^ " ")
 181             else begin
 182               if !line =|= -1
 183               then pr2_no_nl "passed:"
 184               else pr2_no_nl "\npassed:";
 185               line := newline;
 186               pr2_no_nl (s ^ " ");
 187             end
 188         | _ -> ());
 189     if not (null ys) then pr2 "";
 190   end
 191
 192
 193
 194
 195 (*****************************************************************************)
 196 (* Lexing only *)
 197 (*****************************************************************************)
 198
 199 (* called by parse_print_error_heuristic *)
 200 let tokens2 file =
 201  let table     = Common.full_charpos_to_pos_large file in
 202
 203  Common.with_open_infile file (fun chan ->
 204   let lexbuf = Lexing.from_channel chan in
 205   try
 206     let rec tokens_aux acc =
 207       let tok = Lexer_c.token lexbuf in
 208       (* fill in the line and col information *)
 209       let tok = tok +> TH.visitor_info_of_tok (fun ii ->
 210         { ii with Ast_c.pinfo=
 211           (* could assert pinfo.filename = file ? *)
 212           match Ast_c.pinfo_of_info ii with
 213             Ast_c.OriginTok pi ->
 214               Ast_c.OriginTok (Common.complete_parse_info_large file table pi)
 215           | Ast_c.ExpandedTok (pi,vpi) ->
 216               Ast_c.ExpandedTok((Common.complete_parse_info_large file table pi),vpi)
 217           | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
 218           | Ast_c.AbstractLineTok pi -> failwith "should not occur"
 219       })
 220       in
 221
 222       if TH.is_eof tok
 223       then List.rev (tok::acc)
 224       else tokens_aux (tok::acc)
 225     in
 226     tokens_aux []
 227   with
 228     | Lexer_c.Lexical s ->
 229         failwith ("lexical error " ^ s ^ "\n =" ^
 230                   (Common.error_message file (lexbuf_to_strpos lexbuf)))
 231     | e -> raise e
 232  )
 233
 234 let time_lexing ?(profile=true) a =
 235   if profile
 236   then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
 237   else tokens2 a
 238 let tokens ?profile a =
 239   Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)
 240
 241
 242 let tokens_of_string string =
 243   let lexbuf = Lexing.from_string string in
 244   try
 245     let rec tokens_s_aux () =
 246       let tok = Lexer_c.token lexbuf in
 247       if TH.is_eof tok
 248       then [tok]
 249       else tok::(tokens_s_aux ())
 250     in
 251     tokens_s_aux ()
 252   with
 253     | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
 254     | e -> raise e
 255
 256
 257 (*****************************************************************************)
 258 (* Parsing, but very basic, no more used *)
 259 (*****************************************************************************)
 260
 261 (*
 262  * !!!Those function use refs, and are not reentrant !!! so take care.
 263  * It use globals defined in Lexer_parser.
 264  *
 265  * update: because now lexer return comments tokens, those functions
 266  * may not work anymore.
 267  *)
 268
 269 let parse file =
 270   let lexbuf = Lexing.from_channel (open_in file) in
 271   let result = Parser_c.main Lexer_c.token lexbuf in
 272   result
 273
 274
 275 let parse_print_error file =
 276   let chan = (open_in file) in
 277   let lexbuf = Lexing.from_channel chan in
 278
 279   let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
 280   try
 281     lexbuf +> Parser_c.main Lexer_c.token
 282   with
 283   | Lexer_c.Lexical s ->
 284       failwith ("lexical error " ^s^ "\n =" ^  error_msg ())
 285   | Parsing.Parse_error ->
 286       failwith ("parse error \n = " ^ error_msg ())
 287   | Semantic_c.Semantic (s, i) ->
 288       failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
 289   | e -> raise e
 290
 291
 292
 293
 294 (*****************************************************************************)
 295 (* Parsing subelements, useful to debug parser *)
 296 (*****************************************************************************)
 297
 298 (*
 299  * !!!Those function use refs, and are not reentrant !!! so take care.
 300  * It use globals defined in Lexer_parser.
 301  *)
 302
 303
 304 (* old:
 305  *   let parse_gen parsefunc s =
 306  *     let lexbuf = Lexing.from_string s in
 307  *     let result = parsefunc Lexer_c.token lexbuf in
 308  *     result
 309  *)
 310
 311 let parse_gen parsefunc s =
 312   let toks = tokens_of_string s +> List.filter TH.is_not_comment in
 313
 314
 315   (* Why use this lexing scheme ? Why not classically give lexer func
 316    * to parser ? Because I now keep comments in lexer. Could
 317    * just do a simple wrapper that when comment ask again for a token,
 318    * but maybe simpler to use cur_tok technique.
 319    *)
 320   let all_tokens = ref toks in
 321   let cur_tok    = ref (List.hd !all_tokens) in
 322
 323   let lexer_function =
 324     (fun _ ->
 325       if TH.is_eof !cur_tok
 326       then (pr2_err "LEXER: ALREADY AT END"; !cur_tok)
 327       else
 328         let v = Common.pop2 all_tokens in
 329         cur_tok := v;
 330         !cur_tok
 331     )
 332   in
 333   let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
 334   let result = parsefunc lexer_function lexbuf_fake in
 335   result
 336
 337
 338 let type_of_string       = parse_gen Parser_c.type_name
 339 let statement_of_string  = parse_gen Parser_c.statement
 340 let expression_of_string = parse_gen Parser_c.expr
 341
 342 (* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)
 343
 344
 345
 346
 347
 348 (*****************************************************************************)
 349 (* Consistency checking *)
 350 (*****************************************************************************)
 351
 352 (* todo:
 353  *  could check that an ident has always the same class, be it a typedef
 354  *  (but sometimes do 'acpi_val acpi_val;'), an ident, a TMacroStatement,
 355  *  etc.
 356  *)
 357
 358 type class_ident =
 359   | CIdent (* can be var, func, field, tag, enum constant *)
 360   | CTypedef
 361
 362 let str_of_class_ident = function
 363   | CIdent -> "Ident"
 364   | CTypedef -> "Typedef"
 365
 366 (*
 367   | CMacro
 368   | CMacroString
 369   | CMacroStmt
 370   | CMacroDecl
 371   | CMacroIterator
 372   | CAttr
 373
 374 (* but take care that must still be able to use '=' *)
 375 type context = InFunction | InEnum | InStruct | InInitializer | InParams
 376 type class_token =
 377   | CIdent of class_ident
 378
 379   | CComment
 380   | CSpace
 381   | CCommentCpp of cppkind
 382   | CCommentMisc
 383   | CCppDirective
 384
 385   | COPar
 386   | CCPar
 387   | COBrace
 388   | CCBrace
 389
 390   | CSymbol
 391   | CReservedKwd (type | decl | qualif | flow | misc | attr)
 392 *)
 393
 394 let ident_to_typename ident : Ast_c.fullType =
 395   Ast_c.mk_ty (Ast_c.TypeName  (ident, Ast_c.noTypedefDef())) Ast_c.noii
 396
 397
 398 (* parse_typedef_fix4 *)
 399 let consistency_checking2 xs =
 400
 401   (* first phase, gather data *)
 402   let stat = Hashtbl.create 101 in
 403
 404   (* default value for hash *)
 405   let v1 () = Hashtbl.create 101 in
 406   let v2 () = ref 0 in
 407
 408   let bigf = { Visitor_c.default_visitor_c with
 409
 410     Visitor_c.kexpr = (fun (k,bigf) x ->
 411       match Ast_c.unwrap_expr x with
 412       | Ast_c.Ident (id) ->
 413           let s = Ast_c.str_of_name id in
 414           stat +>
 415             Common.hfind_default s v1 +> Common.hfind_default CIdent v2 +>
 416             (fun aref -> incr aref)
 417
 418       | _ -> k x
 419     );
 420     Visitor_c.ktype = (fun (k,bigf) t ->
 421       match Ast_c.unwrap_typeC t with
 422       | Ast_c.TypeName (name,_typ) ->
 423           let s = Ast_c.str_of_name name in
 424           stat +>
 425             Common.hfind_default s v1 +> Common.hfind_default CTypedef v2 +>
 426             (fun aref -> incr aref)
 427
 428       | _ -> k t
 429     );
 430   }
 431   in
 432   xs +> List.iter (fun (p, info_item) -> Visitor_c.vk_toplevel bigf p);
 433
 434
 435   let ident_to_type = ref [] in
 436
 437
 438   (* second phase, analyze data *)
 439   stat +> Hashtbl.iter (fun k v ->
 440     let xs = Common.hash_to_list v in
 441     if List.length xs >= 2
 442     then begin
 443       pr2_err ("CONFLICT:" ^ k);
 444       let sorted = xs +> List.sort (fun (ka,va) (kb,vb) ->
 445         if !va =|= !vb then
 446           (match ka, kb with
 447           | CTypedef, _ -> 1 (* first is smaller *)
 448           | _, CTypedef -> -1
 449           | _ -> 0
 450           )
 451         else compare !va !vb
 452       ) in
 453       let sorted = List.rev sorted in
 454       match sorted with
 455       | [CTypedef, i1;CIdent, i2] ->
 456           pr2_err ("transforming some ident in typedef");
 457           push2 k ident_to_type;
 458       | _ ->
 459           pr2_err ("TODO:other transforming?");
 460
 461     end
 462   );
 463
 464   (* third phase, update ast.
 465    * todo? but normally should try to handle correctly scope ? maybe sometime
 466    * sizeof(id) and even if id was for a long time an identifier, maybe
 467    * a few time, because of the scope it's actually really a type.
 468    *)
 469   if (null !ident_to_type)
 470   then xs
 471   else
 472     let bigf = { Visitor_c.default_visitor_c_s with
 473       Visitor_c.kdefineval_s = (fun (k,bigf) x ->
 474         match x with
 475         | Ast_c.DefineExpr e ->
 476             (match Ast_c.unwrap_expr e with
 477             | Ast_c.Ident (ident)  ->
 478                 let s = Ast_c.str_of_name ident in
 479                 if List.mem s !ident_to_type
 480                 then
 481                   let t = ident_to_typename ident in
 482                   Ast_c.DefineType t
 483                 else k x
 484             | _ -> k x
 485             )
 486         | _ -> k x
 487       );
 488       Visitor_c.kexpr_s = (fun (k, bigf) x ->
 489         match Ast_c.get_e_and_ii x with
 490         | (Ast_c.SizeOfExpr e, tref), isizeof ->
 491             let i1 = tuple_of_list1 isizeof in
 492             (match Ast_c.get_e_and_ii e with
 493             | (Ast_c.ParenExpr e, _), iiparen ->
 494                 let (i2, i3) = tuple_of_list2 iiparen in
 495                 (match Ast_c.get_e_and_ii e with
 496                 | (Ast_c.Ident (ident), _), _ii  ->
 497
 498                     let s = Ast_c.str_of_name ident in
 499                     if List.mem s !ident_to_type
 500                     then
 501                       let t = ident_to_typename ident in
 502                       (Ast_c.SizeOfType t, tref),[i1;i2;i3]
 503                     else  k x
 504                 | _ -> k x
 505                 )
 506             | _ -> k x
 507             )
 508         | _ -> k x
 509       );
 510     } in
 511     xs +> List.map (fun (p, info_item) ->
 512       Visitor_c.vk_toplevel_s bigf p, info_item
 513     )
 514
 515
 516 let consistency_checking a  =
 517   Common.profile_code "C consistencycheck" (fun () -> consistency_checking2 a)
 518
 519
 520
 521 (*****************************************************************************)
 522 (* Error recovery *)
 523 (*****************************************************************************)
 524
 525 let is_define_passed passed =
 526   let xs = passed +> List.rev +> List.filter TH.is_not_comment in
 527   if List.length xs >= 2
 528   then
 529     (match Common.head_middle_tail xs with
 530     | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
 531         true
 532     | _ -> false
 533     )
 534   else begin
 535     pr2_err "WEIRD: length list of error recovery tokens < 2 ";
 536     false
 537   end
 538
 539 let is_defined_passed_bis last_round =
 540   let xs = last_round +> List.filter TH.is_not_comment in
 541   match xs with
 542   | Parser_c.TDefine _::_ -> true
 543   | _ -> false
 544
 545 (* ---------------------------------------------------------------------- *)
 546
 547
 548 (* todo: do something if find Parser_c.Eof ? *)
 549 let rec find_next_synchro next already_passed =
 550
 551   (* Maybe because not enough }, because for example an ifdef contains
 552    * in both branch some opening {, we later eat too much, "on deborde
 553    * sur la fonction d'apres". So already_passed may be too big and
 554    * looking for next synchro point starting from next may not be the
 555    * best. So maybe we can find synchro point inside already_passed
 556    * instead of looking in next.
 557    *
 558    * But take care! must progress. We must not stay in infinite loop!
 559    * For instance now I have as a error recovery to look for
 560    * a "start of something", corresponding to start of function,
 561    * but must go beyond this start otherwise will loop.
 562    * So look at premier(external_declaration2) in parser.output and
 563    * pass at least those first tokens.
 564    *
 565    * I have chosen to start search for next synchro point after the
 566    * first { I found, so quite sure we will not loop. *)
 567
 568   let last_round = List.rev already_passed in
 569   if is_defined_passed_bis last_round
 570   then find_next_synchro_define (last_round ++ next) []
 571   else
 572
 573   let (before, after) =
 574     last_round +> Common.span (fun tok ->
 575       match tok with
 576       (* by looking at TOBrace we are sure that the "start of something"
 577        * will not arrive too early
 578        *)
 579       | Parser_c.TOBrace _ -> false
 580       | Parser_c.TDefine _ -> false
 581       | _ -> true
 582     )
 583   in
 584   find_next_synchro_orig (after ++ next)  (List.rev before)
 585
 586
 587
 588 and find_next_synchro_define next already_passed =
 589   match next with
 590   | [] ->
 591       pr2_err "ERROR-RECOV: end of file while in recovery mode";
 592       already_passed, []
 593   | (Parser_c.TDefEOL i as v)::xs  ->
 594       pr2_err ("ERROR-RECOV: found sync end of #define, line "^i_to_s(TH.line_of_tok v));
 595       v::already_passed, xs
 596   | v::xs ->
 597       find_next_synchro_define xs (v::already_passed)
 598
 599
 600
 601
 602 and find_next_synchro_orig next already_passed =
 603   match next with
 604   | [] ->
 605       pr2_err "ERROR-RECOV: end of file while in recovery mode";
 606       already_passed, []
 607
 608   | (Parser_c.TCBrace i as v)::xs when TH.col_of_tok v =|= 0 ->
 609       pr2_err ("ERROR-RECOV: found sync '}' at line "^i_to_s (TH.line_of_tok v));
 610
 611       (match xs with
 612       | [] -> raise Impossible (* there is a EOF token normally *)
 613
 614       (* still useful: now parser.mly allow empty ';' so normally no pb *)
 615       | Parser_c.TPtVirg iptvirg::xs ->
 616           pr2_err "ERROR-RECOV: found sync bis, eating } and ;";
 617           (Parser_c.TPtVirg iptvirg)::v::already_passed, xs
 618
 619       | Parser_c.TIdent x::Parser_c.TPtVirg iptvirg::xs ->
 620           pr2_err "ERROR-RECOV: found sync bis, eating ident, }, and ;";
 621           (Parser_c.TPtVirg iptvirg)::(Parser_c.TIdent x)::v::already_passed,
 622           xs
 623
 624       | Parser_c.TCommentSpace sp::Parser_c.TIdent x::Parser_c.TPtVirg iptvirg
 625         ::xs ->
 626           pr2_err "ERROR-RECOV: found sync bis, eating ident, }, and ;";
 627           (Parser_c.TCommentSpace sp)::
 628             (Parser_c.TPtVirg iptvirg)::
 629             (Parser_c.TIdent x)::
 630             v::
 631             already_passed,
 632           xs
 633
 634       | Parser_c.TCommentNewline sp::Parser_c.TIdent x::Parser_c.TPtVirg iptvirg
 635         ::xs ->
 636           pr2_err "ERROR-RECOV: found sync bis, eating ident, }, and ;";
 637           (Parser_c.TCommentNewline sp)::
 638             (Parser_c.TPtVirg iptvirg)::
 639             (Parser_c.TIdent x)::
 640             v::
 641             already_passed,
 642           xs
 643
 644       | _ ->
 645           v::already_passed, xs
 646       )
 647   | v::xs when TH.col_of_tok v =|= 0 && TH.is_start_of_something v  ->
 648       pr2_err ("ERROR-RECOV: found sync col 0 at line "^ i_to_s(TH.line_of_tok v));
 649       already_passed, v::xs
 650
 651   | v::xs ->
 652       find_next_synchro_orig xs (v::already_passed)
 653
 654
 655 (*****************************************************************************)
 656 (* Macro problem recovery *)
 657 (*****************************************************************************)
 658 module TV = Token_views_c
 659
 660 let candidate_macros_in_passed2 passed defs_optional =
 661   let res = ref [] in
 662   let res2 = ref [] in
 663
 664   passed +> List.iter (function
 665   | Parser_c.TIdent (s,_)
 666    (* bugfix: may have to undo some infered things *)
 667   | Parser_c.TMacroIterator (s,_)
 668   | Parser_c.TypedefIdent (s,_)
 669     ->
 670       (match Common.hfind_option s defs_optional with
 671       | Some def ->
 672           if s ==~ Parsing_hacks.regexp_macro
 673           then
 674             (* pr2 (spf "candidate: %s" s); *)
 675             Common.push2 (s, def) res
 676           else
 677             Common.push2 (s, def) res2
 678         | None -> ()
 679         )
 680
 681   | _ -> ()
 682   );
 683   if null !res
 684   then !res2
 685   else !res
 686
 687 let candidate_macros_in_passed a b =
 688   Common.profile_code "MACRO managment" (fun () ->
 689     candidate_macros_in_passed2 a b)
 690
 691
 692
 693 let find_optional_macro_to_expand2 ~defs toks =
 694
 695   let defs = Common.hash_of_list defs in
 696
 697   let toks = toks +> Common.map (function
 698
 699     (* special cases to undo *)
 700     | Parser_c.TMacroIterator (s, ii) ->
 701         if Hashtbl.mem defs s
 702         then Parser_c.TIdent (s, ii)
 703         else Parser_c.TMacroIterator (s, ii)
 704
 705     | Parser_c.TypedefIdent (s, ii) ->
 706         if Hashtbl.mem defs s
 707         then Parser_c.TIdent (s, ii)
 708         else Parser_c.TypedefIdent (s, ii)
 709
 710     | x -> x
 711   ) in
 712
 713   let tokens = toks in
 714   Parsing_hacks.fix_tokens_cpp ~macro_defs:defs tokens
 715
 716   (* just calling apply_macro_defs and having a specialized version
 717    * of the code in fix_tokens_cpp is not enough as some work such
 718    * as the passing of the body of attribute in Parsing_hacks.find_macro_paren
 719    * will not get the chance to be run on the new expanded tokens.
 720    * Hence even if it's expensive, it's currently better to
 721    * just call directly fix_tokens_cpp again here.
 722
 723   let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
 724   let cleaner = !tokens2 +> Parsing_hacks.filter_cpp_stuff in
 725   let paren_grouped = TV.mk_parenthised  cleaner in
 726   Cpp_token_c.apply_macro_defs
 727     ~msg_apply_known_macro:(fun s -> pr2 (spf "APPLYING: %s" s))
 728     ~msg_apply_known_macro_hint:(fun s -> pr2 "hint")
 729     defs paren_grouped;
 730   (* because the before field is used by apply_macro_defs *)
 731   tokens2 := TV.rebuild_tokens_extented !tokens2;
 732   Parsing_hacks.insert_virtual_positions
 733     (!tokens2 +> Common.acc_map (fun x -> x.TV.tok))
 734   *)
 735 let find_optional_macro_to_expand ~defs a =
 736     Common.profile_code "MACRO managment" (fun () ->
 737       find_optional_macro_to_expand2 ~defs a)
 738
 739
 740
 741 (*****************************************************************************)
 742 (* Include/Define hacks *)
 743 (*****************************************************************************)
 744
 745 (* Sometimes I prefer to generate a single token for a list of things in the
 746  * lexer so that if I have to passed them, like for passing TInclude then
 747  * it's easy. Also if I don't do a single token, then I need to
 748  * parse the rest which may not need special stuff, like detecting
 749  * end of line which the parser is not really ready for. So for instance
 750  * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
 751  * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
 752  * but this kind of token is valid only after a #include and the
 753  * lexing and parsing rules are different for such tokens so not that
 754  * easy to parse such things in parser_c.mly. Hence the following hacks.
 755  *
 756  * less?: maybe could get rid of this like I get rid of some of fix_define.
 757  *)
 758
 759 (* ------------------------------------------------------------------------- *)
 760 (* helpers *)
 761 (* ------------------------------------------------------------------------- *)
 762
 763 (* used to generate new token from existing one *)
 764 let new_info posadd str ii =
 765   { Ast_c.pinfo =
 766       Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
 767         charpos = Ast_c.pos_of_info ii + posadd;
 768         str     = str;
 769         column = Ast_c.col_of_info ii + posadd;
 770       };
 771     (* must generate a new ref each time, otherwise share *)
 772     cocci_tag = ref Ast_c.emptyAnnot;
 773     comments_tag = ref Ast_c.emptyComments;
 774    }
 775
 776
 777 let rec comment_until_defeol xs =
 778   match xs with
 779   | [] ->
 780       (* job not done in Cpp_token_c.define_parse ? *)
 781       failwith "cant find end of define token TDefEOL"
 782   | x::xs ->
 783       (match x with
 784       | Parser_c.TDefEOL i ->
 785           Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok x)
 786           ::xs
 787       | _ ->
 788           let x' =
 789             (* bugfix: otherwise may lose a TComment token *)
 790             if TH.is_real_comment x
 791             then x
 792             else Parser_c.TCommentCpp (Token_c.CppPassingNormal (*good?*), TH.info_of_tok x)
 793           in
 794           x'::comment_until_defeol xs
 795       )
 796
 797 let drop_until_defeol xs =
 798   List.tl
 799     (Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs)
 800
 801
 802
 803 (* ------------------------------------------------------------------------- *)
 804 (* returns a pair (replaced token, list of next tokens) *)
 805 (* ------------------------------------------------------------------------- *)
 806
 807 let tokens_include (info, includes, filename, inifdef) =
 808   Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef),
 809   [Parser_c.TIncludeFilename
 810       (filename, (new_info (String.length includes) filename info))
 811   ]
 812
 813 (*****************************************************************************)
 814 (* Parsing default define macros, usually in a standard.h file *)
 815 (*****************************************************************************)
 816
 817 let parse_cpp_define_file2 file =
 818   Common.save_excursion Flag_parsing_c.verbose_lexing (fun () ->
 819     Flag_parsing_c.verbose_lexing := false;
 820     let toks = tokens ~profile:false file in
 821     let toks = Cpp_token_c.fix_tokens_define toks in
 822     Cpp_token_c.extract_cpp_define toks
 823   )
 824
 825 let parse_cpp_define_file a =
 826   Common.profile_code_exclusif "HACK" (fun () -> parse_cpp_define_file2 a)
 827
 828
 829
 830 let (_defs : (string, Cpp_token_c.define_def) Hashtbl.t ref)  =
 831   ref (Hashtbl.create 101)
 832
 833 let (_defs_builtins : (string, Cpp_token_c.define_def) Hashtbl.t ref)  =
 834   ref (Hashtbl.create 101)
 835
 836
 837 (* can not be put in parsing_hack, cos then mutually recursive problem as
 838  * we also want to parse the standard.h file.
 839  *)
 840 let init_defs_macros std_h =
 841   if not (Common.lfile_exists std_h)
 842   then pr2 ("warning: Can't find default macro file: " ^ std_h)
 843   else begin
 844     pr2 ("init_defs: " ^ std_h);
 845     _defs := Common.hash_of_list (parse_cpp_define_file std_h);
 846   end
 847
 848 let init_defs_builtins file_h =
 849   if not (Common.lfile_exists file_h)
 850   then pr2 ("warning: Can't find macro file: " ^ file_h)
 851   else begin
 852     pr2 ("init_defs_builtins: " ^ file_h);
 853     _defs_builtins :=
 854       Common.hash_of_list (parse_cpp_define_file file_h);
 855   end
 856
 857
 858 (*****************************************************************************)
 859 (* Main entry point *)
 860 (*****************************************************************************)
 861
 862 type info_item =  string * Parser_c.token list
 863
 864 type program2 = toplevel2 list
 865      and toplevel2 = Ast_c.toplevel * info_item
 866
 867 let program_of_program2 xs =
 868   xs +> List.map fst
 869
 870 let with_program2 f program2 =
 871   program2
 872   +> Common.unzip
 873   +> (fun (program, infos) ->
 874     f program, infos
 875   )
 876   +> Common.uncurry Common.zip
 877
 878
 879
 880 (* The use of local refs (remaining_tokens, passed_tokens, ...) makes
 881  * possible error recovery. Indeed, they allow to skip some tokens and
 882  * still be able to call again the ocamlyacc parser. It is ugly code
 883  * because we cant modify ocamllex and ocamlyacc. As we want some
 884  * extended lexing tricks, we have to use such refs.
 885  *
 886  * Those refs are now also used for my lalr(k) technique. Indeed They
 887  * store the futur and previous tokens that were parsed, and so
 888  * provide enough context information for powerful lex trick.
 889  *
 890  * - passed_tokens_last_ckp stores the passed tokens since last
 891  *   checkpoint. Used for NotParsedCorrectly and also to build the
 892  *   info_item attached to each program_element.
 893  * - passed_tokens_clean is used for lookahead, in fact for lookback.
 894  * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
 895  *   contain some comments and so would make pattern matching difficult
 896  *   in lookahead. Hence this variable. We would like also to get rid
 897  *   of cpp instruction because sometimes a cpp instruction is between
 898  *   two tokens and makes a pattern matching fail. But lookahead also
 899  *   transform some cpp instruction (in comment) so can't remove them.
 900  *
 901  * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
 902  * whereas passed_tokens_clean and remaining_tokens_clean does not contain
 903  * comment-tokens.
 904  *
 905  * Normally we have:
 906  * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
 907  *    after the call to pop2.
 908  * toks = (reverse passed_tok) ++ remaining_tokens
 909  *     at the and of the lexer_function call.
 910  * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
 911  * At the end of lexer_function call,  cur_tok  overlap  with passed_tok.
 912  *
 913  * convention: I use "tr"  for "tokens refs"
 914  *
 915  * I now also need this lexing trick because the lexer return comment
 916  * tokens.
 917  *)
 918
 919 type tokens_state = {
 920   mutable rest :         Parser_c.token list;
 921   mutable rest_clean :   Parser_c.token list;
 922   mutable current :      Parser_c.token;
 923   (* it's passed since last "checkpoint", not passed from the beginning *)
 924   mutable passed :       Parser_c.token list;
 925   mutable passed_clean : Parser_c.token list;
 926 }
 927
 928 let mk_tokens_state toks =
 929   {
 930     rest       = toks;
 931     rest_clean = (toks +> List.filter TH.is_not_comment);
 932     current    = (List.hd toks);
 933     passed = [];
 934     passed_clean = [];
 935   }
 936
 937
 938
 939 let clone_tokens_state tr =
 940   { rest = tr.rest;
 941     rest_clean = tr.rest_clean;
 942     current = tr.current;
 943     passed = tr.passed;
 944     passed_clean = tr.passed_clean;
 945   }
 946 let copy_tokens_state ~src ~dst =
 947   dst.rest <- src.rest;
 948   dst.rest_clean <- src.rest_clean;
 949   dst.current <- src.current;
 950   dst.passed <- src.passed;
 951   dst.passed_clean <-  src.passed_clean;
 952   ()
 953
 954 (* todo? agglomerate the x##b ? *)
 955 let rec filter_noise n xs =
 956   match n, xs with
 957   | _, [] -> []
 958   | 0, xs -> xs
 959   | n, x::xs ->
 960       (match x with
 961       | Parser_c.TMacroAttr _ ->
 962           filter_noise (n-1) xs
 963       | _ ->
 964           x::filter_noise (n-1) xs
 965       )
 966
 967 let clean_for_lookahead xs =
 968   match xs with
 969   | [] -> []
 970   | [x] -> [x]
 971   | x::xs ->
 972       x::filter_noise 10 xs
 973
 974
 975
 976 (* Hacked lex. This function use refs passed by parse_print_error_heuristic
 977  * tr means token refs.
 978  *)
 979 let rec lexer_function ~pass tr = fun lexbuf ->
 980   match tr.rest with
 981   | [] -> pr2_err "ALREADY AT END"; tr.current
 982   | v::xs ->
 983     tr.rest <- xs;
 984     tr.current <- v;
 985
 986     if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;
 987
 988     if TH.is_comment v
 989     then begin
 990       tr.passed <- v::tr.passed;
 991       lexer_function ~pass tr lexbuf
 992     end
 993     else begin
 994       let x = List.hd tr.rest_clean  in
 995       tr.rest_clean <- List.tl tr.rest_clean;
 996       assert (x =*= v);
 997
 998       (match v with
 999
1000       (* fix_define1.
1001        *
1002        * Why not in parsing_hacks lookahead and do passing like
1003        * I do for some ifdef directives ? Because here I also need to
1004        * generate some tokens sometimes and so I need access to the
1005        * tr.passed, tr.rest, etc.
1006        *)
1007       | Parser_c.TDefine (tok) ->
1008           if not (LP.current_context () =*= LP.InTopLevel) &&
1009             (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
1010           then begin
1011             incr Stat.nDefinePassing;
1012             pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
1013             let v' = Parser_c.TCommentCpp (Token_c.CppDirective,TH.info_of_tok v)
1014             in
1015             tr.passed <- v'::tr.passed;
1016             tr.rest       <- comment_until_defeol tr.rest;
1017             tr.rest_clean <- drop_until_defeol tr.rest_clean;
1018             lexer_function ~pass tr lexbuf
1019           end
1020           else begin
1021             tr.passed <- v::tr.passed;
1022             tr.passed_clean <- v::tr.passed_clean;
1023             v
1024           end
1025
1026       | Parser_c.TInclude (includes, filename, inifdef, info) ->
1027           if not (LP.current_context () =*= LP.InTopLevel)  &&
1028             (!Flag_parsing_c.cpp_directive_passing || (pass >= 2))
1029           then begin
1030             incr Stat.nIncludePassing;
1031             pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
1032             let v = Parser_c.TCommentCpp(Token_c.CppDirective, info) in
1033             tr.passed <- v::tr.passed;
1034             lexer_function ~pass tr lexbuf
1035           end
1036           else begin
1037             let (v,new_tokens) =
1038               tokens_include (info, includes, filename, inifdef) in
1039             let new_tokens_clean =
1040               new_tokens +> List.filter TH.is_not_comment  in
1041
1042             tr.passed <- v::tr.passed;
1043             tr.passed_clean <- v::tr.passed_clean;
1044             tr.rest <- new_tokens ++ tr.rest;
1045             tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
1046             v
1047           end
1048
1049       | _ ->
1050
1051           (* typedef_fix1 *)
1052           let v = match v with
1053             | Parser_c.TIdent (s, ii) ->
1054                 if
1055                   LP.is_typedef s &&
1056                     not (!Flag_parsing_c.disable_add_typedef) &&
1057                     pass =|= 1
1058                 then Parser_c.TypedefIdent (s, ii)
1059                 else Parser_c.TIdent (s, ii)
1060             | x -> x
1061           in
1062
1063           let v = Parsing_hacks.lookahead ~pass
1064             (clean_for_lookahead (v::tr.rest_clean))
1065             tr.passed_clean in
1066
1067           tr.passed <- v::tr.passed;
1068
1069           (* the lookahead may have changed the status of the token and
1070            * consider it as a comment, for instance some #include are
1071            * turned into comments, hence this code. *)
1072           match v with
1073           | Parser_c.TCommentCpp _ -> lexer_function ~pass tr lexbuf
1074           | v ->
1075               tr.passed_clean <- v::tr.passed_clean;
1076               v
1077       )
1078     end
1079
1080
1081 let max_pass = 4
1082
1083
1084 let get_one_elem ~pass tr (file, filelines) =
1085
1086   if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
1087   then pr2_err "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";
1088
1089   (* normally have to do that only when come from an exception in which
1090    * case the dt() may not have been done
1091    * TODO but if was in scoped scope ? have to let only the last scope
1092    * so need do a LP.lexer_reset_typedef ();
1093    *)
1094   LP.enable_typedef();
1095   LP._lexer_hint := (LP.default_hint ());
1096   LP.save_typedef_state();
1097
1098   tr.passed <- [];
1099
1100   let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
1101
1102   (try
1103       (* -------------------------------------------------- *)
1104       (* Call parser *)
1105       (* -------------------------------------------------- *)
1106       Common.profile_code_exclusif "YACC" (fun () ->
1107         Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
1108       )
1109     with e ->
1110       LP.restore_typedef_state();
1111
1112       (* must keep here, before the code that adjusts the tr fields *)
1113       let line_error = TH.line_of_tok tr.current in
1114
1115       let passed_before_error = tr.passed in
1116       let current = tr.current in
1117
1118       (*  error recovery, go to next synchro point *)
1119       let (passed', rest') = find_next_synchro tr.rest tr.passed in
1120       tr.rest <- rest';
1121       tr.passed <- passed';
1122
1123       tr.current <- List.hd passed';
1124       tr.passed_clean <- [];           (* enough ? *)
1125       (* with error recovery, rest and rest_clean may not be in sync *)
1126       tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);
1127
1128
1129       let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
1130       Right (info_of_bads,  line_error,
1131             tr.passed, passed_before_error,
1132             current, e)
1133   )
1134
1135
1136
1137
1138 (* note: as now we go in 2 passes, there is first all the error message of
1139  * the lexer, and then the error of the parser. It is not anymore
1140  * interwinded.
1141  *
1142  * !!!This function use refs, and is not reentrant !!! so take care.
1143  * It use globals defined in Lexer_parser and also the _defs global
1144  * in parsing_hack.ml.
1145  *
1146  * This function uses internally some semi globals in the
1147  * tokens_stat record and parsing_stat record.
1148  *)
1149
1150 let parse_print_error_heuristic2 file =
1151
1152   let filelines = Common.cat_array file in
1153   let stat = Parsing_stat.default_stat file in
1154
1155   (* -------------------------------------------------- *)
1156   (* call lexer and get all the tokens *)
1157   (* -------------------------------------------------- *)
1158   LP.lexer_reset_typedef();
1159   Parsing_hacks.ifdef_paren_cnt := 0;
1160
1161   let toks_orig = tokens file in
1162
1163   let toks = Cpp_token_c.fix_tokens_define toks_orig in
1164
1165   let toks = Parsing_hacks.fix_tokens_cpp ~macro_defs:!_defs_builtins toks in
1166
1167   (* expand macros on demand trick, preparation phase *)
1168   let macros =
1169     Common.profile_code "MACRO mgmt prep 1" (fun () ->
1170       let macros = Hashtbl.copy !_defs in
1171       (* include also builtins as some macros may generate some builtins too
1172        * like __decl_spec or __stdcall
1173        *)
1174       !_defs_builtins +> Hashtbl.iter (fun s def ->
1175         Hashtbl.replace macros   s def;
1176       );
1177       macros
1178     )
1179   in
1180   Common.profile_code "MACRO mgmt prep 2" (fun () ->
1181     let local_macros = parse_cpp_define_file file in
1182     local_macros +> List.iter (fun (s, def) ->
1183       Hashtbl.replace macros   s def;
1184     );
1185   );
1186
1187   let tr = mk_tokens_state toks in
1188
1189   let rec loop tr =
1190
1191     (* todo?: I am not sure that it represents current_line, cos maybe
1192      * tr.current partipated in the previous parsing phase, so maybe tr.current
1193      * is not the first token of the next parsing phase. Same with checkpoint2.
1194      * It would be better to record when we have a } or ; in parser.mly,
1195      *  cos we know that they are the last symbols of external_declaration2.
1196      *
1197      * bugfix: may not be equal to 'file' as after macro expansions we can
1198      * start to parse a new entity from the body of a macro, for instance
1199      * when parsing a define_machine() body, cf standard.h
1200      *)
1201     let checkpoint = TH.line_of_tok tr.current in
1202     let checkpoint_file = TH.file_of_tok tr.current in
1203
1204     (* call the parser *)
1205     let elem =
1206       let pass1 =
1207         Common.profile_code "Parsing: 1st pass" (fun () ->
1208           get_one_elem ~pass:1 tr (file, filelines)
1209         ) in
1210       match pass1 with
1211       | Left e -> Left e
1212       | Right (info,line_err, passed, passed_before_error, cur, exn) ->
1213           if !Flag_parsing_c.disable_multi_pass
1214           then pass1
1215           else begin
1216             Common.profile_code "Parsing: multi pass" (fun () ->
1217
1218             pr2_err "parsing pass2: try again";
1219             let toks = List.rev passed ++ tr.rest in
1220             let new_tr = mk_tokens_state toks in
1221             copy_tokens_state ~src:new_tr ~dst:tr;
1222             let passx = get_one_elem ~pass:2 tr (file, filelines) in
1223
1224             (match passx with
1225             | Left e -> passx
1226             | Right (info,line_err,passed,passed_before_error,cur,exn) ->
1227                 let candidates =
1228                   candidate_macros_in_passed passed macros
1229                 in
1230                 if is_define_passed passed || null candidates
1231                 then passx
1232                 else begin
1233                   (* todo factorize code *)
1234
1235                   pr2_err "parsing pass3: try again";
1236                   let toks = List.rev passed ++ tr.rest in
1237                   let toks' =
1238                     find_optional_macro_to_expand ~defs:candidates toks in
1239                   let new_tr = mk_tokens_state toks' in
1240                   copy_tokens_state ~src:new_tr ~dst:tr;
1241                   let passx = get_one_elem ~pass:3 tr (file, filelines) in
1242
1243                   (match passx with
1244                   | Left e -> passx
1245                   | Right (info,line_err,passed,passed_before_error,cur,exn) ->
1246                       pr2_err "parsing pass4: try again";
1247
1248                       let candidates =
1249                         candidate_macros_in_passed passed macros in
1250
1251                       let toks = List.rev passed ++ tr.rest in
1252                       let toks' =
1253                       find_optional_macro_to_expand ~defs:candidates toks in
1254                       let new_tr = mk_tokens_state toks' in
1255                       copy_tokens_state ~src:new_tr ~dst:tr;
1256                       let passx = get_one_elem ~pass:4 tr (file, filelines) in
1257                       passx
1258                   )
1259                  end
1260             )
1261             )
1262           end
1263     in
1264
1265
1266     (* again not sure if checkpoint2 corresponds to end of bad region *)
1267     let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
1268     let checkpoint2_file = TH.file_of_tok tr.current in
1269
1270     let diffline =
1271       if (checkpoint_file =$= checkpoint2_file) && (checkpoint_file =$= file)
1272       then (checkpoint2 - checkpoint)
1273       else 0
1274         (* TODO? so if error come in middle of something ? where the
1275          * start token was from original file but synchro found in body
1276          * of macro ? then can have wrong number of lines stat.
1277          * Maybe simpler just to look at tr.passed and count
1278          * the lines in the token from the correct file ?
1279          *)
1280     in
1281     let info = mk_info_item file (List.rev tr.passed) in
1282
1283     (* some stat updates *)
1284     stat.Stat.commentized <-
1285       stat.Stat.commentized + count_lines_commentized (snd info);
1286
1287     let elem =
1288       match elem with
1289       | Left e ->
1290           stat.Stat.correct <- stat.Stat.correct + diffline;
1291           e
1292       | Right (info_of_bads, line_error, toks_of_bads,
1293               _passed_before_error, cur, exn) ->
1294
1295           let was_define = is_define_passed tr.passed in
1296
1297           if was_define && !Flag_parsing_c.filter_msg_define_error
1298           then ()
1299           else begin
1300
1301             (match exn with
1302             | Lexer_c.Lexical _
1303             | Parsing.Parse_error
1304             | Semantic_c.Semantic _ -> ()
1305             | e -> raise e
1306             );
1307
1308             if !Flag_parsing_c.show_parsing_error
1309             then begin
1310               (match exn with
1311               (* Lexical is not anymore launched I think *)
1312               | Lexer_c.Lexical s ->
1313                   pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok cur)
1314               | Parsing.Parse_error ->
1315                   pr2 ("parse error \n = " ^ error_msg_tok cur)
1316               | Semantic_c.Semantic (s, i) ->
1317                   pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok cur)
1318               | e -> raise Impossible
1319               );
1320               (* bugfix: *)
1321               if (checkpoint_file =$= checkpoint2_file) &&
1322                 checkpoint_file =$= file
1323               then print_bad line_error (checkpoint, checkpoint2) filelines
1324               else pr2 "PB: bad: but on tokens not from original file"
1325             end;
1326
1327
1328             let pbline =
1329               toks_of_bads
1330               +> Common.filter (TH.is_same_line_or_close line_error)
1331               +> Common.filter TH.is_ident_like
1332             in
1333             let error_info =
1334               (pbline +> List.map TH.str_of_tok), line_error
1335             in
1336             stat.Stat.problematic_lines <-
1337               error_info::stat.Stat.problematic_lines;
1338
1339           end;
1340
1341           if was_define && !Flag_parsing_c.filter_define_error
1342           then stat.Stat.correct <- stat.Stat.correct + diffline
1343           else stat.Stat.bad     <- stat.Stat.bad     + diffline;
1344
1345           Ast_c.NotParsedCorrectly info_of_bads
1346     in
1347
1348     (match elem with
1349     | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
1350     | xs -> (xs, info):: loop tr (* recurse *)
1351     )
1352   in
1353   let v = loop tr in
1354   let v = consistency_checking v in
1355   (v, stat)
1356
1357
1358 let time_total_parsing a  =
1359   Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a)
1360
1361 let parse_print_error_heuristic a  =
1362   Common.profile_code "C parsing" (fun () -> time_total_parsing a)
1363
1364
1365 (* alias *)
1366 let parse_c_and_cpp a = parse_print_error_heuristic a
1367
1368 (*****************************************************************************)
1369 (* Same but faster cos memoize stuff *)
1370 (*****************************************************************************)
1371 let parse_cache file =
1372   if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file
1373   else
1374   let _ = pr2 "TOFIX" in
1375   let need_no_changed_files =
1376     (* should use Sys.argv.(0), would be safer. *)
1377
1378     [
1379       (* TOFIX
1380       Config.path ^ "/parsing_c/c_parser.cma";
1381       (* we may also depend now on the semantic patch because
1382          the SP may use macro and so we will disable some of the
1383          macro expansions from standard.h.
1384       *)
1385       !Config.std_h;
1386       *)
1387     ]
1388   in
1389   let need_no_changed_variables =
1390     (* could add some of the flags of flag_parsing_c.ml *)
1391     []
1392   in
1393   Common.cache_computation_robust
1394     file ".ast_raw"
1395     (need_no_changed_files, need_no_changed_variables) ".depend_raw"
1396     (fun () -> parse_print_error_heuristic file)
1397
1398
1399
1400 (*****************************************************************************)
1401 (* Some special cases *)
1402 (*****************************************************************************)
1403
1404 let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
1405   let tmpfile = Common.new_temp_file "cocci_stmt_of_s" "c" in
1406   Common.write_file tmpfile ("void main() { \n" ^ s ^ "\n}");
1407   let program = parse_c_and_cpp tmpfile +> fst in
1408   program +> Common.find_some (fun (e,_) ->
1409     match e with
1410     | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
1411     | _ -> None
1412   )
1413
1414 let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
1415   let tmpfile = Common.new_temp_file "cocci_expr_of_s" "c" in
1416   Common.write_file tmpfile ("void main() { \n" ^ s ^ ";\n}");
1417   let program = parse_c_and_cpp tmpfile +> fst in
1418   program +> Common.find_some (fun (e,_) ->
1419     match e with
1420     | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
1421         (match compound with
1422         | [Ast_c.StmtElem st] ->
1423             (match Ast_c.unwrap_st st with
1424             | Ast_c.ExprStatement (Some e) -> Some e
1425             | _ -> None
1426             )
1427         | _ -> None
1428         )
1429     | _ -> None
1430   )