(* Copyright (C) 2006, 2007, 2008 Yoann Padioleau
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License (GPL)
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * file license.txt for more details.
 *)

open Common

module TH = Token_helpers
module LP = Lexer_parser

module Stat = Parsing_stat

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)

(* Verbosity-gated logging: both print only when verbose_parsing is set. *)
let pr2 s =
  if !Flag_parsing_c.verbose_parsing
  then Common.pr2 s

let pr2_once s =
  if !Flag_parsing_c.verbose_parsing
  then Common.pr2_once s

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* (lexeme string, start position) of the token currently in [lexbuf]. *)
let lexbuf_to_strpos lexbuf =
  (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)

(* (string, position) pair of an already-built token. *)
let token_to_strpos tok =
  (TH.str_of_tok tok, TH.pos_of_tok tok)

(* Human-readable location of [tok], with full context when verbose.
 * bugfix: the terse branch used to concatenate the filename directly with
 * the hint ("...fooset verbose_parsing..."); add a separator. *)
let error_msg_tok tok =
  let file = TH.file_of_tok tok in
  if !Flag_parsing_c.verbose_parsing
  then Common.error_message file (token_to_strpos tok)
  else ("error in " ^ file ^ "; set verbose_parsing for more info")

(* Print the source lines of a badly-parsed region, flagging [line_error]. *)
let print_bad line_error (start_line, end_line) filelines =
  begin
    pr2 ("badcount: " ^ i_to_s (end_line - start_line));
    for i = start_line to end_line do
      let line = filelines.(i) in
      if i = line_error
      then pr2 ("BAD:!!!!!" ^ " " ^ line)
      else pr2 ("bad:" ^ " " ^ line)
    done
  end

(* Build the info_item for a toplevel element: the concatenation of the
 * original-source text of its tokens, paired with the tokens themselves.
 * AbstractLineTok must never reach here (it only exists in abstracted asts). *)
let mk_info_item2 filename toks =
  let buf = Buffer.create 100 in
  let s =
    (* old: get_slice_file filename (line1, line2) *)
    begin
      toks +> List.iter (fun tok ->
        match TH.pinfo_of_tok tok with
        | Ast_c.OriginTok _ -> Buffer.add_string buf (TH.str_of_tok tok)
        | Ast_c.AbstractLineTok _ -> raise Impossible
        | _ -> ()
      );
      Buffer.contents buf
    end
  in
  (s, toks)

let mk_info_item a b =
  Common.profile_code "C parsing.mk_info_item"
    (fun () -> mk_info_item2 a b)

(* Keep only the infos located on [line]. *)
let info_same_line line xs =
  xs +> List.filter (fun info -> Ast_c.line_of_info info = line)

(*****************************************************************************)
(* Stats on what was passed/commentized *)
(*****************************************************************************)

(* Position infos of the tokens that were "commentized" (passed over) and
 * that are NOT legal to pass at the current filter_passed_level.
 * Higher levels accept more cpp kinds as legitimately passed. *)
let commentized xs =
  xs +> Common.map_filter (function
    | Parser_c.TCommentCpp (cppkind, ii) ->
        let s = Ast_c.str_of_info ii in
        let legal_passing =
          match !Flag_parsing_c.filter_passed_level with
          | 0 -> false
          | 1 ->
              List.mem cppkind [Ast_c.CppAttr]
              || (s =~ "__.*")
          | 2 ->
              List.mem cppkind [Ast_c.CppAttr;Ast_c.CppPassingNormal]
              || (s =~ "__.*")
          | 3 ->
              List.mem cppkind
                [Ast_c.CppAttr;Ast_c.CppPassingNormal;Ast_c.CppDirective]
              || (s =~ "__.*")
          | 4 ->
              List.mem cppkind
                [Ast_c.CppAttr;Ast_c.CppPassingNormal;Ast_c.CppMacro]
              || (s =~ "__.*")
          | 5 ->
              List.mem cppkind
                [Ast_c.CppAttr;Ast_c.CppPassingNormal;
                 Ast_c.CppDirective;Ast_c.CppMacro]
              || (s =~ "__.*")
          | _ -> failwith "not valid level passing number"
        in
        if legal_passing then None else Some (ii.Ast_c.pinfo)
        (*
        | Ast_c.CppOther ->
            (match s with
            | s when s =~ "KERN_.*" -> None
            | s when s =~ "__.*" -> None
            | _ -> Some (ii.Ast_c.pinfo)
            )
        *)
    | Parser_c.TCommentMisc ii
    | Parser_c.TAction ii ->
        Some (ii.Ast_c.pinfo)
    | _ -> None
  )

(* Number of distinct source lines holding commentized tokens.
 * note: counts line *changes* in sequence order, so a line revisited
 * non-contiguously would be counted twice (same as the original). *)
let count_lines_commentized xs =
  let line = ref (-1) in
  let count = ref 0 in
  begin
    commentized xs +> List.iter (function
      Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
        let newline = pinfo.Common.line in
        if newline <> !line
        then begin
          line := newline;
          incr count
        end
      | _ -> ());
    !count
  end

(* Dump the commentized tokens, grouped by source line, to stderr. *)
let print_commentized xs =
  let line = ref (-1) in
  begin
    let ys = commentized xs in
    ys +> List.iter (function
      Ast_c.OriginTok pinfo | Ast_c.ExpandedTok (_,(pinfo,_)) ->
        let newline = pinfo.Common.line in
        let s = pinfo.Common.str in
        (* strip embedded newlines so each passed token prints inline *)
        let s = Str.global_substitute (Str.regexp "\n") (fun s -> "") s in
        if newline = !line
        then prerr_string (s ^ " ")
        else begin
          if !line = -1
          then pr2_no_nl "passed:"
          else pr2_no_nl "\npassed:";
          line := newline;
          pr2_no_nl (s ^ " ");
        end
      | _ -> ());
    if not (null ys) then pr2 "";
  end

(*****************************************************************************)
(* Lexing only *)
(*****************************************************************************)

(* called by parse_print_error_heuristic *)
(* Lex the whole [file] into a token list (EOF token included, in order),
 * completing each token's position info (line/column) from the charpos
 * table. Raises Failure on lexical errors. *)
let tokens2 file =
  let table = Common.full_charpos_to_pos file in
  Common.with_open_infile file (fun chan ->
    let lexbuf = Lexing.from_channel chan in
    try
      let rec tokens_aux acc =
        let tok = Lexer_c.token lexbuf in
        (* fill in the line and col information *)
        let tok = tok +> TH.visitor_info_of_tok (fun ii ->
          { ii with Ast_c.pinfo =
              (* could assert pinfo.filename = file ? *)
              match Ast_c.pinfo_of_info ii with
              | Ast_c.OriginTok pi ->
                  Ast_c.OriginTok (Common.complete_parse_info file table pi)
              | Ast_c.ExpandedTok (pi,vpi) ->
                  Ast_c.ExpandedTok
                    ((Common.complete_parse_info file table pi),vpi)
              | Ast_c.FakeTok (s,vpi) -> Ast_c.FakeTok (s,vpi)
              | Ast_c.AbstractLineTok pi -> failwith "should not occur"
          })
        in
        if TH.is_eof tok
        then List.rev (tok::acc)
        else tokens_aux (tok::acc)
      in
      tokens_aux []
    with
    | Lexer_c.Lexical s ->
        failwith ("lexical error " ^ s ^ "\n =" ^
                  (Common.error_message file (lexbuf_to_strpos lexbuf)))
    | e -> raise e
  )

let time_lexing ?(profile=true) a =
  if profile
  then Common.profile_code_exclusif "LEXING" (fun () -> tokens2 a)
  else tokens2 a

let tokens ?profile a =
  Common.profile_code "C parsing.tokens" (fun () -> time_lexing ?profile a)

(* Lex a string instead of a file; includes the EOF token. *)
let tokens_of_string string =
  let lexbuf = Lexing.from_string string in
  try
    let rec tokens_s_aux () =
      let tok = Lexer_c.token lexbuf in
      if TH.is_eof tok
      then [tok]
      else tok::(tokens_s_aux ())
    in
    tokens_s_aux ()
  with
  | Lexer_c.Lexical s -> failwith ("lexical error " ^ s ^ "\n =" )
  | e -> raise e

(*****************************************************************************)
(* Parsing, but very basic, no more used *)
(*****************************************************************************)

(*
 * !!!Those function use refs, and are not reentrant !!! so take care.
 * It use globals defined in Lexer_parser.
 *
 * update: because now lexer return comments tokens, those functions
 * may not work anymore.
 *)

let parse file =
  let lexbuf = Lexing.from_channel (open_in file) in
  let result = Parser_c.main Lexer_c.token lexbuf in
  result

let parse_print_error file =
  let chan = (open_in file) in
  let lexbuf = Lexing.from_channel chan in
  let error_msg () = Common.error_message file (lexbuf_to_strpos lexbuf) in
  try
    lexbuf +> Parser_c.main Lexer_c.token
  with
  | Lexer_c.Lexical s ->
      failwith ("lexical error " ^s^ "\n =" ^ error_msg ())
  | Parsing.Parse_error ->
      failwith ("parse error \n = " ^ error_msg ())
  | Semantic_c.Semantic (s, i) ->
      failwith ("semantic error " ^ s ^ "\n =" ^ error_msg ())
  | e -> raise e

(*****************************************************************************)
(* Parsing subelements, useful to debug parser *)
(*****************************************************************************)

(*
 * !!!Those function use refs, and are not reentrant !!! so take care.
 * It use globals defined in Lexer_parser.
 *)

(* old:
 *   let parse_gen parsefunc s =
 *     let lexbuf = Lexing.from_string s in
 *     let result = parsefunc Lexer_c.token lexbuf in
 *     result
 *)

(* Run an arbitrary entry point of the grammar over the string [s].
 * We pre-lex the string, drop comment tokens, and feed the parser from a
 * ref-held token list rather than a real lexbuf. *)
let parse_gen parsefunc s =
  let toks = tokens_of_string s +> List.filter TH.is_not_comment in

  (* Why use this lexing scheme ? Why not classically give lexer func
   * to parser ? Because I now keep comments in lexer. Could
   * just do a simple wrapper that when comment ask again for a token,
   * but maybe simpler to use cur_tok technique.
   *)
  let all_tokens = ref toks in
  let cur_tok    = ref (List.hd !all_tokens) in

  let lexer_function =
    (fun _ ->
      if TH.is_eof !cur_tok
      then (pr2 "LEXER: ALREADY AT END"; !cur_tok)
      else
        let v = Common.pop2 all_tokens in
        cur_tok := v;
        !cur_tok
    )
  in
  let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
  let result = parsefunc lexer_function lexbuf_fake in
  result

let type_of_string       = parse_gen Parser_c.type_name
let statement_of_string  = parse_gen Parser_c.statement
let expression_of_string = parse_gen Parser_c.expr

(* ex: statement_of_string "(struct us_data* )psh->hostdata = NULL;" *)

(*****************************************************************************)
(* Consistency checking *)
(*****************************************************************************)

type class_ident =
  | CIdent (* can be var, func, field, tag, enum constant *)
  | CTypedef

let str_of_class_ident = function
  | CIdent -> "Ident"
  | CTypedef -> "Typedef"

(*
  | CMacro
  | CMacroString
  | CMacroStmt
  | CMacroDecl
  | CMacroIterator
  | CAttr

  (* but take care that must still be able to use '=' *)
  type context = InFunction | InEnum | InStruct | InInitializer | InParams
  type class_token =
    | CIdent of class_ident
    | CComment
    | CSpace
    | CCommentCpp of cppkind
    | CCommentMisc
    | CCppDirective
    | COPar
    | CCPar
    | COBrace
    | CCBrace
    | CSymbol
    | CReservedKwd (type | decl | qualif | flow | misc | attr)
*)

(* parse_typedef_fix4 *)
(* Detect names used both as plain identifiers and as typedefs across the
 * parsed file; when a name is used more often as a typedef, rewrite a few
 * identifier uses (inside #define bodies and sizeof(..)) into types. *)
let consistency_checking2 xs =

  (* first phase, gather data: name -> (class_ident -> use count) *)
  let stat = Hashtbl.create 101 in

  (* default value for hash *)
  let v1 () = Hashtbl.create 101 in
  let v2 () = ref 0 in

  let bigf = { Visitor_c.default_visitor_c with
    Visitor_c.kexpr = (fun (k,bigf) x ->
      match Ast_c.unwrap_expr x with
      | Ast_c.Ident s ->
          stat +>
            Common.hfind_default s v1 +>
            Common.hfind_default CIdent v2 +> (fun aref -> incr aref)
      | _ -> k x
    );
    Visitor_c.ktype = (fun (k,bigf) t ->
      match Ast_c.unwrap_typeC t with
      | Ast_c.TypeName (s,_typ) ->
          stat +>
            Common.hfind_default s v1 +>
            Common.hfind_default CTypedef v2 +> (fun aref -> incr aref)
      | _ -> k t
    );
  } in
  xs +> List.iter (fun (p, info_item) -> Visitor_c.vk_toplevel bigf p);

  let ident_to_type = ref [] in

  (* second phase, analyze data: a name used in >= 2 classes is a conflict *)
  stat +> Hashtbl.iter (fun k v ->
    let xs = Common.hash_to_list v in
    if List.length xs >= 2
    then begin
      pr2 ("CONFLICT:" ^ k);
      let sorted = xs +> List.sort (fun (ka,va) (kb,vb) ->
        if !va = !vb
        then
          (match ka, kb with
          | CTypedef, _ -> 1 (* first is smaller *)
          | _, CTypedef -> -1
          | _ -> 0
          )
        else compare !va !vb
      ) in
      let sorted = List.rev sorted in
      match sorted with
      | [CTypedef, i1;CIdent, i2] ->
          pr2 ("transforming some ident in typedef");
          push2 k ident_to_type;
      | _ ->
          pr2 ("TODO:other transforming?");
    end
  );

  (* third phase, update ast.
   * todo? but normally should try to handle correctly scope ? maybe sometime
   * sizeof(id) and even if id was for a long time an identifier, maybe
   * a few time, because of the scope it's actually really a type.
   *)
  if (null !ident_to_type)
  then xs
  else
    let bigf = { Visitor_c.default_visitor_c_s with
      Visitor_c.kdefineval_s = (fun (k,bigf) x ->
        match x with
        | Ast_c.DefineExpr e ->
            (match e with
            | (Ast_c.Ident s, _), ii when List.mem s !ident_to_type ->
                let t =
                  (Ast_c.nQ,
                   (Ast_c.TypeName (s, Ast_c.noTypedefDef()), ii)) in
                Ast_c.DefineType t
            | _ -> k x
            )
        | _ -> k x
      );
      Visitor_c.kexpr_s = (fun (k, bigf) x ->
        match x with
        | (Ast_c.SizeOfExpr e, tref), isizeof ->
            let i1 = tuple_of_list1 isizeof in
            (match e with
            | (Ast_c.ParenExpr e, _), iiparen ->
                (match e with
                | (Ast_c.Ident s, _), ii when List.mem s !ident_to_type ->
                    let (i2, i3) = tuple_of_list2 iiparen in
                    let t =
                      (Ast_c.nQ,
                       (Ast_c.TypeName (s, Ast_c.noTypedefDef()), ii)) in
                    (* sizeof(id) becomes sizeof(type), reusing the parens *)
                    (Ast_c.SizeOfType t, tref), [i1;i2;i3]
                | _ -> k x
                )
            | _ -> k x
            )
        | _ -> k x
      );
    } in
    xs +> List.map (fun (p, info_item) ->
      Visitor_c.vk_toplevel_s bigf p, info_item
    )

let consistency_checking a =
  Common.profile_code "C consistencycheck" (fun () ->
    consistency_checking2 a)
(*****************************************************************************)
(* Error recovery *)
(*****************************************************************************)

(* todo: do something if find Parser_c.Eof ? *)
(* Error recovery entry point: given the tokens not yet consumed ([next])
 * and the tokens consumed since the last checkpoint ([already_passed]),
 * return (tokens to count as passed, tokens to resume parsing from). *)
let rec find_next_synchro next already_passed =

  (* Maybe because not enough }, because for example an ifdef contains
   * in both branch some opening {, we later eat too much, "on deborde
   * sur la fonction d'apres". So already_passed may be too big and
   * looking for next synchro point starting from next may not be the
   * best. So maybe we can find synchro point inside already_passed
   * instead of looking in next.
   *
   * But take care! must progress. We must not stay in infinite loop!
   * For instance now I have as a error recovery to look for
   * a "start of something", corresponding to start of function,
   * but must go beyond this start otherwise will loop.
   * So look at premier(external_declaration2) in parser.output and
   * pass at least those first tokens.
   *
   * I have chosen to start search for next synchro point after the
   * first { I found, so quite sure we will not loop.
   *)
  let last_round = List.rev already_passed in
  (* if the failed region started with a #define, resync on its TDefEOL *)
  let is_define =
    let xs = last_round +> List.filter TH.is_not_comment in
    match xs with
    | Parser_c.TDefine _::_ -> true
    | _ -> false
  in
  if is_define
  then find_next_synchro_define (last_round ++ next) []
  else
    (* otherwise restart the search just after the first { (or #define)
     * of the failed region, to guarantee progress *)
    let (before, after) =
      last_round +> Common.span (fun tok ->
        match tok with
        (* by looking at TOBrace we are sure that the "start of something"
         * will not arrive too early *)
        | Parser_c.TOBrace _ -> false
        | Parser_c.TDefine _ -> false
        | _ -> true
      )
    in
    find_next_synchro_orig (after ++ next) (List.rev before)

(* Resynchronize after a failed #define: skip to its end-of-line token. *)
and find_next_synchro_define next already_passed =
  match next with
  | [] ->
      pr2 "ERROR-RECOV: end of file while in recovery mode";
      already_passed, []
  | (Parser_c.TDefEOL i as v)::xs ->
      pr2 ("ERROR-RECOV: found sync end of #define, line "^
           i_to_s(TH.line_of_tok v));
      v::already_passed, xs
  | v::xs ->
      find_next_synchro_define xs (v::already_passed)

(* General resynchronization: look for a '}' at column 0 (end of a toplevel
 * function body), possibly eating a trailing ";" / "ident;" after it, or
 * for a column-0 token that can start a toplevel declaration. *)
and find_next_synchro_orig next already_passed =
  match next with
  | [] ->
      pr2 "ERROR-RECOV: end of file while in recovery mode";
      already_passed, []
  | (Parser_c.TCBrace i as v)::xs when TH.col_of_tok v = 0 ->
      pr2 ("ERROR-RECOV: found sync '}' at line "^i_to_s (TH.line_of_tok v));
      (match xs with
      | [] -> raise Impossible (* there is a EOF token normally *)
      (* still useful: now parser.mly allow empty ';' so normally no pb *)
      | Parser_c.TPtVirg iptvirg::xs ->
          pr2 "ERROR-RECOV: found sync bis, eating } and ;";
          (Parser_c.TPtVirg iptvirg)::v::already_passed, xs
      | Parser_c.TIdent x::Parser_c.TPtVirg iptvirg::xs ->
          pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
          (Parser_c.TPtVirg iptvirg)::(Parser_c.TIdent x)::v::
            already_passed, xs
      | Parser_c.TCommentSpace sp::Parser_c.TIdent x::Parser_c.TPtVirg iptvirg
        ::xs ->
          pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
          (Parser_c.TCommentSpace sp)::
            (Parser_c.TPtVirg iptvirg)::
            (Parser_c.TIdent x)::
            v::
            already_passed, xs
      | Parser_c.TCommentNewline sp::Parser_c.TIdent x::Parser_c.TPtVirg
          iptvirg::xs ->
          pr2 "ERROR-RECOV: found sync bis, eating ident, }, and ;";
          (Parser_c.TCommentNewline sp)::
            (Parser_c.TPtVirg iptvirg)::
            (Parser_c.TIdent x)::
            v::
            already_passed, xs
      | _ ->
          v::already_passed, xs
      )
  | v::xs when TH.col_of_tok v = 0 && TH.is_start_of_something v ->
      pr2 ("ERROR-RECOV: found sync col 0 at line "^
           i_to_s(TH.line_of_tok v));
      already_passed, v::xs
  | v::xs ->
      find_next_synchro_orig xs (v::already_passed)

(*****************************************************************************)
(* Include/Define hacks *)
(*****************************************************************************)

(* Sometimes I prefer to generate a single token for a list of things in the
 * lexer so that if I have to passed them, like for passing TInclude then
 * it's easy. Also if I don't do a single token, then I need to
 * parse the rest which may not need special stuff, like detecting
 * end of line which the parser is not really ready for. So for instance
 * could I parse a #include as 2 or more tokens ? just
 * lex #include ? so then need recognize as one token ?
 * but this kind of token is valid only after a #include and the
 * lexing and parsing rules are different for such tokens so not that
 * easy to parse such things in parser_c.mly. Hence the following hacks.
 *
 * less?: maybe could get rid of this like I get rid of some of fix_define.
 *)

(* ------------------------------------------------------------------------- *)
(* helpers *)
(* ------------------------------------------------------------------------- *)

(* used to generate new token from existing one *)
(* Build a fresh info carrying [str], located [posadd] characters after the
 * position of [ii]. *)
let new_info posadd str ii =
  { Ast_c.pinfo =
      Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
        charpos = Ast_c.pos_of_info ii + posadd;
        str     = str;
        column  = Ast_c.col_of_info ii + posadd;
      };
    (* must generate a new ref each time, otherwise share *)
    cocci_tag = ref Ast_c.emptyAnnot;
    comments_tag = ref Ast_c.emptyComments;
  }

(* Turn every token up to and including the closing TDefEOL of a #define
 * into comment tokens, leaving real comments untouched. *)
let rec comment_until_defeol xs =
  match xs with
  | [] -> failwith "cant find end of define token TDefEOL"
  | x::xs ->
      (match x with
      | Parser_c.TDefEOL i ->
          Parser_c.TCommentCpp (Ast_c.CppDirective, TH.info_of_tok x)
          ::xs
      | _ ->
          let x' =
            (* bugfix: otherwise may lose a TComment token *)
            if TH.is_real_comment x
            then x
            else Parser_c.TCommentCpp (Ast_c.CppPassingNormal (*good?*),
                                       TH.info_of_tok x)
          in
          x'::comment_until_defeol xs
      )

(* Drop every token up to and including the TDefEOL of a #define. *)
let drop_until_defeol xs =
  List.tl
    (Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs)

(* ------------------------------------------------------------------------- *)
(* returns a pair (replaced token, list of next tokens) *)
(* ------------------------------------------------------------------------- *)

(* Split a single TInclude into a start token plus a filename token whose
 * position is just after the "#include" text. *)
let tokens_include (info, includes, filename, inifdef) =
  Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef),
  [Parser_c.TIncludeFilename
      (filename, (new_info (String.length includes) filename info))
  ]

(*****************************************************************************)
(* Parsing default define macros, usually in a standard.h file *)
(*****************************************************************************)

(* Lex [file] (no profiling) and extract its cpp #define entries. *)
let parse_cpp_define_file2 file =
  let toks = tokens ~profile:false file in
  let toks = Parsing_hacks.fix_tokens_define toks in
  Parsing_hacks.extract_cpp_define toks

let parse_cpp_define_file a =
  Common.profile_code_exclusif "HACK" (fun () -> parse_cpp_define_file2 a)

(* can not be put in parsing_hack, cos then mutually recursive problem as
 * we also want to parse the standard.h file.
 *)
(* Load the default macro definitions from [std_h] into the global table
 * used by parsing_hacks; just warns if the file does not exist. *)
let init_defs std_h =
  if not (Common.lfile_exists std_h)
  then pr2 ("warning: Can't find default macro file: " ^ std_h)
  else begin
    pr2 ("init_defs: " ^ std_h);
    Parsing_hacks._defs := Common.hash_of_list (parse_cpp_define_file std_h);
  end

(*****************************************************************************)
(* Main entry point *)
(*****************************************************************************)

(* The info_item attached to each toplevel element: its original source text
 * and the tokens it came from. *)
type info_item = string * Parser_c.token list

type program2 = toplevel2 list
and toplevel2 = Ast_c.toplevel * info_item

let program_of_program2 xs =
  xs +> List.map fst

(* Apply [f] to the ast part of a program2, keeping the info_items paired. *)
let with_program2 f program2 =
  program2
  +> Common.unzip
  +> (fun (program, infos) ->
       f program, infos
     )
  +> Common.uncurry Common.zip

(* The use of local refs (remaining_tokens, passed_tokens, ...) makes
 * possible error recovery. Indeed, they allow to skip some tokens and
 * still be able to call again the ocamlyacc parser. It is ugly code
 * because we cant modify ocamllex and ocamlyacc. As we want some
 * extended lexing tricks, we have to use such refs.
 *
 * Those refs are now also used for my lalr(k) technique. Indeed They
 * store the futur and previous tokens that were parsed, and so
 * provide enough context information for powerful lex trick.
 *
 * - passed_tokens_last_ckp stores the passed tokens since last
 *   checkpoint. Used for NotParsedCorrectly and also to build the
 *   info_item attached to each program_element.
 * - passed_tokens_clean is used for lookahead, in fact for lookback.
 * - remaining_tokens_clean is used for lookahead. Now remaining_tokens
 *   contain some comments and so would make pattern matching difficult
 *   in lookahead. Hence this variable. We would like also to get rid
 *   of cpp instruction because sometimes a cpp instruction is between
 *   two tokens and makes a pattern matching fail. But lookahead also
 *   transform some cpp instruction (in comment) so can't remove them.
 *
 * So remaining_tokens, passed_tokens_last_ckp contain comment-tokens,
 * whereas passed_tokens_clean and remaining_tokens_clean does not contain
 * comment-tokens.
 *
 * Normally we have:
 * toks = (reverse passed_tok) ++ cur_tok ++ remaining_tokens
 *   after the call to pop2.
 * toks = (reverse passed_tok) ++ remaining_tokens
 *   at the and of the lexer_function call.
 * At the very beginning, cur_tok and remaining_tokens overlap, but not after.
 * At the end of lexer_function call, cur_tok overlap with passed_tok.
 *
 * convention: I use "tr" for "tokens refs"
 *
 * I now also need this lexing trick because the lexer return comment
 * tokens.
 *)

type tokens_state = {
  mutable rest :         Parser_c.token list;  (* still to consume, with comments *)
  mutable rest_clean :   Parser_c.token list;  (* same, without comment tokens *)
  mutable current :      Parser_c.token;
  (* it's passed since last "checkpoint", not passed from the beginning *)
  mutable passed :       Parser_c.token list;
  mutable passed_clean : Parser_c.token list;
}

(* Shallow copy, so the snapshot can later be restored for pass 2. *)
let clone_tokens_stat tr =
  { rest = tr.rest;
    rest_clean = tr.rest_clean;
    current = tr.current;
    passed = tr.passed;
    passed_clean = tr.passed_clean;
  }

(* Restore a snapshot field by field into an existing record. *)
let copy_tokens_stat ~src ~dst =
  dst.rest <- src.rest;
  dst.rest_clean <- src.rest_clean;
  dst.current <- src.current;
  dst.passed <- src.passed;
  dst.passed_clean <- src.passed_clean;
  ()

(* Drop TMacroAttr tokens among the first [n] tokens of [xs]. *)
let rec filter_noise n xs =
  match n, xs with
  | _, [] -> []
  | 0, xs -> xs
  | n, x::xs ->
      (match x with
      | Parser_c.TMacroAttr _ ->
          filter_noise (n-1) xs
      | _ ->
          x::filter_noise (n-1) xs
      )

(* Keep the head token as-is but remove attribute noise from the following
 * lookahead window (10 tokens), so lookahead pattern matching is simpler. *)
let clean_for_lookahead xs =
  match xs with
  | [] -> []
  | [x] -> [x]
  | x::xs ->
      x::filter_noise 10 xs

(* Hacked lex. This function use refs passed by parse_print_error_heuristic
 * tr means token refs.
 *)
(* The fake lexer handed to the ocamlyacc parser: pops tokens from tr.rest,
 * maintains the passed/clean mirrors, passes cpp directives found inside
 * functions as comments (on pass 2 or when cpp_directive_passing is set),
 * promotes known typedef identifiers (pass 1 only), and applies the
 * lalr(k) lookahead hacks. *)
let rec lexer_function ~pass tr = fun lexbuf ->
  match tr.rest with
  | [] ->
      pr2 "ALREADY AT END";
      tr.current
  | v::xs ->
      tr.rest <- xs;
      tr.current <- v;

      if !Flag_parsing_c.debug_lexer then Common.pr2_gen v;

      if TH.is_comment v
      then begin
        tr.passed <- v::tr.passed;
        lexer_function ~pass tr lexbuf
      end
      else begin
        (* keep rest_clean in lock-step with rest *)
        let x = List.hd tr.rest_clean in
        tr.rest_clean <- List.tl tr.rest_clean;
        assert (x = v);

        (match v with

        (* fix_define1.
         *
         * Why not in parsing_hacks lookahead and do passing like
         * I do for some ifdef directives ? Because here I also need to
         * generate some tokens sometimes.
         *)
        | Parser_c.TDefine (tok) ->
            if not (LP.current_context () = LP.InTopLevel) &&
               (!Flag_parsing_c.cpp_directive_passing || (pass = 2))
            then begin
              incr Stat.nDefinePassing;
              pr2_once ("CPP-DEFINE: inside function, I treat it as comment");
              let v' =
                Parser_c.TCommentCpp (Ast_c.CppDirective,TH.info_of_tok v)
              in
              tr.passed <- v'::tr.passed;
              (* comment out the whole #define body in both streams *)
              tr.rest       <- comment_until_defeol tr.rest;
              tr.rest_clean <- drop_until_defeol tr.rest_clean;
              lexer_function ~pass tr lexbuf
            end
            else begin
              tr.passed <- v::tr.passed;
              tr.passed_clean <- v::tr.passed_clean;
              v
            end

        | Parser_c.TInclude (includes, filename, inifdef, info) ->
            if not (LP.current_context () = LP.InTopLevel) &&
               (!Flag_parsing_c.cpp_directive_passing || (pass = 2))
            then begin
              incr Stat.nIncludePassing;
              pr2_once ("CPP-INCLUDE: inside function, I treat it as comment");
              let v = Parser_c.TCommentCpp(Ast_c.CppDirective, info) in
              tr.passed <- v::tr.passed;
              lexer_function ~pass tr lexbuf
            end
            else begin
              (* replace the single TInclude by the expanded token pair *)
              let (v,new_tokens) =
                tokens_include (info, includes, filename, inifdef) in
              let new_tokens_clean =
                new_tokens +> List.filter TH.is_not_comment in

              tr.passed <- v::tr.passed;
              tr.passed_clean <- v::tr.passed_clean;
              tr.rest <- new_tokens ++ tr.rest;
              tr.rest_clean <- new_tokens_clean ++ tr.rest_clean;
              v
            end

        | _ ->
            (* typedef_fix1 *)
            let v =
              match v with
              | Parser_c.TIdent (s, ii) ->
                  if LP.is_typedef s &&
                     not (!Flag_parsing_c.disable_add_typedef) &&
                     pass = 1
                  then Parser_c.TypedefIdent (s, ii)
                  else Parser_c.TIdent (s, ii)
              | x -> x
            in
            let v =
              Parsing_hacks.lookahead ~pass
                (clean_for_lookahead (v::tr.rest_clean))
                tr.passed_clean in

            tr.passed <- v::tr.passed;

            (* the lookahead may have changed the status of the token and
             * consider it as a comment, for instance some #include are
             * turned into comments, hence this code. *)
            match v with
            | Parser_c.TCommentCpp _ ->
                lexer_function ~pass tr lexbuf
            | v ->
                tr.passed_clean <- v::tr.passed_clean;
                v
        )
      end

(* Parse one toplevel element from tr; returns Left ast on success, or
 * Right (infos of bad tokens, error line, bad tokens) after error recovery.
 * Messages are only printed on the last attempted pass. *)
let get_one_elem ~pass tr (file, filelines) =

  if not (LP.is_enabled_typedef()) && !Flag_parsing_c.debug_typedef
  then pr2 "TYPEDEF:_handle_typedef=false. Not normal if dont come from exn";

  (* normally have to do that only when come from an exception in which
   * case the dt() may not have been done
   * TODO but if was in scoped scope ? have to let only the last scope
   * so need do a LP.lexer_reset_typedef ();
   *)
  LP.enable_typedef();
  LP._lexer_hint := (LP.default_hint ());
  LP.save_typedef_state();

  tr.passed <- [];

  let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in

  (try
      (* -------------------------------------------------- *)
      (* Call parser *)
      (* -------------------------------------------------- *)
      Common.profile_code_exclusif "YACC" (fun () ->
        Left (Parser_c.celem (lexer_function ~pass tr) lexbuf_fake)
      )
    with e ->
      begin
        (* only report on the final pass; pass 1 errors are retried *)
        if (pass = 1 && !Flag_parsing_c.disable_two_pass)|| (pass = 2)
        then begin
          (match e with
          (* Lexical is not anymore launched I think *)
          | Lexer_c.Lexical s ->
              pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok tr.current)
          | Parsing.Parse_error ->
              pr2 ("parse error \n = " ^ error_msg_tok tr.current)
          | Semantic_c.Semantic (s, i) ->
              pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok tr.current)
          | e -> raise e
          )
        end;
        LP.restore_typedef_state();

        (* must keep here, before the code that adjusts the tr fields *)
        let line_error = TH.line_of_tok tr.current in

        (* error recovery, go to next synchro point *)
        let (passed', rest') = find_next_synchro tr.rest tr.passed in
        tr.rest <- rest';
        tr.passed <- passed';

        tr.current <- List.hd passed';
        tr.passed_clean <- [];           (* enough ? *)
        (* with error recovery, rest and rest_clean may not be in sync *)
        tr.rest_clean <- (tr.rest +> List.filter TH.is_not_comment);

        let info_of_bads = Common.map_eff_rev TH.info_of_tok tr.passed in
        Right (info_of_bads, line_error, tr.passed)
      end
  )

(* note: as now we go in 2 passes, there is first all the error message of
 * the lexer, and then the error of the parser. It is not anymore
 * interwinded.
 *
 * !!!This function use refs, and is not reentrant !!! so take care.
 * It use globals defined in Lexer_parser and also the _defs global
 * in parsing_hack.ml.
 *
 * This function uses internally some semi globals in the
 * tokens_stat record and parsing_stat record.
 *)
(* Main driver: lex [file], apply cpp token fixups, then repeatedly parse
 * toplevel elements with two-pass error recovery, accumulating parsing
 * statistics. Returns (program2, stat). *)
let parse_print_error_heuristic2 file =

  let filelines = Common.cat_array file in
  let stat = Parsing_stat.default_stat file in

  (* -------------------------------------------------- *)
  (* call lexer and get all the tokens *)
  (* -------------------------------------------------- *)
  LP.lexer_reset_typedef();
  Parsing_hacks.ifdef_paren_cnt := 0;
  let toks_orig = tokens file in

  let toks = Parsing_hacks.fix_tokens_define toks_orig in
  let toks = Parsing_hacks.fix_tokens_cpp toks in

  let tr = {
    rest       = toks;
    rest_clean = (toks +> List.filter TH.is_not_comment);
    current    = (List.hd toks);
    passed = [];
    passed_clean = [];
  } in

  let rec loop tr =

    (* todo?: I am not sure that it represents current_line, cos maybe
     * tr.current partipated in the previous parsing phase, so maybe tr.current
     * is not the first token of the next parsing phase. Same with checkpoint2.
     * It would be better to record when we have a } or ; in parser.mly,
     * cos we know that they are the last symbols of external_declaration2.
     *
     * bugfix: may not be equal to 'file' as after macro expansions we can
     * start to parse a new entity from the body of a macro, for instance
     * when parsing a define_machine() body, cf standard.h
     *)
    let checkpoint = TH.line_of_tok tr.current in
    let checkpoint_file = TH.file_of_tok tr.current in

    (* snapshot so pass 2 can restart from the same point *)
    let tr_save = clone_tokens_stat tr in

    (* call the parser *)
    let elem =
      let pass1 = get_one_elem ~pass:1 tr (file, filelines) in
      match pass1 with
      | Left e -> Left e
      | Right res ->
          if !Flag_parsing_c.disable_two_pass
          then Right res
          else begin
            pr2 "parsing pass2: try again";
            copy_tokens_stat ~src:tr_save ~dst: tr;
            let pass2 = get_one_elem ~pass:2 tr (file, filelines) in
            pass2
          end
    in

    (* again not sure if checkpoint2 corresponds to end of bad region *)
    let checkpoint2 = TH.line_of_tok tr.current in (* <> line_error *)
    let checkpoint2_file = TH.file_of_tok tr.current in

    (* was the failed element a #define ? (those errors can be filtered) *)
    let was_define =
      (match elem with
      | Left _ -> false
      | Right (_, line_error, _) ->
          let was_define =
            let xs = tr.passed +> List.rev +> List.filter TH.is_not_comment in
            if List.length xs >= 2
            then
              (match Common.head_middle_tail xs with
              | Parser_c.TDefine _, _, Parser_c.TDefEOL _ ->
                  true
              | _ -> false
              )
            else begin
              pr2 "WIERD: length list of error recovery tokens < 2 ";
              false
            end
          in
          (if was_define && !Flag_parsing_c.filter_msg_define_error
           then ()
           else
             (* bugfix: *)
             if (checkpoint_file = checkpoint2_file) && checkpoint_file = file
             then print_bad line_error (checkpoint, checkpoint2) filelines
             else pr2 "PB: bad: but on tokens not from original file"
          );
          was_define
      )
    in

    (* lines consumed by this element, counted only inside the original file *)
    let diffline =
      if (checkpoint_file = checkpoint2_file) && (checkpoint_file = file)
      then (checkpoint2 - checkpoint)
      else 0
        (* TODO? so if error come in middle of something ? where the
         * start token was from original file but synchro found in body
         * of macro ? then can have wrong number of lines stat.
         * Maybe simpler just to look at tr.passed and count
         * the lines in the token from the correct file ?
         *)
    in
    let info = mk_info_item file (List.rev tr.passed) in

    (* some stat updates *)
    stat.Stat.commentized <-
      stat.Stat.commentized + count_lines_commentized (snd info);

    let elem =
      match elem with
      | Left e ->
          stat.Stat.correct <- stat.Stat.correct + diffline;
          e
      | Right (info_of_bads, line_error, toks_of_bads) ->
          if was_define && !Flag_parsing_c.filter_define_error
          then stat.Stat.correct <- stat.Stat.correct + diffline
          else stat.Stat.bad <- stat.Stat.bad + diffline;

          (* record the identifier-like tokens on the error line *)
          let pbline =
            toks_of_bads
            +> Common.filter (TH.is_same_line line_error)
            +> Common.filter TH.is_ident_like
          in
          let error_info =
            (pbline +> List.map TH.str_of_tok), line_error
          in
          stat.Stat.problematic_lines <-
            error_info::stat.Stat.problematic_lines;

          Ast_c.NotParsedCorrectly info_of_bads
    in

    (match elem with
    | Ast_c.FinalDef x -> [(Ast_c.FinalDef x, info)]
    | xs -> (xs, info):: loop tr (* recurse *)
    )
  in
  let v = loop tr in
  let v = consistency_checking v in
  (v, stat)

let time_total_parsing a =
  Common.profile_code "TOTAL" (fun () -> parse_print_error_heuristic2 a)

let parse_print_error_heuristic a =
  Common.profile_code "C parsing" (fun () -> time_total_parsing a)

(* alias *)
let parse_c_and_cpp a = parse_print_error_heuristic a

(*****************************************************************************)
(* Same but faster cos memoize stuff *)
(*****************************************************************************)

(* Cached variant: memoizes the parse on disk (".ast_raw"/".depend_raw")
 * when use_cache is set; otherwise parses directly. *)
let parse_cache file =
  if not !Flag_parsing_c.use_cache then parse_print_error_heuristic file
  else
    let _ = pr2 "TOFIX" in
    let need_no_changed_files =
      (* should use Sys.argv.(0), would be safer. *)
      [
        (* TOFIX
        Config.path ^ "/parsing_c/c_parser.cma";
        (* we may also depend now on the semantic patch because
           the SP may use macro and so we will disable some of the
           macro expansions from standard.h.
        *)
        !Config.std_h;
        *)
      ]
    in
    let need_no_changed_variables =
      (* could add some of the flags of flag_parsing_c.ml *)
      []
    in
    Common.cache_computation_robust
      file ".ast_raw"
      (need_no_changed_files, need_no_changed_variables) ".depend_raw"
      (fun () -> parse_print_error_heuristic file)

(*****************************************************************************)
(* Some special cases *)
(*****************************************************************************)

(* Parse a single C statement by wrapping it in a dummy main written to
 * /tmp/__cocci.c. NOTE(review): uses a fixed temp path, so concurrent
 * callers would race -- presumably acceptable for this debug helper. *)
let (cstatement_of_string: string -> Ast_c.statement) = fun s ->
  Common.write_file ("/tmp/__cocci.c") ("void main() { \n" ^ s ^ "\n}");
  let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst in
  program +> Common.find_some (fun (e,_) ->
    match e with
    | Ast_c.Definition ({Ast_c.f_body = [Ast_c.StmtElem st]},_) -> Some st
    | _ -> None
  )

(* Same trick for a single C expression (a ';' is appended to make it a
 * statement inside the dummy main). *)
let (cexpression_of_string: string -> Ast_c.expression) = fun s ->
  Common.write_file ("/tmp/__cocci.c") ("void main() { \n" ^ s ^ ";\n}");
  let program = parse_c_and_cpp ("/tmp/__cocci.c") +> fst in
  program +> Common.find_some (fun (e,_) ->
    match e with
    | Ast_c.Definition ({Ast_c.f_body = compound},_) ->
        (match compound with
        | [Ast_c.StmtElem (Ast_c.ExprStatement (Some e),ii)] -> Some e
        | _ -> None
        )
    | _ -> None
  )