X-Git-Url: http://git.hcoop.net/bpt/coccinelle.git/blobdiff_plain/b1b2de814d2c59af2526bc19d41bb22a0c1fd16d..e6509c05b4c77bb8fcffa7f454c1412fcc0c3a15:/parsing_c/parsing_hacks.ml diff --git a/parsing_c/parsing_hacks.ml b/parsing_c/parsing_hacks.ml index 866463b..c2ba571 100644 --- a/parsing_c/parsing_hacks.ml +++ b/parsing_c/parsing_hacks.ml @@ -1,11 +1,12 @@ (* Yoann Padioleau - * + * + * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. * Copyright (C) 2007, 2008 Ecole des Mines de Nantes * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License (GPL) * version 2 as published by the Free Software Foundation. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -14,31 +15,28 @@ open Common -module TH = Token_helpers +module TH = Token_helpers +module TV = Token_views_c module LP = Lexer_parser module Stat = Parsing_stat -open Parser_c +open Parser_c + +open TV (*****************************************************************************) (* Some debugging functions *) (*****************************************************************************) -let pr2 s = - if !Flag_parsing_c.verbose_parsing - then Common.pr2 s +let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing -let pr2_once s = - if !Flag_parsing_c.verbose_parsing - then Common.pr2_once s - -let pr2_cpp s = +let pr2_cpp s = if !Flag_parsing_c.debug_cpp then Common.pr2_once ("CPP-" ^ s) -let msg_gen cond is_known printer s = +let msg_gen cond is_known printer s = if cond then if not (!Flag_parsing_c.filter_msg) @@ -46,7 +44,7 @@ let msg_gen cond is_known printer s = else if not (is_known s) then printer s - + (* In the following, there are some harcoded names of types or macros * but they are not used by our heuristics! They are just here to @@ -57,56 +55,56 @@ let msg_gen cond is_known printer s = * there is no more (or not that much) hardcoded linux stuff. *) -let is_known_typdef = - (fun s -> +let is_known_typdef = + (fun s -> (match s with | "u_char" | "u_short" | "u_int" | "u_long" - | "u8" | "u16" | "u32" | "u64" - | "s8" | "s16" | "s32" | "s64" - | "__u8" | "__u16" | "__u32" | "__u64" + | "u8" | "u16" | "u32" | "u64" + | "s8" | "s16" | "s32" | "s64" + | "__u8" | "__u16" | "__u32" | "__u64" -> true - - | "acpi_handle" - | "acpi_status" + + | "acpi_handle" + | "acpi_status" -> true - | "FILE" - | "DIR" + | "FILE" + | "DIR" -> true - + | s when s =~ ".*_t$" -> true - | _ -> false + | _ -> false ) ) -(* note: cant use partial application with let msg_typedef = - * because it would compute msg_typedef at compile time when +(* note: cant use partial application with let msg_typedef = + * because it would compute msg_typedef at compile time when * the flag debug_typedef is always false *) -let msg_typedef s = +let msg_typedef s = incr Stat.nTypedefInfer; msg_gen (!Flag_parsing_c.debug_typedef) is_known_typdef - (fun s -> + (fun s -> pr2_cpp ("TYPEDEF: promoting: " ^ s) ) s let msg_maybe_dangereous_typedef s = if not (is_known_typdef s) - then + then pr2 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s) -let msg_declare_macro s = +let msg_declare_macro s = incr Stat.nMacroDecl; msg_gen (!Flag_parsing_c.debug_cpp) - (fun s -> - (match s with + (fun s -> + (match s with | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM" - | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD" + | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD" | "DEFINE_SPINLOCK" | "DEFINE_TIMER" | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR" | "SENSOR_DEVICE_ATTR" @@ -127,39 +125,39 @@ let msg_declare_macro s = ) (fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s)) s - -let msg_foreach s = + +let msg_foreach s = incr Stat.nIteratorHeuristic; pr2_cpp ("MACRO: found foreach: " ^ s) -(* ?? -let msg_debug_macro s = +(* ?? +let msg_debug_macro s = pr2_cpp ("MACRO: found debug-macro: " ^ s) *) -let msg_macro_noptvirg s = +let msg_macro_noptvirg s = incr Stat.nMacroStmt; pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s) -let msg_macro_toplevel_noptvirg s = +let msg_macro_toplevel_noptvirg s = incr Stat.nMacroStmt; pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s) -let msg_macro_noptvirg_single s = +let msg_macro_noptvirg_single s = incr Stat.nMacroStmt; pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s) -let msg_macro_higher_order s = +let msg_macro_higher_order s = incr Stat.nMacroHigherOrder; msg_gen (!Flag_parsing_c.debug_cpp) - (fun s -> - (match s with + (fun s -> + (match s with | "DBGINFO" | "DBGPX" | "DFLOW" @@ -171,17 +169,17 @@ let msg_macro_higher_order s = s -let msg_stringification s = +let msg_stringification s = incr Stat.nMacroString; msg_gen (!Flag_parsing_c.debug_cpp) - (fun s -> - (match s with + (fun s -> + (match s with | "REVISION" | "UTS_RELEASE" | "SIZE_STR" | "DMA_STR" -> true - (* s when s =~ ".*STR.*" -> true *) + (* s when s =~ ".*STR.*" -> true *) | _ -> false ) ) @@ -194,18 +192,18 @@ let msg_stringification_params s = -let msg_apply_known_macro s = +let msg_apply_known_macro s = incr Stat.nMacroExpand; pr2_cpp ("MACRO: found known macro = " ^ s) -let msg_apply_known_macro_hint s = +let msg_apply_known_macro_hint s = incr Stat.nMacroHint; pr2_cpp ("MACRO: found known macro hint = " ^ s) - -let msg_ifdef_bool_passing is_ifdef_positif = + +let msg_ifdef_bool_passing is_ifdef_positif = incr Stat.nIfdefZero; (* of Version ? *) if is_ifdef_positif then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION" @@ -220,11 +218,15 @@ let msg_ifdef_funheaders () = incr Stat.nIfdefFunheader; () +let msg_ifdef_cparen_else () = + incr Stat.nIfdefPassing; + pr2_cpp("found ifdef-cparen-else") -let msg_attribute s = + +let msg_attribute s = incr Stat.nMacroAttribute; pr2_cpp("ATTR:" ^ s) - + (*****************************************************************************) @@ -245,7 +247,7 @@ let regexp_declare = Str.regexp ".*DECLARE.*" (* linuxext: *) -let regexp_foreach = Str.regexp_case_fold +let regexp_foreach = Str.regexp_case_fold ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)" let regexp_typedef = Str.regexp @@ -258,570 +260,27 @@ let false_typedef = [ let ok_typedef s = not (List.mem s false_typedef) -let not_annot s = +let not_annot s = not (s ==~ regexp_annot) -(* ------------------------------------------------------------------------- *) -(* cpp part 1 for standard.h *) -(* ------------------------------------------------------------------------- *) - -type define_def = string * define_param * define_body - and define_param = - | NoParam - | Params of string list - and define_body = - | DefineBody of Parser_c.token list - | DefineHint of parsinghack_hint - - and parsinghack_hint = - | HintIterator - | HintDeclarator - | HintMacroString - | HintMacroStatement - | HintAttribute - | HintMacroIdentBuilder - - -(* cf also data/test.h *) -let assoc_hint_string = [ - "YACFE_ITERATOR" , HintIterator; - "YACFE_DECLARATOR" , HintDeclarator; - "YACFE_STRING" , HintMacroString; - "YACFE_STATEMENT" , HintMacroStatement; - "YACFE_ATTRIBUTE" , HintAttribute; - "YACFE_IDENT_BUILDER" , HintMacroIdentBuilder; - - "MACROSTATEMENT" , HintMacroStatement; (* backward compatibility *) -] - - -let (parsinghack_hint_of_string: string -> parsinghack_hint option) = fun s -> - Common.assoc_option s assoc_hint_string - -let (is_parsinghack_hint: string -> bool) = fun s -> - parsinghack_hint_of_string s <> None - -let (token_from_parsinghack_hint: - (string * Ast_c.info) -> parsinghack_hint -> Parser_c.token) = - fun (s,ii) hint -> - match hint with - | HintIterator -> - Parser_c.TMacroIterator (s, ii) - | HintDeclarator -> - Parser_c.TMacroDecl (s, ii) - | HintMacroString -> - Parser_c.TMacroString (s, ii) - | HintMacroStatement -> - Parser_c.TMacroStmt (s, ii) - | HintAttribute -> - Parser_c.TMacroAttr (s, ii) - | HintMacroIdentBuilder -> - Parser_c.TMacroIdentBuilder (s, ii) - - - -let (_defs : (string, define_def) Hashtbl.t ref) = - ref (Hashtbl.create 101) - - -(* ------------------------------------------------------------------------- *) -(* fuzzy parsing, different "views" over the same program *) -(* ------------------------------------------------------------------------- *) - - -(* Normally I should not use ref/mutable in the token_extended type - * and I should have a set of functions taking a list of tokens and - * returning a list of tokens. The problem is that to make easier some - * functions, it is better to work on better representation, on "views" - * over this list of tokens. But then modifying those views and get - * back from those views to the original simple list of tokens is - * tedious. One way is to maintain next to the view a list of "actions" - * (I was using a hash storing the charpos of the token and associating - * the action) but it is tedious too. Simpler to use mutable/ref. We - * use the same idea that we use when working on the Ast_c. *) - -(* old: when I was using the list of "actions" next to the views, the hash - * indexed by the charpos, there could have been some problems: - * how my fake_pos interact with the way I tag and adjust token ? - * because I base my tagging on the position of the token ! so sometimes - * could tag another fakeInfo that should not be tagged ? - * fortunately I don't use anymore this technique. - *) - -(* update: quite close to the Place_c.Inxxx *) -type context = - InFunction | InEnum | InStruct | InInitializer | NoContext - -type token_extended = { - mutable tok: Parser_c.token; - mutable where: context; - - (* less: need also a after ? *) - mutable new_tokens_before : Parser_c.token list; - - (* line x col cache, more easily accessible, of the info in the token *) - line: int; - col : int; -} - -let set_as_comment cppkind x = - if TH.is_eof x.tok - then () (* otherwise parse_c will be lost if don't find a EOF token *) - else - x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok) - -let mk_token_extended x = - let (line, col) = TH.linecol_of_tok x in - { tok = x; - line = line; col = col; - where = NoContext; - new_tokens_before = []; - } - - -(* x list list, because x list separated by ',' *) -type paren_grouped = - | Parenthised of paren_grouped list list * token_extended list - | PToken of token_extended - -type brace_grouped = - | Braceised of - brace_grouped list list * token_extended * token_extended option - | BToken of token_extended - -(* Far better data structure than doing hacks in the lexer or parser - * because in lexer we don't know to which ifdef a endif is related - * and so when we want to comment a ifdef, we don't know which endif - * we must also comment. Especially true for the #if 0 which sometimes - * have a #else part. - * - * x list list, because x list separated by #else or #elif - *) -type ifdef_grouped = - | Ifdef of ifdef_grouped list list * token_extended list - | Ifdefbool of bool * ifdef_grouped list list * token_extended list - | NotIfdefLine of token_extended list - - -type 'a line_grouped = - Line of 'a list - - -type body_function_grouped = - | BodyFunction of token_extended list - | NotBodyLine of token_extended list - - -(* ------------------------------------------------------------------------- *) -(* view builders *) -(* ------------------------------------------------------------------------- *) - -(* todo: synchro ! use more indentation - * if paren not closed and same indentation level, certainly because - * part of a mid-ifdef-expression. -*) -let rec mk_parenthised xs = - match xs with - | [] -> [] - | x::xs -> - (match x.tok with - | TOPar _ | TOParDefine _ -> - let body, extras, xs = mk_parameters [x] [] xs in - Parenthised (body,extras)::mk_parenthised xs - | _ -> - PToken x::mk_parenthised xs - ) - -(* return the body of the parenthised expression and the rest of the tokens *) -and mk_parameters extras acc_before_sep xs = - match xs with - | [] -> - (* maybe because of #ifdef which "opens" '(' in 2 branches *) - pr2 "PB: not found closing paren in fuzzy parsing"; - [List.rev acc_before_sep], List.rev extras, [] - | x::xs -> - (match x.tok with - (* synchro *) - | TOBrace _ when x.col =|= 0 -> - pr2 "PB: found synchro point } in paren"; - [List.rev acc_before_sep], List.rev (extras), (x::xs) - - | TCPar _ | TCParEOL _ -> - [List.rev acc_before_sep], List.rev (x::extras), xs - | TOPar _ | TOParDefine _ -> - let body, extrasnest, xs = mk_parameters [x] [] xs in - mk_parameters extras - (Parenthised (body,extrasnest)::acc_before_sep) - xs - | TComma _ -> - let body, extras, xs = mk_parameters (x::extras) [] xs in - (List.rev acc_before_sep)::body, extras, xs - | _ -> - mk_parameters extras (PToken x::acc_before_sep) xs - ) - - - - -let rec mk_braceised xs = - match xs with - | [] -> [] - | x::xs -> - (match x.tok with - | TOBrace _ -> - let body, endbrace, xs = mk_braceised_aux [] xs in - Braceised (body, x, endbrace)::mk_braceised xs - | TCBrace _ -> - pr2 "PB: found closing brace alone in fuzzy parsing"; - BToken x::mk_braceised xs - | _ -> - BToken x::mk_braceised xs - ) - -(* return the body of the parenthised expression and the rest of the tokens *) -and mk_braceised_aux acc xs = - match xs with - | [] -> - (* maybe because of #ifdef which "opens" '(' in 2 branches *) - pr2 "PB: not found closing brace in fuzzy parsing"; - [List.rev acc], None, [] - | x::xs -> - (match x.tok with - | TCBrace _ -> [List.rev acc], Some x, xs - | TOBrace _ -> - let body, endbrace, xs = mk_braceised_aux [] xs in - mk_braceised_aux (Braceised (body,x, endbrace)::acc) xs - | _ -> - mk_braceised_aux (BToken x::acc) xs - ) - - - - -let rec mk_ifdef xs = - match xs with - | [] -> [] - | x::xs -> - (match x.tok with - | TIfdef _ -> - let body, extra, xs = mk_ifdef_parameters [x] [] xs in - Ifdef (body, extra)::mk_ifdef xs - | TIfdefBool (b,_, _) -> - let body, extra, xs = mk_ifdef_parameters [x] [] xs in - - (* if not passing, then consider a #if 0 as an ordinary #ifdef *) - if !Flag_parsing_c.if0_passing - then Ifdefbool (b, body, extra)::mk_ifdef xs - else Ifdef(body, extra)::mk_ifdef xs - - | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) -> - let body, extra, xs = mk_ifdef_parameters [x] [] xs in - Ifdefbool (b, body, extra)::mk_ifdef xs - - - | _ -> - (* todo? can have some Ifdef in the line ? *) - let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in - NotIfdefLine line::mk_ifdef xs - ) - -and mk_ifdef_parameters extras acc_before_sep xs = - match xs with - | [] -> - (* Note that mk_ifdef is assuming that CPP instruction are alone - * on their line. Because I do a span (fun x -> is_same_line ...) - * I might take with me a #endif if this one is mixed on a line - * with some "normal" tokens. - *) - pr2 "PB: not found closing ifdef in fuzzy parsing"; - [List.rev acc_before_sep], List.rev extras, [] - | x::xs -> - (match x.tok with - | TEndif _ -> - [List.rev acc_before_sep], List.rev (x::extras), xs - | TIfdef _ -> - let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in - mk_ifdef_parameters - extras (Ifdef (body, extrasnest)::acc_before_sep) xs - - | TIfdefBool (b,_,_) -> - let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in - - if !Flag_parsing_c.if0_passing - then - mk_ifdef_parameters - extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs - else - mk_ifdef_parameters - extras (Ifdef (body, extrasnest)::acc_before_sep) xs - - - | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) -> - let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in - mk_ifdef_parameters - extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs - - | TIfdefelse _ - | TIfdefelif _ -> - let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in - (List.rev acc_before_sep)::body, extras, xs - | _ -> - let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in - mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs - ) - -(* --------------------------------------- *) - -let line_of_paren = function - | PToken x -> x.line - | Parenthised (xxs, info_parens) -> - (match info_parens with - | [] -> raise Impossible - | x::xs -> x.line - ) - - -let rec span_line_paren line = function - | [] -> [],[] - | x::xs -> - (match x with - | PToken tok when TH.is_eof tok.tok -> - [], x::xs - | _ -> - if line_of_paren x =|= line - then - let (l1, l2) = span_line_paren line xs in - (x::l1, l2) - else ([], x::xs) - ) - - -let rec mk_line_parenthised xs = - match xs with - | [] -> [] - | x::xs -> - let line_no = line_of_paren x in - let line, xs = span_line_paren line_no xs in - Line (x::line)::mk_line_parenthised xs - - -(* --------------------------------------- *) -let rec mk_body_function_grouped xs = - match xs with - | [] -> [] - | x::xs -> - (match x with - | {tok = TOBrace _; col = 0} -> - let is_closing_brace = function - | {tok = TCBrace _; col = 0 } -> true - | _ -> false - in - let body, xs = Common.span (fun x -> not (is_closing_brace x)) xs in - (match xs with - | ({tok = TCBrace _; col = 0 })::xs -> - BodyFunction body::mk_body_function_grouped xs - | [] -> - pr2 "PB:not found closing brace in fuzzy parsing"; - [NotBodyLine body] - | _ -> raise Impossible - ) - - | _ -> - let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in - NotBodyLine line::mk_body_function_grouped xs - ) - - -(* ------------------------------------------------------------------------- *) -(* view iterators *) -(* ------------------------------------------------------------------------- *) - -let rec iter_token_paren f xs = - xs +> List.iter (function - | PToken tok -> f tok; - | Parenthised (xxs, info_parens) -> - info_parens +> List.iter f; - xxs +> List.iter (fun xs -> iter_token_paren f xs) - ) - -let rec iter_token_brace f xs = - xs +> List.iter (function - | BToken tok -> f tok; - | Braceised (xxs, tok1, tok2opt) -> - f tok1; do_option f tok2opt; - xxs +> List.iter (fun xs -> iter_token_brace f xs) - ) - -let rec iter_token_ifdef f xs = - xs +> List.iter (function - | NotIfdefLine xs -> xs +> List.iter f; - | Ifdefbool (_, xxs, info_ifdef) - | Ifdef (xxs, info_ifdef) -> - info_ifdef +> List.iter f; - xxs +> List.iter (iter_token_ifdef f) - ) - - - - -let tokens_of_paren xs = - let g = ref [] in - xs +> iter_token_paren (fun tok -> push2 tok g); - List.rev !g - - -let tokens_of_paren_ordered xs = - let g = ref [] in - - let rec aux_tokens_ordered = function - | PToken tok -> push2 tok g; - | Parenthised (xxs, info_parens) -> - let (opar, cpar, commas) = - match info_parens with - | opar::xs -> - (match List.rev xs with - | cpar::xs -> - opar, cpar, List.rev xs - | _ -> raise Impossible - ) - | _ -> raise Impossible - in - push2 opar g; - aux_args (xxs,commas); - push2 cpar g; - - and aux_args (xxs, commas) = - match xxs, commas with - | [], [] -> () - | [xs], [] -> xs +> List.iter aux_tokens_ordered - | xs::ys::xxs, comma::commas -> - xs +> List.iter aux_tokens_ordered; - push2 comma g; - aux_args (ys::xxs, commas) - | _ -> raise Impossible - - in - - xs +> List.iter aux_tokens_ordered; - List.rev !g - - - - -(* ------------------------------------------------------------------------- *) -(* set the context info in token *) -(* ------------------------------------------------------------------------- *) - - -let rec set_in_function_tag xs = - (* could try: ) { } but it can be the ) of a if or while, so - * better to base the heuristic on the position in column zero. - * Note that some struct or enum or init put also their { in first column - * but set_in_other will overwrite the previous InFunction tag. - *) - match xs with - | [] -> () - (* ) { and the closing } is in column zero, then certainly a function *) - | BToken ({tok = TCPar _ })::(Braceised (body, tok1, Some tok2))::xs - when tok1.col <> 0 && tok2.col =|= 0 -> - body +> List.iter (iter_token_brace (fun tok -> - tok.where <- InFunction - )); - set_in_function_tag xs - - | (BToken x)::xs -> set_in_function_tag xs - - | (Braceised (body, tok1, Some tok2))::xs - when tok1.col =|= 0 && tok2.col =|= 0 -> - body +> List.iter (iter_token_brace (fun tok -> - tok.where <- InFunction - )); - set_in_function_tag xs - | Braceised (body, tok1, tok2)::xs -> - set_in_function_tag xs - - -let rec set_in_other xs = - match xs with - | [] -> () - (* enum x { } *) - | BToken ({tok = Tenum _})::BToken ({tok = TIdent _}) - ::Braceised(body, tok1, tok2)::xs - | BToken ({tok = Tenum _}) - ::Braceised(body, tok1, tok2)::xs - -> - body +> List.iter (iter_token_brace (fun tok -> - tok.where <- InEnum; - )); - set_in_other xs - - (* struct x { } *) - | BToken ({tok = Tstruct _})::BToken ({tok = TIdent _}) - ::Braceised(body, tok1, tok2)::xs -> - body +> List.iter (iter_token_brace (fun tok -> - tok.where <- InStruct; - )); - set_in_other xs - (* = { } *) - | BToken ({tok = TEq _}) - ::Braceised(body, tok1, tok2)::xs -> - body +> List.iter (iter_token_brace (fun tok -> - tok.where <- InInitializer; - )); - set_in_other xs - | BToken _::xs -> set_in_other xs - - | Braceised(body, tok1, tok2)::xs -> - body +> List.iter set_in_other; - set_in_other xs - - - - -let set_context_tag xs = - begin - set_in_function_tag xs; - set_in_other xs; - end - (*****************************************************************************) (* Helpers *) (*****************************************************************************) -(* To expand the parameter of the macro. The env corresponds to the actual - * code that is binded to the parameters of the macro. - * TODO? recurse ? fixpoint ? the expansion may also contain macro. - * Or to macro expansion in a strict manner, that is process first - * the parameters, expands macro in params, and then process enclosing - * macro call. - *) -let rec (cpp_engine: (string , Parser_c.token list) assoc -> - Parser_c.token list -> Parser_c.token list) = - fun env xs -> - xs +> List.map (fun tok -> - match tok with - | TIdent (s,i1) when List.mem_assoc s env -> Common.assoc s env - | x -> [x] - ) - +> List.flatten - - - - (* ------------------------------------------------------------------------- *) -(* the pair is the status of '()' and '{}', ex: (-1,0) - * if too much ')' and good '{}' - * could do for [] too ? +(* the pair is the status of '()' and '{}', ex: (-1,0) + * if too much ')' and good '{}' + * could do for [] too ? * could do for ',' if encounter ',' at "toplevel", not inside () or {} * then if have ifdef, then certainly can lead to a problem. *) -let (count_open_close_stuff_ifdef_clause: ifdef_grouped list -> (int * int)) = - fun xs -> +let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))= + fun xs -> let cnt_paren, cnt_brace = ref 0, ref 0 in - xs +> iter_token_ifdef (fun x -> + xs +> TV.iter_token_ifdef (fun x -> (match x.tok with | x when TH.is_opar x -> incr cnt_paren | TOBrace _ -> incr cnt_brace @@ -836,19 +295,19 @@ let (count_open_close_stuff_ifdef_clause: ifdef_grouped list -> (int * int)) = (* ------------------------------------------------------------------------- *) let forLOOKAHEAD = 30 - + (* look if there is a '{' just after the closing ')', and handling the - * possibility to have nested expressions inside nested parenthesis - * + * possibility to have nested expressions inside nested parenthesis + * * todo: use indentation instead of premier(statement) ? *) -let rec is_really_foreach xs = +let rec is_really_foreach xs = let rec is_foreach_aux = function | [] -> false, [] | TCPar _::TOBrace _::xs -> true, xs (* the following attempts to handle the cases where there is a single statement in the body of the loop. undoubtedly more - cases are needed. + cases are needed. todo: premier(statement) - suivant(funcall) *) | TCPar _::TIdent _::xs -> true, xs @@ -860,7 +319,7 @@ let rec is_really_foreach xs = | TCPar _::xs -> false, xs - | TOPar _::xs -> + | TOPar _::xs -> let (_, xs') = is_foreach_aux xs in is_foreach_aux xs' | x::xs -> is_foreach_aux xs @@ -869,7 +328,7 @@ let rec is_really_foreach xs = (* ------------------------------------------------------------------------- *) -let set_ifdef_token_parenthize_info cnt x = +let set_ifdef_token_parenthize_info cnt x = match x with | TIfdef (tag, _) | TIfdefelse (tag, _) @@ -877,34 +336,269 @@ let set_ifdef_token_parenthize_info cnt x = | TEndif (tag, _) | TIfdefBool (_, tag, _) - | TIfdefMisc (_, tag, _) + | TIfdefMisc (_, tag, _) | TIfdefVersion (_, tag, _) - -> + -> tag := Some cnt; | _ -> raise Impossible - -let ifdef_paren_cnt = ref 0 + +let ifdef_paren_cnt = ref 0 -let rec set_ifdef_parenthize_info xs = +let rec set_ifdef_parenthize_info xs = xs +> List.iter (function | NotIfdefLine xs -> () - | Ifdefbool (_, xxs, info_ifdef) - | Ifdef (xxs, info_ifdef) -> - + | Ifdefbool (_, xxs, info_ifdef) + | Ifdef (xxs, info_ifdef) -> + incr ifdef_paren_cnt; let total_directives = List.length info_ifdef in - info_ifdef +> List.iter (fun x -> + info_ifdef +> List.iter (fun x -> set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives) x.tok); xxs +> List.iter set_ifdef_parenthize_info ) +(*****************************************************************************) +(* The parsing hack for #define *) +(*****************************************************************************) + +(* To parse macro definitions I need to do some tricks + * as some information can be get only at the lexing level. For instance + * the space after the name of the macro in '#define foo (x)' is meaningful + * but the grammar can not get this information. So define_ident below + * look at such space and generate a special TOpardefine. In a similar + * way macro definitions can contain some antislash and newlines + * and the grammar need to know where the macro ends (which is + * a line-level and so low token-level information). Hence the + * function 'define_line' below and the TDefEol. + * + * update: TDefEol is handled in a special way at different places, + * a little bit like EOF, especially for error recovery, so this + * is an important token that should not be retagged! + * + * + * ugly hack, a better solution perhaps would be to erase TDefEOL + * from the Ast and list of tokens in parse_c. + * + * note: I do a +1 somewhere, it's for the unparsing to correctly sync. + * + * note: can't replace mark_end_define by simply a fakeInfo(). The reason + * is where is the \n TCommentSpace. Normally there is always a last token + * to synchronize on, either EOF or the token of the next toplevel. + * In the case of the #define we got in list of token + * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will + * not synchronize on it and so we will not print the "\n". + * A solution would be to put the TDefEOL before the "\n". + * (jll: tried to do this, see the comment "Put end of line..." below) + * + * todo?: could put a ExpandedTok for that ? + *) +let mark_end_define ii = + let ii' = + { Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with + Common.str = ""; + Common.charpos = Ast_c.pos_of_info ii + 1 + }; + cocci_tag = ref Ast_c.emptyAnnot; + comments_tag = ref Ast_c.emptyComments; + } + in + TDefEOL (ii') + +(* put the TDefEOL at the good place *) +let rec define_line_1 acc xs = + match xs with + | [] -> List.rev acc + | TDefine ii::xs -> + let line = Ast_c.line_of_info ii in + let acc = (TDefine ii) :: acc in + define_line_2 acc line ii xs + | TCppEscapedNewline ii::xs -> + pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at"); + pr2 (Ast_c.strloc_of_info ii); + let acc = (TCommentSpace ii) :: acc in + define_line_1 acc xs + | x::xs -> define_line_1 (x::acc) xs + +and define_line_2 acc line lastinfo xs = + match xs with + | [] -> + (* should not happened, should meet EOF before *) + pr2 "PB: WEIRD"; + List.rev (mark_end_define lastinfo::acc) + | x::xs -> + let line' = TH.line_of_tok x in + let info = TH.info_of_tok x in + + (match x with + | EOF ii -> + let acc = (mark_end_define lastinfo) :: acc in + let acc = (EOF ii) :: acc in + define_line_1 acc xs + | TCppEscapedNewline ii -> + if (line' <> line) then pr2 "PB: WEIRD: not same line number"; + let acc = (TCommentSpace ii) :: acc in + define_line_2 acc (line+1) info xs + | x -> + if line' =|= line + then define_line_2 (x::acc) line info xs + else + (* Put end of line token before the newline. A newline at least + must be there because the line changed and because we saw a + #define previously to get to this function at all *) + define_line_1 + ((List.hd acc)::(mark_end_define lastinfo::(List.tl acc))) + (x::xs) + ) + +let rec define_ident acc xs = + match xs with + | [] -> List.rev acc + | TDefine ii::xs -> + let acc = TDefine ii :: acc in + (match xs with + | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs -> + (* Change also the kind of TIdent to avoid bad interaction + * with other parsing_hack tricks. For instant if keep TIdent then + * the stringication algo can believe the TIdent is a string-macro. + * So simpler to change the kind of the ident too. + *) + (* if TOParDefine sticked to the ident, then + * it's a macro-function. Change token to avoid ambiguity + * between #define foo(x) and #define foo (x) + *) + let acc = (TCommentSpace i1) :: acc in + let acc = (TIdentDefine (s,i2)) :: acc in + let acc = (TOParDefine i3) :: acc in + define_ident acc xs + + | TCommentSpace i1::TIdent (s,i2)::xs -> + let acc = (TCommentSpace i1) :: acc in + let acc = (TIdentDefine (s,i2)) :: acc in + define_ident acc xs + + (* bugfix: ident of macro (as well as params, cf below) can be tricky + * note, do we need to subst in the body of the define ? no cos + * here the issue is the name of the macro, as in #define inline, + * so obviously the name of this macro will not be used in its + * body (it would be a recursive macro, which is forbidden). + *) + + | TCommentSpace i1::t::xs -> + + let s = TH.str_of_tok t in + let ii = TH.info_of_tok t in + if s ==~ Common.regexp_alpha + then begin + pr2 (spf "remapping: %s to an ident in macro name" s); + let acc = (TCommentSpace i1) :: acc in + let acc = (TIdentDefine (s,ii)) :: acc in + define_ident acc xs + end + else begin + pr2 "WEIRD: weird #define body"; + define_ident acc xs + end + + | _ -> + pr2 "WEIRD: weird #define body"; + define_ident acc xs + ) + | x::xs -> + let acc = x :: acc in + define_ident acc xs + + + +let fix_tokens_define2 xs = + define_ident [] (define_line_1 [] xs) + +let fix_tokens_define a = + Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a) + + + + + +(* ------------------------------------------------------------------------- *) +(* Other parsing hacks related to cpp, Include/Define hacks *) +(* ------------------------------------------------------------------------- *) + +(* Sometimes I prefer to generate a single token for a list of things in the + * lexer so that if I have to passed them, like for passing TInclude then + * it's easy. Also if I don't do a single token, then I need to + * parse the rest which may not need special stuff, like detecting + * end of line which the parser is not really ready for. So for instance + * could I parse a #include as 2 or more tokens ? just + * lex #include ? so then need recognize as one token ? + * but this kind of token is valid only after a #include and the + * lexing and parsing rules are different for such tokens so not that + * easy to parse such things in parser_c.mly. Hence the following hacks. + * + * less?: maybe could get rid of this like I get rid of some of fix_define. + *) + +(* helpers *) + +(* used to generate new token from existing one *) +let new_info posadd str ii = + { Ast_c.pinfo = + Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with + charpos = Ast_c.pos_of_info ii + posadd; + str = str; + column = Ast_c.col_of_info ii + posadd; + }; + (* must generate a new ref each time, otherwise share *) + cocci_tag = ref Ast_c.emptyAnnot; + comments_tag = ref Ast_c.emptyComments; + } + + +let rec comment_until_defeol xs = + match xs with + | [] -> + (* job not done in Cpp_token_c.define_parse ? *) + failwith "cant find end of define token TDefEOL" + | x::xs -> + (match x with + | Parser_c.TDefEOL i -> + Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok x) + ::xs + | _ -> + let x' = + (* bugfix: otherwise may lose a TComment token *) + if TH.is_real_comment x + then x + else Parser_c.TCommentCpp (Token_c.CppPassingNormal (*good?*), TH.info_of_tok x) + in + x'::comment_until_defeol xs + ) + +let drop_until_defeol xs = + List.tl + (Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs) + + + +(* ------------------------------------------------------------------------- *) +(* returns a pair (replaced token, list of next tokens) *) +(* ------------------------------------------------------------------------- *) + +let tokens_include (info, includes, filename, inifdef) = + Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef), + [Parser_c.TIncludeFilename + (filename, (new_info (String.length includes) filename info)) + ] + + + + (*****************************************************************************) (* CPP handling: macros, ifdefs, macros defs *) (*****************************************************************************) @@ -919,31 +613,31 @@ let rec set_ifdef_parenthize_info xs = let rec commentize_skip_start_to_end xs = match xs with | [] -> () - | x::xs -> + | x::xs -> (match x with - | {tok = TCommentSkipTagStart info} -> - (try - let (before, x2, after) = + | {tok = TCommentSkipTagStart info} -> + (try + let (before, x2, after) = xs +> Common.split_when (function | {tok = TCommentSkipTagEnd _ } -> true - | _ -> false + | _ -> false ) in let topass = x::x2::before in - topass +> List.iter (fun tok -> + topass +> List.iter (fun tok -> set_as_comment Token_c.CppPassingExplicit tok ); commentize_skip_start_to_end after - with Not_found -> + with Not_found -> failwith "could not find end of skip_start special comment" ) - | {tok = TCommentSkipTagEnd info} -> + | {tok = TCommentSkipTagEnd info} -> failwith "found skip_end comment but no skip_start" - | _ -> + | _ -> commentize_skip_start_to_end xs ) - - + + (* ------------------------------------------------------------------------- *) @@ -951,33 +645,33 @@ let rec commentize_skip_start_to_end xs = (* ------------------------------------------------------------------------- *) (* #if 0, #if 1, #if LINUX_VERSION handling *) -let rec find_ifdef_bool xs = - xs +> List.iter (function +let rec find_ifdef_bool xs = + xs +> List.iter (function | NotIfdefLine _ -> () - | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) -> + | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) -> msg_ifdef_bool_passing is_ifdef_positif; (match xxs with | [] -> raise Impossible - | firstclause::xxs -> + | firstclause::xxs -> info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); - + if is_ifdef_positif - then xxs +> List.iter + then xxs +> List.iter (iter_token_ifdef (set_as_comment Token_c.CppPassingNormal)) else begin firstclause +> iter_token_ifdef (set_as_comment Token_c.CppPassingNormal); (match List.rev xxs with (* keep only last *) - | last::startxs -> - startxs +> List.iter + | last::startxs -> + startxs +> List.iter (iter_token_ifdef (set_as_comment Token_c.CppPassingNormal)) | [] -> (* not #else *) () ); end ); - + | Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool ) @@ -986,50 +680,50 @@ let rec find_ifdef_bool xs = let thresholdIfdefSizeMid = 6 (* infer ifdef involving not-closed expressions/statements *) -let rec find_ifdef_mid xs = - xs +> List.iter (function +let rec find_ifdef_mid xs = + xs +> List.iter (function | NotIfdefLine _ -> () - | Ifdef (xxs, info_ifdef_stmt) -> - (match xxs with + | Ifdef (xxs, info_ifdef_stmt) -> + (match xxs with | [] -> raise Impossible | [first] -> () - | first::second::rest -> + | first::second::rest -> (* don't analyse big ifdef *) - if xxs +> List.for_all - (fun xs -> List.length xs <= thresholdIfdefSizeMid) && + if xxs +> List.for_all + (fun xs -> List.length xs <= thresholdIfdefSizeMid) && (* don't want nested ifdef *) - xxs +> List.for_all (fun xs -> - xs +> List.for_all + xxs +> List.for_all (fun xs -> + xs +> List.for_all (function NotIfdefLine _ -> true | _ -> false) ) - - then + + then let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in - let cnt1, cnt2 = List.hd counts in - if cnt1 <> 0 || cnt2 <> 0 && + let cnt1, cnt2 = List.hd counts in + if cnt1 <> 0 || cnt2 <> 0 && counts +> List.for_all (fun x -> x =*= (cnt1, cnt2)) (* - if counts +> List.exists (fun (cnt1, cnt2) -> - cnt1 <> 0 || cnt2 <> 0 - ) + if counts +> List.exists (fun (cnt1, cnt2) -> + cnt1 <> 0 || cnt2 <> 0 + ) *) then begin msg_ifdef_mid_something(); (* keep only first, treat the rest as comment *) info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); - (second::rest) +> List.iter + (second::rest) +> List.iter (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError)); end - + ); List.iter find_ifdef_mid xxs - + (* no need complex analysis for ifdefbool *) - | Ifdefbool (_, xxs, info_ifdef_stmt) -> + | Ifdefbool (_, xxs, info_ifdef_stmt) -> List.iter find_ifdef_mid xxs - - + + ) @@ -1038,19 +732,19 @@ let thresholdFunheaderLimit = 4 (* ifdef defining alternate function header, type *) let rec find_ifdef_funheaders = function | [] -> () - | NotIfdefLine _::xs -> find_ifdef_funheaders xs + | NotIfdefLine _::xs -> find_ifdef_funheaders xs (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *) - | Ifdef + | Ifdef ([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1; (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2 - ], info_ifdef_stmt + ], info_ifdef_stmt ) ::NotIfdefLine (({tok = TOBrace i; col = 0})::line3) - ::xs + ::xs when List.length ifdefblock1 <= thresholdFunheaderLimit && List.length ifdefblock2 <= thresholdFunheaderLimit - -> + -> find_ifdef_funheaders xs; msg_ifdef_funheaders (); @@ -1060,19 +754,19 @@ let rec find_ifdef_funheaders = function ifdefblock2 +> iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError); (* ifdef with nested ifdef *) - | Ifdef + | Ifdef ([[NotIfdefLine (({col = 0} as _xline1)::line1)]; - [Ifdef + [Ifdef ([[NotIfdefLine (({col = 0} as xline2)::line2)]; [NotIfdefLine (({col = 0} as xline3)::line3)]; ], info_ifdef_stmt2 ) ] - ], info_ifdef_stmt + ], info_ifdef_stmt ) ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4) - ::xs - -> + ::xs + -> find_ifdef_funheaders xs; msg_ifdef_funheaders (); @@ -1082,38 +776,38 @@ let rec find_ifdef_funheaders = function all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError); (* ifdef with elseif *) - | Ifdef + | Ifdef ([[NotIfdefLine (({col = 0} as _xline1)::line1)]; [NotIfdefLine (({col = 0} as xline2)::line2)]; [NotIfdefLine (({col = 0} as xline3)::line3)]; - ], info_ifdef_stmt + ], info_ifdef_stmt ) ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4) - ::xs - -> + ::xs + -> find_ifdef_funheaders xs; msg_ifdef_funheaders (); info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); let all_toks = [xline2;xline3] @ line2 @ line3 in all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) - + (* recurse *) - | Ifdef (xxs,info_ifdef_stmt)::xs - | Ifdefbool (_, xxs,info_ifdef_stmt)::xs -> - List.iter find_ifdef_funheaders xxs; + | Ifdef (xxs,info_ifdef_stmt)::xs + | Ifdefbool (_, xxs,info_ifdef_stmt)::xs -> + List.iter find_ifdef_funheaders xxs; find_ifdef_funheaders xs - + (* ?? *) -let rec adjust_inifdef_include xs = - xs +> List.iter (function +let rec adjust_inifdef_include xs = + xs +> List.iter (function | NotIfdefLine _ -> () - | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) -> - xxs +> List.iter (iter_token_ifdef (fun tokext -> + | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) -> + xxs +> List.iter (iter_token_ifdef (fun tokext -> match tokext.tok with - | Parser_c.TInclude (s1, s2, inifdef_ref, ii) -> + | Parser_c.TInclude (s1, s2, inifdef_ref, ii) -> inifdef_ref := true; | _ -> () )); @@ -1121,267 +815,174 @@ let rec adjust_inifdef_include xs = -(* ------------------------------------------------------------------------- *) -(* cpp-builtin part2, macro, using standard.h or other defs *) -(* ------------------------------------------------------------------------- *) - -(* Thanks to this function many stuff are not anymore hardcoded in ocaml code - * (but they are now hardcoded in standard.h ...) - * - * - * - * No need to take care to not substitute the macro name itself - * that occurs in the macro definition because the macro name is - * after fix_token_define a TDefineIdent, no more a TIdent. - *) -let rec apply_macro_defs xs = - match xs with - | [] -> () - (* old: "but could do more, could reuse same original token - * so that have in the Ast a Dbg, not a MACROSTATEMENT" - * - * | PToken ({tok = TIdent (s,i1)} as id)::xs - * when s = "MACROSTATEMENT" -> - * - * msg_macro_statement_hint s; - * id.tok <- TMacroStmt(TH.info_of_tok id.tok); - * find_macro_paren xs - * - * let msg_macro_statement_hint s = - * incr Stat.nMacroHint; - * () - * - *) - (* recognized macro of standard.h (or other) *) - | PToken ({tok = TIdent (s,i1)} as id)::Parenthised (xxs,info_parens)::xs - when Hashtbl.mem !_defs s -> - - msg_apply_known_macro s; - let (s, params, body) = Hashtbl.find !_defs s in - - (match params with - | NoParam -> - pr2 ("WEIRD: macro without param used before parenthize: " ^ s); - (* ex: PRINTP("NCR53C400 card%s detected\n" ANDP(((struct ... *) - - (match body with - | DefineBody bodymacro -> - set_as_comment (Token_c.CppMacro) id; - id.new_tokens_before <- bodymacro; - | DefineHint hint -> - msg_apply_known_macro_hint s; - id.tok <- token_from_parsinghack_hint (s,i1) hint; - ) - | Params params -> - (match body with - | DefineBody bodymacro -> - - (* bugfix: better to put this that before the match body, - * cos our macrostatement hint can have variable number of - * arguments and so it's ok if it does not match exactly - * the number of arguments. *) - if List.length params != List.length xxs - then begin - pr2_once ("WEIRD: macro with wrong number of arguments: " ^ s); - (* old: id.new_tokens_before <- bodymacro; *) - () - end - else - - let xxs' = xxs +> List.map (fun x -> - (tokens_of_paren_ordered x) +> List.map (fun x -> - TH.visitor_info_of_tok Ast_c.make_expanded x.tok - ) - ) in - id.new_tokens_before <- - cpp_engine (Common.zip params xxs') bodymacro; - - (* important to do that after have apply the macro, otherwise - * will pass as argument to the macro some tokens that - * are all TCommentCpp - *) - [Parenthised (xxs, info_parens)] +> - iter_token_paren (set_as_comment Token_c.CppMacro); - set_as_comment Token_c.CppMacro id; - - | DefineHint (HintMacroStatement as hint) -> - (* important to do that after have apply the macro, otherwise - * will pass as argument to the macro some tokens that - * are all TCommentCpp - * - * note: such macrostatement can have a variable number of - * arguments but here we don't care, we just pass all the - * parameters. - *) - - (match xs with - | PToken ({tok = TPtVirg _} as id2)::_ -> - pr2_once - ("macro stmt with trailing ';', passing also ';' for: "^ - s); - (* sometimes still want pass its params ... as in - * DEBUGPOLL(static unsigned int prev_mask = 0); - *) - - msg_apply_known_macro_hint s; - id.tok <- token_from_parsinghack_hint (s,i1) hint; - [Parenthised (xxs, info_parens)] +> - iter_token_paren (set_as_comment Token_c.CppMacro); - set_as_comment Token_c.CppMacro id2; - - | _ -> - msg_apply_known_macro_hint s; - id.tok <- token_from_parsinghack_hint (s,i1) hint; - [Parenthised (xxs, info_parens)] +> - iter_token_paren (set_as_comment Token_c.CppMacro); - ) - - | DefineHint hint -> - msg_apply_known_macro_hint s; - id.tok <- token_from_parsinghack_hint (s,i1) hint; - ) - ); - apply_macro_defs xs - - | PToken ({tok = TIdent (s,i1)} as id)::xs - when Hashtbl.mem !_defs s -> - - msg_apply_known_macro s; - let (_s, params, body) = Hashtbl.find !_defs s in - - (match params with - | Params params -> - pr2 ("WEIRD: macro with params but no parens found: " ^ s); - (* dont apply the macro, perhaps a redefinition *) - () - | NoParam -> - (match body with - | DefineBody [newtok] -> - (* special case when 1-1 substitution, we reuse the token *) - id.tok <- (newtok +> TH.visitor_info_of_tok (fun _ -> - TH.info_of_tok id.tok)) - | DefineBody bodymacro -> - set_as_comment Token_c.CppMacro id; - id.new_tokens_before <- bodymacro; - | DefineHint hint -> - msg_apply_known_macro_hint s; - id.tok <- token_from_parsinghack_hint (s,i1) hint; - ) - ); - apply_macro_defs xs +let rec find_ifdef_cparen_else xs = + let rec aux xs = + xs +> List.iter (function + | NotIfdefLine _ -> () + | Ifdef (xxs, info_ifdef_stmt) -> + (match xxs with + | [] -> raise Impossible + | [first] -> () + | first::second::rest -> + (* found a closing ')' just after the #else *) + (* Too bad ocaml does not support better list pattern matching + * a la Prolog-III where can match the end of lists. + *) + let condition = + if List.length first = 0 then false + else + let last_line = Common.last first in + match last_line with + | NotIfdefLine xs -> + if List.length xs = 0 then false + else + let last_tok = Common.last xs in + TH.is_cpar last_tok.tok + | Ifdef _ | Ifdefbool _ -> false + in + if condition then begin + msg_ifdef_cparen_else(); + (* keep only first, treat the rest as comment *) + info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); + (second::rest) +> List.iter + (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError)); + end - (* recurse *) - | (PToken x)::xs -> apply_macro_defs xs - | (Parenthised (xxs, info_parens))::xs -> - xxs +> List.iter apply_macro_defs; - apply_macro_defs xs + ); + List.iter aux xxs + (* no need complex analysis for ifdefbool *) + | Ifdefbool (_, xxs, info_ifdef_stmt) -> + List.iter aux xxs + ) + in aux xs +(* ------------------------------------------------------------------------- *) +(* cpp-builtin part2, macro, using standard.h or other defs *) +(* ------------------------------------------------------------------------- *) +(* now in cpp_token_c.ml *) (* ------------------------------------------------------------------------- *) (* stringification *) (* ------------------------------------------------------------------------- *) -let rec find_string_macro_paren xs = +let rec find_string_macro_paren xs = match xs with | [] -> () - | Parenthised(xxs, info_parens)::xs -> - xxs +> List.iter (fun xs -> - if xs +> List.exists + | Parenthised(xxs, info_parens)::xs -> + xxs +> List.iter (fun xs -> + if xs +> List.exists (function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) && - xs +> List.for_all - (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) -> + xs +> List.for_all + (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) -> true | _ -> false) then - xs +> List.iter (fun tok -> + xs +> List.iter (fun tok -> match tok with - | PToken({tok = TIdent (s,_)} as id) -> + | PToken({tok = TIdent (s,_)} as id) -> msg_stringification s; id.tok <- TMacroString (s, TH.info_of_tok id.tok); | _ -> () ) - else + else find_string_macro_paren xs ); find_string_macro_paren xs - | PToken(tok)::xs -> + | PToken(tok)::xs -> find_string_macro_paren xs - + (* ------------------------------------------------------------------------- *) (* macro2 *) (* ------------------------------------------------------------------------- *) (* don't forget to recurse in each case *) -let rec find_macro_paren xs = +let rec find_macro_paren xs = match xs with | [] -> () - + (* attribute *) | PToken ({tok = Tattribute _} as id) ::Parenthised (xxs,info_parens) ::xs - -> + -> pr2_cpp ("MACRO: __attribute detected "); - [Parenthised (xxs, info_parens)] +> + [Parenthised (xxs, info_parens)] +> iter_token_paren (set_as_comment Token_c.CppAttr); set_as_comment Token_c.CppAttr id; find_macro_paren xs + | PToken ({tok = TattributeNoarg _} as id) + ::xs + -> + pr2_cpp ("MACRO: __attributenoarg detected "); + set_as_comment Token_c.CppAttr id; + find_macro_paren xs + (* - (* attribute cpp, __xxx id() *) + (* attribute cpp, __xxx id *) | PToken ({tok = TIdent (s,i1)} as id) - ::PToken ({tok = TIdent (s2, i2)}) - ::Parenthised(xxs,info_parens) + ::PToken ({tok = TIdent (s2, i2)} as id2) ::xs when s ==~ regexp_annot - -> + -> msg_attribute s; id.tok <- TMacroAttr (s, i1); - find_macro_paren (Parenthised(xxs,info_parens)::xs) + find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *) - (* attribute cpp, id __xxx = *) - | PToken ({tok = TIdent (s,i1)}) - ::PToken ({tok = TIdent (s2, i2)} as id) - ::xs when s2 ==~ regexp_annot - -> + (* attribute cpp, id __xxx *) + | PToken ({tok = TIdent (s,i1)} as _id) + ::PToken ({tok = TIdent (s2, i2)} as id2) + ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef)) + -> msg_attribute s2; - id.tok <- TMacroAttr (s2, i2); - find_macro_paren (xs) + id2.tok <- TMacroAttr (s2, i2); + find_macro_paren xs + + | PToken ({tok = (Tstatic _ | Textern _)} as tok1) + ::PToken ({tok = TIdent (s,i1)} as attr) + ::xs when s ==~ regexp_annot + -> + pr2_cpp ("storage attribute: " ^ s); + attr.tok <- TMacroAttrStorage (s,i1); + (* recurse, may have other storage attributes *) + find_macro_paren (PToken (tok1)::xs) + + *) (* storage attribute *) | PToken ({tok = (Tstatic _ | Textern _)} as tok1) - ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs - -> + ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs + -> pr2_cpp ("storage attribute: " ^ s); attr.tok <- TMacroAttrStorage (s,i1); (* recurse, may have other storage attributes *) find_macro_paren (PToken (tok1)::xs) - + (* stringification - * + * * the order of the matching clause is important - * + * *) (* string macro with params, before case *) | PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id) ::Parenthised (xxs, info_parens) - ::xs -> + ::xs -> msg_stringification_params s; id.tok <- TMacroString (s, TH.info_of_tok id.tok); - [Parenthised (xxs, info_parens)] +> + [Parenthised (xxs, info_parens)] +> iter_token_paren (set_as_comment Token_c.CppMacro); find_macro_paren xs @@ -1389,11 +990,11 @@ let rec find_macro_paren xs = | PToken ({tok = TIdent (s,_)} as id) ::Parenthised (xxs, info_parens) ::PToken ({tok = (TString _ | TMacroString _)}) - ::xs -> + ::xs -> msg_stringification_params s; id.tok <- TMacroString (s, TH.info_of_tok id.tok); - [Parenthised (xxs, info_parens)] +> + [Parenthised (xxs, info_parens)] +> iter_token_paren (set_as_comment Token_c.CppMacro); find_macro_paren xs @@ -1401,10 +1002,10 @@ let rec find_macro_paren xs = (* for the case where the string is not inside a funcall, but * for instance in an initializer. *) - + (* string macro variable, before case *) | PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id) - ::xs -> + ::xs -> msg_stringification s; id.tok <- TMacroString (s, TH.info_of_tok id.tok); @@ -1413,19 +1014,19 @@ let rec find_macro_paren xs = (* after case *) | PToken ({tok = TIdent (s,_)} as id) ::PToken ({tok = (TString _ | TMacroString _)}) - ::xs -> + ::xs -> msg_stringification s; id.tok <- TMacroString (s, TH.info_of_tok id.tok); find_macro_paren xs - + (* recurse *) - | (PToken x)::xs -> find_macro_paren xs - | (Parenthised (xxs, info_parens))::xs -> + | (PToken x)::xs -> find_macro_paren xs + | (Parenthised (xxs, info_parens))::xs -> xxs +> List.iter find_macro_paren; find_macro_paren xs @@ -1434,21 +1035,21 @@ let rec find_macro_paren xs = (* don't forget to recurse in each case *) -let rec find_macro_lineparen xs = +let rec find_macro_lineparen xs = match xs with | [] -> () (* linuxext: ex: static [const] DEVICE_ATTR(); *) - | (Line + | (Line ( [PToken ({tok = Tstatic _}); PToken ({tok = TIdent (s,_)} as macro); Parenthised (xxs,info_parens); PToken ({tok = TPtVirg _}); - ] + ] )) - ::xs - when (s ==~ regexp_macro) -> + ::xs + when (s ==~ regexp_macro) -> msg_declare_macro s; let info = TH.info_of_tok macro.tok in @@ -1457,25 +1058,25 @@ let rec find_macro_lineparen xs = find_macro_lineparen (xs) (* the static const case *) - | (Line + | (Line ( [PToken ({tok = Tstatic _}); PToken ({tok = Tconst _} as const); PToken ({tok = TIdent (s,_)} as macro); Parenthised (xxs,info_parens); PToken ({tok = TPtVirg _}); - ] + ] (*as line1*) )) - ::xs - when (s ==~ regexp_macro) -> + ::xs + when (s ==~ regexp_macro) -> msg_declare_macro s; let info = TH.info_of_tok macro.tok in macro.tok <- TMacroDecl (Ast_c.str_of_info info, info); - - (* need retag this const, otherwise ambiguity in grammar + + (* need retag this const, otherwise ambiguity in grammar 21: shift/reduce conflict (shift 121, reduce 137) on Tconst decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ... decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ... @@ -1487,18 +1088,18 @@ let rec find_macro_lineparen xs = (* same but without trailing ';' - * + * * I do not put the final ';' because it can be on a multiline and * because of the way mk_line is coded, we will not have access to * this ';' on the next line, even if next to the ')' *) - | (Line + | (Line ([PToken ({tok = Tstatic _}); PToken ({tok = TIdent (s,_)} as macro); Parenthised (xxs,info_parens); - ] + ] )) - ::xs - when s ==~ regexp_macro -> + ::xs + when s ==~ regexp_macro -> msg_declare_macro s; let info = TH.info_of_tok macro.tok in @@ -1510,20 +1111,20 @@ let rec find_macro_lineparen xs = (* on multiple lines *) - | (Line + | (Line ( (PToken ({tok = Tstatic _})::[] ))) - ::(Line + ::(Line ( [PToken ({tok = TIdent (s,_)} as macro); Parenthised (xxs,info_parens); PToken ({tok = TPtVirg _}); ] - ) + ) ) - ::xs - when (s ==~ regexp_macro) -> + ::xs + when (s ==~ regexp_macro) -> msg_declare_macro s; let info = TH.info_of_tok macro.tok in @@ -1532,8 +1133,8 @@ let rec find_macro_lineparen xs = find_macro_lineparen (xs) - (* linuxext: ex: DECLARE_BITMAP(); - * + (* linuxext: ex: DECLARE_BITMAP(); + * * Here I use regexp_declare and not regexp_macro because * Sometimes it can be a FunCallMacro such as DEBUG(foo()); * Here we don't have the preceding 'static' so only way to @@ -1543,15 +1144,15 @@ let rec find_macro_lineparen xs = * unless the parameter of the DECLARE_xxx are weird and can not be mapped * on a argument_list *) - - | (Line + + | (Line ([PToken ({tok = TIdent (s,_)} as macro); Parenthised (xxs,info_parens); PToken ({tok = TPtVirg _}); ] )) - ::xs - when (s ==~ regexp_declare) -> + ::xs + when (s ==~ regexp_declare) -> msg_declare_macro s; let info = TH.info_of_tok macro.tok in @@ -1559,34 +1160,34 @@ let rec find_macro_lineparen xs = find_macro_lineparen (xs) - + (* toplevel macros. * module_init(xxx) - * + * * Could also transform the TIdent in a TMacroTop but can have false * positive, so easier to just change the TCPar and so just solve * the end-of-stream pb of ocamlyacc *) - | (Line + | (Line ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro); Parenthised (xxs,info_parens); ] as _line1 )) ::xs when col1 =|= 0 - -> - let condition = + -> + let condition = (* to reduce number of false positive *) (match xs with - | (Line (PToken ({col = col2 } as other)::restline2))::_ -> + | (Line (PToken ({col = col2 } as other)::restline2))::_ -> TH.is_eof other.tok || (col2 =|= 0 && (match other.tok with | TOBrace _ -> false (* otherwise would match funcdecl *) | TCBrace _ when ctx <> InFunction -> false - | TPtVirg _ + | TPtVirg _ | TDotDot _ -> false | tok when TH.is_binary_operator tok -> false - + | _ -> true ) ) @@ -1600,44 +1201,44 @@ let rec find_macro_lineparen xs = (* just to avoid the end-of-stream pb of ocamlyacc *) let tcpar = Common.last info_parens in tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok); - + (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*) - + end; find_macro_lineparen (xs) - (* macro with parameters + (* macro with parameters * ex: DEBUG() * return x; *) - | (Line + | (Line ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro); Parenthised (xxs,info_parens); ] as _line1 )) - ::(Line + ::(Line (PToken ({col = col2 } as other)::restline2 ) as line2) - ::xs + ::xs (* when s ==~ regexp_macro *) - -> - let condition = - (col1 =|= col2 && + -> + let condition = + (col1 =|= col2 && (match other.tok with | TOBrace _ -> false (* otherwise would match funcdecl *) | TCBrace _ when ctx <> InFunction -> false - | TPtVirg _ + | TPtVirg _ | TDotDot _ -> false | tok when TH.is_binary_operator tok -> false | _ -> true ) - ) - || + ) + || (col2 <= col1 && (match other.tok, restline2 with | TCBrace _, _ when ctx =*= InFunction -> true @@ -1646,7 +1247,7 @@ let rec find_macro_lineparen xs = | Telse _, _ -> true (* case of label, usually put in first line *) - | TIdent _, (PToken ({tok = TDotDot _}))::_ -> + | TIdent _, (PToken ({tok = TDotDot _}))::_ -> true @@ -1655,42 +1256,42 @@ let rec find_macro_lineparen xs = ) in - + if condition - then + then if col1 =|= 0 then () else begin msg_macro_noptvirg s; macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok); - [Parenthised (xxs, info_parens)] +> + [Parenthised (xxs, info_parens)] +> iter_token_paren (set_as_comment Token_c.CppMacro); end; find_macro_lineparen (line2::xs) - - (* linuxext:? single macro + + (* linuxext:? single macro * ex: LOCK * foo(); * UNLOCK - * + * * todo: factorize code with previous rule ? *) - | (Line + | (Line ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro); ] as _line1 )) - ::(Line + ::(Line (PToken ({col = col2 } as other)::restline2 ) as line2) - ::xs -> + ::xs -> (* when s ==~ regexp_macro *) - - let condition = - (col1 =|= col2 && + + let condition = + (col1 =|= col2 && col1 <> 0 && (* otherwise can match typedef of fundecl*) (match other.tok with - | TPtVirg _ -> false - | TOr _ -> false + | TPtVirg _ -> false + | TOr _ -> false | TCBrace _ when ctx <> InFunction -> false | tok when TH.is_binary_operator tok -> false @@ -1705,15 +1306,15 @@ let rec find_macro_lineparen xs = | _ -> false )) in - + if condition then begin msg_macro_noptvirg_single s; macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok); end; find_macro_lineparen (line2::xs) - - | x::xs -> + + | x::xs -> find_macro_lineparen xs @@ -1722,8 +1323,8 @@ let rec find_macro_lineparen xs = (* define tobrace init *) (* ------------------------------------------------------------------------- *) -let rec find_define_init_brace_paren xs = - let rec aux xs = +let rec find_define_init_brace_paren xs = + let rec aux xs = match xs with | [] -> () @@ -1733,17 +1334,17 @@ let rec find_define_init_brace_paren xs = ::(PToken ({tok = TOBrace i1} as tokbrace)) ::(PToken tok2) ::(PToken tok3) - ::xs -> + ::xs -> let is_init = match tok2.tok, tok3.tok with | TInt _, TComma _ -> true | TString _, TComma _ -> true | TIdent _, TComma _ -> true | _ -> false - + in if is_init - then begin + then begin pr2_cpp("found define initializer: " ^s); tokbrace.tok <- TOBraceDefineInit i1; end; @@ -1757,30 +1358,30 @@ let rec find_define_init_brace_paren xs = ::(PToken ({tok = TOBrace i1} as tokbrace)) ::(PToken tok2) ::(PToken tok3) - ::xs -> + ::xs -> let is_init = match tok2.tok, tok3.tok with | TInt _, TComma _ -> true | TDot _, TIdent _ -> true | TIdent _, TComma _ -> true | _ -> false - + in if is_init - then begin + then begin pr2_cpp("found define initializer with param: " ^ s); tokbrace.tok <- TOBraceDefineInit i1; end; aux xs - + (* recurse *) - | (PToken x)::xs -> aux xs - | (Parenthised (xxs, info_parens))::xs -> + | (PToken x)::xs -> aux xs + | (Parenthised (xxs, info_parens))::xs -> (* not need for tobrace init: - * xxs +> List.iter aux; + * xxs +> List.iter aux; *) aux xs in @@ -1791,35 +1392,51 @@ let rec find_define_init_brace_paren xs = (* action *) (* ------------------------------------------------------------------------- *) +(* obsolete now with macro expansion ? get some regression if comment. + * todo: if do bad decision here, then it can influence other phases + * and make it hard to parse. So maybe when have a parse error, should + * undo some of the guess those heuristics have done, and restore + * the original token value. + *) + let rec find_actions = function | [] -> () | PToken ({tok = TIdent (s,ii)}) ::Parenthised (xxs,info_parens) - ::xs -> + ::xs -> find_actions xs; xxs +> List.iter find_actions; let modified = find_actions_params xxs in - if modified + if modified then msg_macro_higher_order s - - | x::xs -> + + | x::xs -> find_actions xs -and find_actions_params xxs = - xxs +> List.fold_left (fun acc xs -> +and find_actions_params xxs = + xxs +> List.fold_left (fun acc xs -> let toks = tokens_of_paren xs in if toks +> List.exists (fun x -> TH.is_statement x.tok) + (* undo: && List.length toks > 1 + * good for sparse, not good for linux + *) then begin - xs +> iter_token_paren (fun x -> + xs +> iter_token_paren (fun x -> if TH.is_eof x.tok - then + then (* certainly because paren detection had a pb because of - * some ifdef-exp + * some ifdef-exp. Do similar additional checking than + * what is done in set_as_comment. *) - pr2 "PB: weird, I try to tag an EOF token as action" - else - x.tok <- TAction (TH.info_of_tok x.tok); + pr2 "PB: weird, I try to tag an EOF token as an action" + else + (* cf tests-bis/no_cpar_macro.c *) + if TH.is_eom x.tok + then + pr2 "PB: weird, I try to tag an EOM token as an action" + else + x.tok <- TAction (TH.info_of_tok x.tok); ); true (* modified *) end @@ -1832,39 +1449,26 @@ and find_actions_params xxs = (* main fix cpp function *) (* ------------------------------------------------------------------------- *) -let rebuild_tokens_extented toks_ext = - let _tokens = ref [] in - toks_ext +> List.iter (fun tok -> - tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); - push2 tok.tok _tokens - ); - let tokens = List.rev !_tokens in - (tokens +> acc_map mk_token_extended) - -let filter_cpp_stuff xs = - let rec aux xs = - match xs with - | [] -> [] - | x::xs -> - (match x.tok with - | tok when TH.is_comment tok -> aux xs +let filter_cpp_stuff xs = + List.filter + (function x -> + (match x.tok with + | tok when TH.is_comment tok -> false (* don't want drop the define, or if drop, have to drop * also its body otherwise the line heuristics may be lost * by not finding the TDefine in column 0 but by finding * a TDefineIdent in a column > 0 *) - | Parser_c.TDefine _ -> - x::aux xs - | tok when TH.is_cpp_instruction tok -> aux xs - | _ -> x::aux xs - ) - in - aux xs + | Parser_c.TDefine _ -> true + | tok when TH.is_cpp_instruction tok -> false + | _ -> true + )) + xs let insert_virtual_positions l = let strlen x = String.length (Ast_c.str_of_info x) in - let rec loop prev offset = function - [] -> [] + let rec loop prev offset acc = function + [] -> List.rev acc | x::xs -> let ii = TH.info_of_tok x in let inject pi = @@ -1872,301 +1476,141 @@ let insert_virtual_positions l = match Ast_c.pinfo_of_info ii with Ast_c.OriginTok pi -> let prev = Ast_c.parse_info_of_info ii in - x::(loop prev (strlen ii) xs) + loop prev (strlen ii) (x::acc) xs | Ast_c.ExpandedTok (pi,_) -> - inject (Ast_c.ExpandedTok (pi,(prev,offset))) :: - (loop prev (offset + (strlen ii)) xs) + let x' = inject (Ast_c.ExpandedTok (pi,(prev,offset))) in + loop prev (offset + (strlen ii)) (x'::acc) xs | Ast_c.FakeTok (s,_) -> - inject (Ast_c.FakeTok (s,(prev,offset))) :: - (loop prev (offset + (strlen ii)) xs) + let x' = inject (Ast_c.FakeTok (s,(prev,offset))) in + loop prev (offset + (strlen ii)) (x'::acc) xs | Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in let rec skip_fake = function - [] -> [] + | [] -> [] | x::xs -> let ii = TH.info_of_tok x in match Ast_c.pinfo_of_info ii with - Ast_c.OriginTok pi -> + | Ast_c.OriginTok pi -> let prev = Ast_c.parse_info_of_info ii in - x::(loop prev (strlen ii) xs) + let res = loop prev (strlen ii) [] xs in + x::res | _ -> x::skip_fake xs in skip_fake l + (* ------------------------------------------------------------------------- *) -let fix_tokens_cpp2 tokens = - let tokens2 = ref (tokens +> acc_map mk_token_extended) in - - begin +let fix_tokens_cpp2 ~macro_defs tokens = + let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in + + begin (* the order is important, if you put the action heuristic first, * then because of ifdef, can have not closed paren - * and so may believe that higher order macro - * and it will eat too much tokens. So important to do + * and so may believe that higher order macro + * and it will eat too much tokens. So important to do * first the ifdef. - * + * * I recompute multiple times cleaner cos the mutable * can have be changed and so may have more comments * in the token original list. - * + * *) commentize_skip_start_to_end !tokens2; (* ifdef *) - let cleaner = !tokens2 +> List.filter (fun x -> - (* is_comment will also filter the TCommentCpp created in + let cleaner = !tokens2 +> List.filter (fun x -> + (* is_comment will also filter the TCommentCpp created in * commentize_skip_start_to_end *) not (TH.is_comment x.tok) (* could filter also #define/#include *) ) in - let ifdef_grouped = mk_ifdef cleaner in + let ifdef_grouped = TV.mk_ifdef cleaner in set_ifdef_parenthize_info ifdef_grouped; find_ifdef_funheaders ifdef_grouped; find_ifdef_bool ifdef_grouped; find_ifdef_mid ifdef_grouped; + (* change order ? maybe cparen_else heuristic make some of the funheaders + * heuristics irrelevant ? + *) + find_ifdef_cparen_else ifdef_grouped; adjust_inifdef_include ifdef_grouped; (* macro 1 *) let cleaner = !tokens2 +> filter_cpp_stuff in - let paren_grouped = mk_parenthised cleaner in - apply_macro_defs paren_grouped; + let paren_grouped = TV.mk_parenthised cleaner in + Cpp_token_c.apply_macro_defs + ~msg_apply_known_macro + ~msg_apply_known_macro_hint + macro_defs paren_grouped; (* because the before field is used by apply_macro_defs *) - tokens2 := rebuild_tokens_extented !tokens2; + tokens2 := TV.rebuild_tokens_extented !tokens2; (* tagging contextual info (InFunc, InStruct, etc). Better to do * that after the "ifdef-simplification" phase. *) - let cleaner = !tokens2 +> List.filter (fun x -> + let cleaner = !tokens2 +> List.filter (fun x -> not (TH.is_comment x.tok) (* could filter also #define/#include *) ) in - let brace_grouped = mk_braceised cleaner in + let brace_grouped = TV.mk_braceised cleaner in set_context_tag brace_grouped; - - (* macro *) let cleaner = !tokens2 +> filter_cpp_stuff in - let paren_grouped = mk_parenthised cleaner in - let line_paren_grouped = mk_line_parenthised paren_grouped in + let paren_grouped = TV.mk_parenthised cleaner in + let line_paren_grouped = TV.mk_line_parenthised paren_grouped in find_define_init_brace_paren paren_grouped; find_string_macro_paren paren_grouped; find_macro_lineparen line_paren_grouped; find_macro_paren paren_grouped; - (* actions *) + (* obsolete: actions ? not yet *) let cleaner = !tokens2 +> filter_cpp_stuff in - let paren_grouped = mk_parenthised cleaner in + let paren_grouped = TV.mk_parenthised cleaner in find_actions paren_grouped; - insert_virtual_positions (!tokens2 +> acc_map (fun x -> x.tok)) - end - -let time_hack1 a = - Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 a) - -let fix_tokens_cpp a = - Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 a) - - - - -(*****************************************************************************) -(* The #define tricks *) -(*****************************************************************************) - -(* ugly hack, a better solution perhaps would be to erase TDefEOL - * from the Ast and list of tokens in parse_c. - * - * note: I do a +1 somewhere, it's for the unparsing to correctly sync. - * - * note: can't replace mark_end_define by simply a fakeInfo(). The reason - * is where is the \n TCommentSpace. Normally there is always a last token - * to synchronize on, either EOF or the token of the next toplevel. - * In the case of the #define we got in list of token - * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will - * not synchronize on it and so we will not print the "\n". - * A solution would be to put the TDefEOL before the "\n". - * - * todo?: could put a ExpandedTok for that ? - *) -let mark_end_define ii = - let ii' = - { Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with - Common.str = ""; - Common.charpos = Ast_c.pos_of_info ii + 1 - }; - cocci_tag = ref Ast_c.emptyAnnot; - comments_tag = ref Ast_c.emptyComments; - } - in - TDefEOL (ii') - -(* put the TDefEOL at the good place *) -let rec define_line_1 acc xs = - match xs with - | [] -> List.rev acc - | TDefine ii::xs -> - let line = Ast_c.line_of_info ii in - let acc = (TDefine ii) :: acc in - define_line_2 acc line ii xs - | TCppEscapedNewline ii::xs -> - pr2 "WEIRD: a \\ outside a #define"; - let acc = (TCommentSpace ii) :: acc in - define_line_1 acc xs - | x::xs -> define_line_1 (x::acc) xs - -and define_line_2 acc line lastinfo xs = - match xs with - | [] -> - (* should not happened, should meet EOF before *) - pr2 "PB: WEIRD"; - List.rev (mark_end_define lastinfo::acc) - | x::xs -> - let line' = TH.line_of_tok x in - let info = TH.info_of_tok x in - - (match x with - | EOF ii -> - let acc = (mark_end_define lastinfo) :: acc in - let acc = (EOF ii) :: acc in - define_line_1 acc xs - | TCppEscapedNewline ii -> - if (line' <> line) then pr2 "PB: WEIRD: not same line number"; - let acc = (TCommentSpace ii) :: acc in - define_line_2 acc (line+1) info xs - | x -> - if line' =|= line - then define_line_2 (x::acc) line info xs - else define_line_1 (mark_end_define lastinfo::acc) (x::xs) - ) - -let rec define_ident acc xs = - match xs with - | [] -> List.rev acc - | TDefine ii::xs -> - let acc = TDefine ii :: acc in - (match xs with - | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs -> - (* Change also the kind of TIdent to avoid bad interaction - * with other parsing_hack tricks. For instant if keep TIdent then - * the stringication algo can believe the TIdent is a string-macro. - * So simpler to change the kind of the ident too. - *) - (* if TOParDefine sticked to the ident, then - * it's a macro-function. Change token to avoid ambiguity - * between #define foo(x) and #define foo (x) - *) - let acc = (TCommentSpace i1) :: acc in - let acc = (TIdentDefine (s,i2)) :: acc in - let acc = (TOParDefine i3) :: acc in - define_ident acc xs - | TCommentSpace i1::TIdent (s,i2)::xs -> - let acc = (TCommentSpace i1) :: acc in - let acc = (TIdentDefine (s,i2)) :: acc in - define_ident acc xs - | _ -> - pr2 "WEIRD: weird #define body"; - define_ident acc xs - ) - | x::xs -> - let acc = x :: acc in - define_ident acc xs - - -let fix_tokens_define2 xs = - define_ident [] (define_line_1 [] xs) + insert_virtual_positions (!tokens2 +> Common.acc_map (fun x -> x.tok)) + end -let fix_tokens_define a = - Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a) - +let time_hack1 ~macro_defs a = + Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 ~macro_defs a) -(*****************************************************************************) -(* for the cpp-builtin, standard.h, part 0 *) -(*****************************************************************************) +let fix_tokens_cpp ~macro_defs a = + Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 ~macro_defs a) -let macro_body_to_maybe_hint body = - match body with - | [] -> DefineBody body - | [TIdent (s,i1)] -> - (match parsinghack_hint_of_string s with - | Some hint -> DefineHint hint - | None -> DefineBody body - ) - | xs -> DefineBody body -let rec define_parse xs = - match xs with - | [] -> [] - | TDefine i1::TIdentDefine (s,i2)::TOParDefine i3::xs -> - let (tokparams, _, xs) = - xs +> Common.split_when (function TCPar _ -> true | _ -> false) in - let (body, _, xs) = - xs +> Common.split_when (function TDefEOL _ -> true | _ -> false) in - let params = - tokparams +> Common.map_filter (function - | TComma _ -> None - | TIdent (s, _) -> Some s - | x -> error_cant_have x - ) in - let body = body +> List.map - (TH.visitor_info_of_tok Ast_c.make_expanded) in - let def = (s, (s, Params params, macro_body_to_maybe_hint body)) in - def::define_parse xs - - | TDefine i1::TIdentDefine (s,i2)::xs -> - let (body, _, xs) = - xs +> Common.split_when (function TDefEOL _ -> true | _ -> false) in - let body = body +> List.map - (TH.visitor_info_of_tok Ast_c.make_expanded) in - let def = (s, (s, NoParam, macro_body_to_maybe_hint body)) in - def::define_parse xs - - | TDefine i1::_ -> - pr2_gen i1; - raise Impossible - | x::xs -> define_parse xs - - -let extract_cpp_define xs = - let cleaner = xs +> List.filter (fun x -> - not (TH.is_comment x) - ) in - define_parse cleaner - - - (*****************************************************************************) (* Lexing with lookahead *) (*****************************************************************************) (* Why using yet another parsing_hack technique ? The fix_xxx where do - * some pre-processing on the full list of tokens is not enough ? + * some pre-processing on the full list of tokens is not enough ? * No cos sometimes we need more contextual info, and even if * set_context() tries to give some contextual info, it's not completely * accurate so the following code give yet another alternative, yet another * chance to transform some tokens. - * + * * todo?: maybe could try to get rid of this technique. Maybe a better * set_context() would make possible to move this code using a fix_xx * technique. - * + * * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but * it is more general to do it via my LALR(k) tech. Because here we can * transform some token give some context information. So sometimes it * makes sense to transform a token in one context, sometimes not, and * lex can not provide us this context information. Note that the order - * in the pattern matching in lookahead is important. Do not cut/paste. - * + * in the pattern matching in lookahead is important. Do not cut/paste. + * * Note that in next there is only "clean" tokens, there is no comment * or space tokens. This is done by the caller. - * + * *) open Lexer_parser (* for the fields of lexer_hint type *) @@ -2176,7 +1620,7 @@ let not_struct_enum = function | _ -> true -let lookahead2 ~pass next before = +let lookahead2 ~pass next before = match (next, before) with @@ -2187,16 +1631,16 @@ let lookahead2 ~pass next before = | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2 && ok_typedef s (* (take_safe 1 !passed_tok <> [TOPar]) -> *) - -> + -> (* parse_typedef_fix3: * acpi_object acpi_object; - * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec. + * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec. * Le parser en interne a deja appelé le prochain token pour pouvoir * decider des choses. * => special case in lexer_heuristic, again *) - if !Flag_parsing_c.debug_typedef - then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s); + if !Flag_parsing_c.debug_typedef + then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s); LP.disable_typedef(); @@ -2204,7 +1648,7 @@ let lookahead2 ~pass next before = TypedefIdent (s, i1) (* xx yy *) - | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before + | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before && ok_typedef s -> (* && not_annot s2 BUT lead to false positive*) @@ -2214,9 +1658,9 @@ let lookahead2 ~pass next before = (* xx inline *) - | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before + | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before && ok_typedef s - -> + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2225,7 +1669,7 @@ let lookahead2 ~pass next before = | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ ) when not_struct_enum before && (LP.current_context() =*= LP.InParameter) && ok_typedef s - -> + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2235,7 +1679,7 @@ let lookahead2 ~pass next before = when not_struct_enum before (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) && ok_typedef s - -> + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2246,25 +1690,25 @@ let lookahead2 ~pass next before = when not_struct_enum before (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) && ok_typedef s - -> + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) (* xx const * USELESS because of next rule ? *) - | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ ) - when not_struct_enum before + | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ ) + when not_struct_enum before (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) - + (* xx const *) - | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ ) - when not_struct_enum before + | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ ) + when not_struct_enum before && ok_typedef s (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) -> @@ -2274,8 +1718,8 @@ let lookahead2 ~pass next before = (* xx * const *) - | (TIdent (s, i1)::TMul _::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ ) - when not_struct_enum before + | (TIdent (s, i1)::TMul _::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ ) + when not_struct_enum before && ok_typedef s -> (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) @@ -2289,14 +1733,14 @@ let lookahead2 ~pass next before = ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) - + (* ( xx ) [sizeof, ~] *) | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ ) when not_struct_enum before && ok_typedef s - -> + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2304,31 +1748,31 @@ let lookahead2 ~pass next before = | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_) when (LP.current_context() =*= LP.InParameter) && ok_typedef s - -> + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) - + (*------------------------------------------------------------*) (* if 'x*y' maybe an expr, maybe just a classic multiplication *) (* but if have a '=', or ',' I think not *) (*------------------------------------------------------------*) (* static xx * yy *) - | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , + | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when - ok_typedef s + ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) - + (* TODO xx * yy ; AND in start of compound element *) (* xx * yy, AND in paramdecl *) | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _) when not_struct_enum before && (LP.current_context() =*= LP.InParameter) - && ok_typedef s - -> + && ok_typedef s + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2339,46 +1783,46 @@ let lookahead2 ~pass next before = TIdent (s, i1) | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , _) when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ())) - -> + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) (* xx * yy , AND in Toplevel *) | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _) when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel) - && ok_typedef s - -> + && ok_typedef s + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) (* xx * yy ( AND in Toplevel *) | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOPar _::_ , _) - when not_struct_enum before + when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ())) - && ok_typedef s + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) - + (* xx * yy [ *) (* todo? enough ? cos in struct def we can have some expression ! *) | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOCro _::_ , _) - when not_struct_enum before && + when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ())) - && ok_typedef s - -> + && ok_typedef s + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) (* u16: 10; in struct *) | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_) when (LP.is_top_or_struct (LP.current_context ())) - && ok_typedef s - -> + && ok_typedef s + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) - + (* why need TOPar condition as stated in preceding rule ? really needed ? *) (* YES cos at toplevel can have some expression !! for instance when *) @@ -2388,16 +1832,15 @@ let lookahead2 ~pass next before = when (take_safe 1 !passed_tok <> [Tstruct] && (take_safe 1 !passed_tok <> [Tenum])) && - !LP._lexer_hint = Some LP.Toplevel -> - msg_typedef s; - LP.add_typedef_root s; + !LP._lexer_hint = Some LP.Toplevel -> + msg_typedef s; LP.add_typedef_root s; TypedefIdent s *) (* xx * yy = *) | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TEq _::_ , _) - when not_struct_enum before - && ok_typedef s + when not_struct_enum before + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2406,16 +1849,16 @@ let lookahead2 ~pass next before = (* xx * yy) AND in paramdecl *) | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TCPar _::_ , _) when not_struct_enum before && (LP.current_context () =*= LP.InParameter) - && ok_typedef s + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) - + (* xx * yy; *) (* wrong ? *) - | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , - (TOBrace _| TPtVirg _)::_) when not_struct_enum before - && ok_typedef s + | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , + (TOBrace _| TPtVirg _)::_) when not_struct_enum before + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; msg_maybe_dangereous_typedef s; @@ -2423,20 +1866,20 @@ let lookahead2 ~pass next before = (* xx * yy, and ';' before xx *) (* wrong ? *) - | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , + | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , (TOBrace _| TPtVirg _)::_) when - ok_typedef s + ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) (* xx_t * yy *) - | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , _) - when s ==~ regexp_typedef && not_struct_enum before - (* struct user_info_t sometimes *) - && ok_typedef s - -> + | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , _) + when s ==~ regexp_typedef && not_struct_enum before + (* struct user_info_t sometimes *) + && ok_typedef s + -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2444,15 +1887,15 @@ let lookahead2 ~pass next before = | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _) when not_struct_enum before (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) - && ok_typedef s + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) (* xx *** yy *) | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _) - when not_struct_enum before - && ok_typedef s + when not_struct_enum before + && ok_typedef s (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) -> msg_typedef s; LP.add_typedef_root s; @@ -2460,9 +1903,9 @@ let lookahead2 ~pass next before = (* xx ** ) *) | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _) - when not_struct_enum before + when not_struct_enum before (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) - && ok_typedef s + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2470,41 +1913,41 @@ let lookahead2 ~pass next before = (* ----------------------------------- *) - (* old: why not do like for other rules and start with TIdent ? + (* old: why not do like for other rules and start with TIdent ? * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ? * new: prefer now start with TIdent because otherwise the add_typedef_root * may have no effect if in second pass or if have disable the add_typedef. *) (* (xx) yy *) - | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ , - (TOPar info)::x::_) + | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ , + (TOPar info)::x::_) when not (TH.is_stuff_taking_parenthized x) && Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 - && ok_typedef s - -> + && ok_typedef s + -> msg_typedef s; LP.add_typedef_root s; (*TOPar info*) TypedefIdent (s, i1) - (* (xx) ( yy) + (* (xx) ( yy) * but false positif: typedef int (xxx_t)(...), so do specialisation below. *) (* - | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_) - when not (TH.is_stuff_taking_parenthized x) - && ok_typedef s + | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_) + when not (TH.is_stuff_taking_parenthized x) + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; (* TOPar info *) TypedefIdent (s, i1) *) (* special case: = (xx) ( yy) *) - | (TIdent (s, i1)::TCPar _::TOPar _::_ , + | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::(TEq _ |TEqEq _)::_) - when ok_typedef s + when ok_typedef s -> msg_typedef s; LP.add_typedef_root s; (* TOPar info *) @@ -2512,44 +1955,43 @@ let lookahead2 ~pass next before = (* (xx * ) yy *) - | (TIdent (s, i1)::TMul _::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_) when - ok_typedef s - -> + | (TIdent (s, i1)::TMul _::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_) when + ok_typedef s + -> msg_typedef s; LP.add_typedef_root s; (*TOPar info*) TypedefIdent (s,i1) (* (xx){ ... } constructor *) - | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_) - when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x) - && ok_typedef s + | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_) + when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x) + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) (* can have sizeof on expression - | (Tsizeof::TOPar::TIdent s::TCPar::_, _) -> - msg_typedef s; - LP.add_typedef_root s; + | (Tsizeof::TOPar::TIdent s::TCPar::_, _) -> + msg_typedef s; LP.add_typedef_root s; Tsizeof *) (* ----------------------------------- *) (* x ( *y )(params), function pointer *) - | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _) + | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _) when not_struct_enum before - && ok_typedef s + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) (* x* ( *y )(params), function pointer 2 *) - | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _) + | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _) when not_struct_enum before - && ok_typedef s + && ok_typedef s -> msg_typedef s; LP.add_typedef_root s; TypedefIdent (s, i1) @@ -2561,42 +2003,42 @@ let lookahead2 ~pass next before = | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) | TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii)) as x) - ::_, _ - -> + ::_, _ + -> (* - if not !Flag_parsing_c.ifdef_to_if + if not !Flag_parsing_c.ifdef_to_if then TCommentCpp (Ast_c.CppDirective, ii) - else + else *) (* not !LP._lexer_hint.toplevel *) if !Flag_parsing_c.ifdef_directive_passing - || (pass =|= 2) + || (pass >= 2) then begin - + if (LP.current_context () =*= LP.InInitializer) - then begin + then begin pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *) incr Stat.nIfdefInitializer; - end else begin - pr2_cpp("IFDEF: or related insde function. I treat it as comment"); + end else begin + pr2_cpp("IFDEF: or related inside function. I treat it as comment"); incr Stat.nIfdefPassing; end; TCommentCpp (Token_c.CppDirective, ii) end else x - - | (TUndef (id, ii) as x)::_, _ - -> - if (pass =|= 2) + + | (TUndef (id, ii) as x)::_, _ + -> + if (pass >= 2) then begin pr2_cpp("UNDEF: I treat it as comment"); TCommentCpp (Token_c.CppDirective, ii) end else x - | (TCppDirectiveOther (ii) as x)::_, _ - -> - if (pass =|= 2) + | (TCppDirectiveOther (ii) as x)::_, _ + -> + if (pass >= 2) then begin pr2_cpp ("OTHER directive: I treat it as comment"); TCommentCpp (Token_c.CppDirective, ii) @@ -2606,19 +2048,18 @@ let lookahead2 ~pass next before = (* If ident contain a for_each, then certainly a macro. But to be * sure should look if there is a '{' after the ')', but it requires * to count the '('. Because this can be expensive, we do that only - * when the token contains "for_each". + * when the token contains "for_each". *) - | (TIdent (s, i1)::TOPar _::rest, _) + | (TIdent (s, i1)::TOPar _::rest, _) when not (LP.current_context () =*= LP.InTopLevel) - (* otherwise a function such as static void loopback_enable(int i) { - * will be considered as a loop + (* otherwise a function such as static void loopback_enable(int i) { + * will be considered as a loop *) -> - - if s ==~ regexp_foreach && + if s ==~ regexp_foreach && is_really_foreach (Common.take_safe forLOOKAHEAD rest) - + then begin msg_foreach s; TMacroIterator (s, i1) @@ -2626,12 +2067,12 @@ let lookahead2 ~pass next before = else TIdent (s, i1) - + (*-------------------------------------------------------------*) | v::xs, _ -> v | _ -> raise Impossible -let lookahead ~pass a b = +let lookahead ~pass a b = Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b)