(* Yoann Padioleau
*
* Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
* Copyright (C) 2007, 2008 Ecole des Mines de Nantes
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)
open Common
module TH = Token_helpers
module TV = Token_views_c
module LP = Lexer_parser
module Stat = Parsing_stat
open Parser_c
open TV
(*****************************************************************************)
(* Some debugging functions *)
(*****************************************************************************)
let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
let pr2_cpp s =
if !Flag_parsing_c.debug_cpp
then Common.pr2_once ("CPP-" ^ s)
let msg_gen cond is_known printer s =
if cond
then
if not (!Flag_parsing_c.filter_msg)
then printer s
else
if not (is_known s)
then printer s
(* In the following, there are some harcoded names of types or macros
* but they are not used by our heuristics! They are just here to
* enable to detect false positive by printing only the typedef/macros
* that we don't know yet. If we print everything, then we can easily
* get lost with too much verbose tracing information. So those
* functions "filter" some messages. So our heuristics are still good,
* there is no more (or not that much) hardcoded linux stuff.
*)
let is_known_typdef =
(fun s ->
(match s with
| "u_char" | "u_short" | "u_int" | "u_long"
| "u8" | "u16" | "u32" | "u64"
| "s8" | "s16" | "s32" | "s64"
| "__u8" | "__u16" | "__u32" | "__u64"
-> true
| "acpi_handle"
| "acpi_status"
-> true
| "FILE"
| "DIR"
-> true
| s when s =~ ".*_t$" -> true
| _ -> false
)
)
(* note: cant use partial application with let msg_typedef =
* because it would compute msg_typedef at compile time when
* the flag debug_typedef is always false
*)
let msg_typedef s ii n =
incr Stat.nTypedefInfer;
msg_gen (!Flag_parsing_c.debug_typedef)
is_known_typdef
(fun s ->
pr2_cpp
(Printf.sprintf "TYPEDEF: promoting:(%d) %s on line %d" n s
(Ast_c.line_of_info ii))
(*(Printf.sprintf "TYPEDEF: promoting: %s on line %d" s
(Ast_c.line_of_info ii))*)
)
s
let msg_maybe_dangereous_typedef s =
if not (is_known_typdef s)
then
pr2
("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
let msg_declare_macro s =
incr Stat.nMacroDecl;
msg_gen (!Flag_parsing_c.debug_cpp)
(fun s ->
(match s with
| "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
| "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
| "DEFINE_SPINLOCK" | "DEFINE_TIMER"
| "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
| "SENSOR_DEVICE_ATTR"
| "LIST_HEAD"
| "DECLARE_WORK" | "DECLARE_TASKLET"
| "PORT_ATTR_RO" | "PORT_PMA_ATTR"
| "DECLARE_BITMAP"
-> true
(*
| s when s =~ "^DECLARE_.*" -> true
| s when s =~ ".*_ATTR$" -> true
| s when s =~ "^DEFINE_.*" -> true
*)
| _ -> false
)
)
(fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s))
s
let msg_foreach s =
incr Stat.nIteratorHeuristic;
pr2_cpp ("MACRO: found foreach: " ^ s)
(* ??
let msg_debug_macro s =
pr2_cpp ("MACRO: found debug-macro: " ^ s)
*)
let msg_macro_noptvirg s =
incr Stat.nMacroStmt;
pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)
let msg_macro_toplevel_noptvirg s =
incr Stat.nMacroStmt;
pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)
let msg_macro_noptvirg_single s =
incr Stat.nMacroStmt;
pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
let msg_macro_higher_order s =
incr Stat.nMacroHigherOrder;
msg_gen (!Flag_parsing_c.debug_cpp)
(fun s ->
(match s with
| "DBGINFO"
| "DBGPX"
| "DFLOW"
-> true
| _ -> false
)
)
(fun s -> pr2_cpp ("MACRO: found higher ordre macro : " ^ s))
s
let msg_stringification s =
incr Stat.nMacroString;
msg_gen (!Flag_parsing_c.debug_cpp)
(fun s ->
(match s with
| "REVISION"
| "UTS_RELEASE"
| "SIZE_STR"
| "DMA_STR"
-> true
(* s when s =~ ".*STR.*" -> true *)
| _ -> false
)
)
(fun s -> pr2_cpp ("MACRO: found string-macro " ^ s))
s
let msg_stringification_params s =
incr Stat.nMacroString;
pr2_cpp ("MACRO: string-macro with params : " ^ s)
let msg_apply_known_macro s =
incr Stat.nMacroExpand;
pr2_cpp ("MACRO: found known macro = " ^ s)
let msg_apply_known_macro_hint s =
incr Stat.nMacroHint;
pr2_cpp ("MACRO: found known macro hint = " ^ s)
let msg_ifdef_bool_passing is_ifdef_positif =
incr Stat.nIfdefZero; (* of Version ? *)
if is_ifdef_positif
then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION"
else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
let msg_ifdef_mid_something () =
incr Stat.nIfdefExprPassing;
pr2_cpp "found ifdef-mid-something"
let msg_ifdef_funheaders () =
incr Stat.nIfdefFunheader;
()
let msg_ifdef_cparen_else () =
incr Stat.nIfdefPassing;
pr2_cpp("found ifdef-cparen-else")
let msg_attribute s =
incr Stat.nMacroAttribute;
pr2_cpp("ATTR:" ^ s)
(*****************************************************************************)
(* The regexp and basic view definitions *)
(*****************************************************************************)
(* opti: better to built then once and for all, especially regexp_foreach *)
let regexp_macro = Str.regexp
"^[A-Z_][A-Z_0-9]*$"
(* linuxext: *)
let regexp_annot = Str.regexp
"^__.*$"
(* linuxext: *)
let regexp_declare = Str.regexp
".*DECLARE.*"
(* linuxext: *)
let regexp_foreach = Str.regexp_case_fold
".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
let regexp_typedef = Str.regexp
".*_t$"
let false_typedef = [
"printk";
]
let ok_typedef s = not (List.mem s false_typedef)
let not_annot s =
not (s ==~ regexp_annot)
let is_macro s =
s ==~ regexp_macro
let not_macro s =
not (is_macro s)
(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)
(* ------------------------------------------------------------------------- *)
(* the pair is the status of '()' and '{}', ex: (-1,0)
* if too much ')' and good '{}'
* could do for [] too ?
* could do for ',' if encounter ',' at "toplevel", not inside () or {}
* then if have ifdef, then certainly can lead to a problem.
*)
let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
fun xs ->
let cnt_paren, cnt_brace = ref 0, ref 0 in
xs +> TV.iter_token_ifdef (fun x ->
(match x.tok with
| x when TH.is_opar x -> incr cnt_paren
| TOBrace _ -> incr cnt_brace
| x when TH.is_cpar x -> decr cnt_paren
| TCBrace _ -> decr cnt_brace
| _ -> ()
)
);
!cnt_paren, !cnt_brace
(* ------------------------------------------------------------------------- *)
let forLOOKAHEAD = 30
(* look if there is a '{' just after the closing ')', and handling the
* possibility to have nested expressions inside nested parenthesis
*
* todo: use indentation instead of premier(statement) ?
*)
let rec is_really_foreach xs =
let rec is_foreach_aux = function
| [] -> false, []
| TCPar _::TOBrace _::xs -> true, xs
(* the following attempts to handle the cases where there is a
single statement in the body of the loop. undoubtedly more
cases are needed.
todo: premier(statement) - suivant(funcall)
*)
| TCPar _::TIdent _::xs -> true, xs
| TCPar _::Tif _::xs -> true, xs
| TCPar _::Twhile _::xs -> true, xs
| TCPar _::Tfor _::xs -> true, xs
| TCPar _::Tswitch _::xs -> true, xs
| TCPar _::Treturn _::xs -> true, xs
| TCPar _::xs -> false, xs
| TOPar _::xs ->
let (_, xs') = is_foreach_aux xs in
is_foreach_aux xs'
| x::xs -> is_foreach_aux xs
in
is_foreach_aux xs +> fst
(* ------------------------------------------------------------------------- *)
let set_ifdef_token_parenthize_info cnt x =
match x with
| TIfdef (tag, _)
| TIfdefelse (tag, _)
| TIfdefelif (tag, _)
| TEndif (tag, _)
| TIfdefBool (_, tag, _)
| TIfdefMisc (_, tag, _)
| TIfdefVersion (_, tag, _) ->
tag := Some cnt;
| _ -> raise (Impossible 89)
let ifdef_paren_cnt = ref 0
let rec set_ifdef_parenthize_info xs =
xs +> List.iter (function
| NotIfdefLine xs -> ()
| Ifdefbool (_, xxs, info_ifdef)
| Ifdef (xxs, info_ifdef) ->
incr ifdef_paren_cnt;
let total_directives = List.length info_ifdef in
info_ifdef +> List.iter (fun x ->
set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives)
x.tok);
xxs +> List.iter set_ifdef_parenthize_info
)
(*****************************************************************************)
(* The parsing hack for #define *)
(*****************************************************************************)
(* To parse macro definitions I need to do some tricks
* as some information can be get only at the lexing level. For instance
* the space after the name of the macro in '#define foo (x)' is meaningful
* but the grammar can not get this information. So define_ident below
* look at such space and generate a special TOpardefine. In a similar
* way macro definitions can contain some antislash and newlines
* and the grammar need to know where the macro ends (which is
* a line-level and so low token-level information). Hence the
* function 'define_line' below and the TDefEol.
*
* update: TDefEol is handled in a special way at different places,
* a little bit like EOF, especially for error recovery, so this
* is an important token that should not be retagged!
*
*
* ugly hack, a better solution perhaps would be to erase TDefEOL
* from the Ast and list of tokens in parse_c.
*
* note: I do a +1 somewhere, it's for the unparsing to correctly sync.
*
* note: can't replace mark_end_define by simply a fakeInfo(). The reason
* is where is the \n TCommentSpace. Normally there is always a last token
* to synchronize on, either EOF or the token of the next toplevel.
* In the case of the #define we got in list of token
* [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
* not synchronize on it and so we will not print the "\n".
* A solution would be to put the TDefEOL before the "\n".
* (jll: tried to do this, see the comment "Put end of line..." below)
*
* todo?: could put a ExpandedTok for that ?
*)
let mark_end_define ii =
let ii' =
{ Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
Common.str = "";
Common.charpos = Ast_c.pos_of_info ii + 1
};
cocci_tag = ref Ast_c.emptyAnnot;
annots_tag = Token_annot.empty;
comments_tag = ref Ast_c.emptyComments;
}
in
TDefEOL (ii')
(* put the TDefEOL at the good place *)
let rec define_line_1 acc xs =
match xs with
| [] -> List.rev acc
| TDefine ii::xs ->
let line = Ast_c.line_of_info ii in
let acc = (TDefine ii) :: acc in
define_line_2 acc ((=|=) line) ii xs
| TUndef ii::xs ->
let line = Ast_c.line_of_info ii in
let acc = (TUndef ii) :: acc in
define_line_2 acc ((=|=) line) ii xs
| TCppEscapedNewline ii::xs ->
pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
pr2 (Ast_c.strloc_of_info ii);
let acc = (TCommentSpace ii) :: acc in
define_line_1 acc xs
| x::xs -> define_line_1 (x::acc) xs
and define_line_2 acc lineOk lastinfo xs =
match xs with
| [] ->
(* should not happened, should meet EOF before *)
pr2 "PB: WEIRD";
List.rev (mark_end_define lastinfo::acc)
| x::xs ->
let line = TH.line_of_tok x in
let info = TH.info_of_tok x in
(match x with
| EOF ii ->
let acc = (mark_end_define lastinfo) :: acc in
let acc = (EOF ii) :: acc in
define_line_1 acc xs
| TCppEscapedNewline ii ->
if not (lineOk line) then pr2 "PB: WEIRD: not same line number";
let acc = (TCommentSpace ii) :: acc in
define_line_2 acc ((=|=) (line + 1)) info xs
| TComment _ when lineOk line ->
define_line_2 (x::acc) (function x -> true) info xs
| TString _ when lineOk line ->
define_line_2 (x::acc) (function x -> true) info xs
| x ->
if lineOk line
then define_line_2 (x::acc) ((=|=) line) info xs
else
(* Put end of line token before the newline. A newline at least
must be there because the line changed and because we saw a
#define previously to get to this function at all *)
define_line_1
((List.hd acc)::(mark_end_define lastinfo::(List.tl acc)))
(x::xs)
)
let rec define_ident acc xs =
match xs with
| [] -> List.rev acc
| TUndef ii::xs ->
let acc = TUndef ii :: acc in
(match xs with
TCommentSpace i1::TIdent (s,i2)::xs ->
let acc = (TCommentSpace i1) :: acc in
let acc = (TIdentDefine (s,i2)) :: acc in
define_ident acc xs
| _ ->
pr2 "WEIRD: weird #define body";
define_ident acc xs
)
| TDefine ii::xs ->
let acc = TDefine ii :: acc in
(match xs with
| TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
(* Change also the kind of TIdent to avoid bad interaction
* with other parsing_hack tricks. For instant if keep TIdent then
* the stringication algo can believe the TIdent is a string-macro.
* So simpler to change the kind of the ident too.
*)
(* if TOParDefine sticked to the ident, then
* it's a macro-function. Change token to avoid ambiguity
* between #define foo(x) and #define foo (x)
*)
let acc = (TCommentSpace i1) :: acc in
let acc = (TIdentDefine (s,i2)) :: acc in
let acc = (TOParDefine i3) :: acc in
define_ident acc xs
| TCommentSpace i1::TIdent (s,i2)::xs ->
let acc = (TCommentSpace i1) :: acc in
let acc = (TIdentDefine (s,i2)) :: acc in
define_ident acc xs
(* bugfix: ident of macro (as well as params, cf below) can be tricky
* note, do we need to subst in the body of the define ? no cos
* here the issue is the name of the macro, as in #define inline,
* so obviously the name of this macro will not be used in its
* body (it would be a recursive macro, which is forbidden).
*)
| TCommentSpace i1::t::xs
when TH.str_of_tok t ==~ Common.regexp_alpha
->
let s = TH.str_of_tok t in
let ii = TH.info_of_tok t in
pr2 (spf "remapping: %s to an ident in macro name" s);
let acc = (TCommentSpace i1) :: acc in
let acc = (TIdentDefine (s,ii)) :: acc in
define_ident acc xs
| TCommentSpace _::_::xs
| xs ->
pr2 "WEIRD: weird #define body";
define_ident acc xs
)
| x::xs ->
let acc = x :: acc in
define_ident acc xs
let fix_tokens_define2 xs =
define_ident [] (define_line_1 [] xs)
let fix_tokens_define a =
Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a)
(* ------------------------------------------------------------------------- *)
(* Other parsing hacks related to cpp, Include/Define hacks *)
(* ------------------------------------------------------------------------- *)
(* Sometimes I prefer to generate a single token for a list of things in the
* lexer so that if I have to passed them, like for passing TInclude then
* it's easy. Also if I don't do a single token, then I need to
* parse the rest which may not need special stuff, like detecting
* end of line which the parser is not really ready for. So for instance
* could I parse a #include as 2 or more tokens ? just
* lex #include ? so then need recognize as one token ?
* but this kind of token is valid only after a #include and the
* lexing and parsing rules are different for such tokens so not that
* easy to parse such things in parser_c.mly. Hence the following hacks.
*
* less?: maybe could get rid of this like I get rid of some of fix_define.
*)
(* helpers *)
(* used to generate new token from existing one *)
let new_info posadd str ii =
{ Ast_c.pinfo =
Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
charpos = Ast_c.pos_of_info ii + posadd;
str = str;
column = Ast_c.col_of_info ii + posadd;
};
(* must generate a new ref each time, otherwise share *)
cocci_tag = ref Ast_c.emptyAnnot;
annots_tag = Token_annot.empty;
comments_tag = ref Ast_c.emptyComments;
}
let rec comment_until_defeol xs =
match xs with
| [] ->
(* job not done in Cpp_token_c.define_parse ? *)
failwith "cant find end of define token TDefEOL"
| x::xs ->
(match x with
| Parser_c.TDefEOL i ->
Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok x)
::xs
| _ ->
let x' =
(* bugfix: otherwise may lose a TComment token *)
if TH.is_real_comment x
then x
else Parser_c.TCommentCpp (Token_c.CppPassingNormal (*good?*), TH.info_of_tok x)
in
x'::comment_until_defeol xs
)
let drop_until_defeol xs =
List.tl
(Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs)
(* ------------------------------------------------------------------------- *)
(* returns a pair (replaced token, list of next tokens) *)
(* ------------------------------------------------------------------------- *)
let tokens_include (info, includes, filename, inifdef) =
Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef),
[Parser_c.TIncludeFilename
(filename, (new_info (String.length includes) filename info))
]
(*****************************************************************************)
(* CPP handling: macros, ifdefs, macros defs *)
(*****************************************************************************)
(* ------------------------------------------------------------------------- *)
(* special skip_start skip_end handling *)
(* ------------------------------------------------------------------------- *)
(* note: after this normally the token list should not contain any more the
* TCommentSkipTagStart and End tokens.
*)
let rec commentize_skip_start_to_end xs =
match xs with
| [] -> ()
| x::xs ->
(match x with
| {tok = TCommentSkipTagStart info} ->
(try
let (before, x2, after) =
xs +> Common.split_when (function
| {tok = TCommentSkipTagEnd _ } -> true
| _ -> false
)
in
let topass = x::x2::before in
topass +> List.iter (fun tok ->
TV.set_as_comment Token_c.CppPassingExplicit tok
);
commentize_skip_start_to_end after
with Not_found ->
failwith "could not find end of skip_start special comment"
)
| {tok = TCommentSkipTagEnd info} ->
failwith "found skip_end comment but no skip_start"
| _ ->
commentize_skip_start_to_end xs
)
(* ------------------------------------------------------------------------- *)
(* ifdef keeping/passing *)
(* ------------------------------------------------------------------------- *)
(* #if 0, #if 1, #if LINUX_VERSION handling *)
let rec find_ifdef_bool xs =
xs +> List.iter (function
| NotIfdefLine _ -> ()
| Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) ->
msg_ifdef_bool_passing is_ifdef_positif;
(match xxs with
| [] -> raise (Impossible 90)
| firstclause::xxs ->
info_ifdef_stmt +>
List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
if is_ifdef_positif
then xxs +> List.iter
(iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal))
else begin
firstclause +>
iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal);
(match List.rev xxs with
(* keep only last *)
| last::startxs ->
startxs +> List.iter
(iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal))
| [] -> (* not #else *) ()
);
end
);
| Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool
)
let thresholdIfdefSizeMid = 6
(* infer ifdef involving not-closed expressions/statements *)
let rec find_ifdef_mid xs =
xs +> List.iter (function
| NotIfdefLine _ -> ()
| Ifdef (xxs, info_ifdef_stmt) ->
(match xxs with
| [] -> raise (Impossible 91)
| [first] -> ()
| first::second::rest ->
(* don't analyse big ifdef *)
if xxs +> List.for_all
(fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
(* don't want nested ifdef *)
xxs +> List.for_all (fun xs ->
xs +> List.for_all
(function NotIfdefLine _ -> true | _ -> false)
)
then
let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
let cnt1, cnt2 = List.hd counts in
if cnt1 <> 0 || cnt2 <> 0 &&
counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
(*
if counts +> List.exists (fun (cnt1, cnt2) ->
cnt1 <> 0 || cnt2 <> 0
)
*)
then begin
msg_ifdef_mid_something();
(* keep only first, treat the rest as comment *)
info_ifdef_stmt +>
List.iter
(TV.save_as_comment (function x -> Token_c.CppIfDirective x));
(second::rest) +> List.iter
(iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError));
end
);
List.iter find_ifdef_mid xxs
(* no need complex analysis for ifdefbool *)
| Ifdefbool (_, xxs, info_ifdef_stmt) ->
List.iter find_ifdef_mid xxs
)
let thresholdFunheaderLimit = 4
(* ifdef defining alternate function header, type *)
let rec find_ifdef_funheaders = function
| [] -> ()
| NotIfdefLine _::xs -> find_ifdef_funheaders xs
(* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
| Ifdef
([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1;
(NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
], info_ifdef_stmt
)
::NotIfdefLine (({tok = TOBrace i; col = 0})::line3)
::xs
when List.length ifdefblock1 <= thresholdFunheaderLimit &&
List.length ifdefblock2 <= thresholdFunheaderLimit
->
find_ifdef_funheaders xs;
msg_ifdef_funheaders ();
info_ifdef_stmt +>
List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
let all_toks = [xline2] @ line2 in
all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError) ;
ifdefblock2 +> iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError);
(* ifdef with nested ifdef *)
| Ifdef
([[NotIfdefLine (({col = 0} as _xline1)::line1)];
[Ifdef
([[NotIfdefLine (({col = 0} as xline2)::line2)];
[NotIfdefLine (({col = 0} as xline3)::line3)];
], info_ifdef_stmt2
)
]
], info_ifdef_stmt
)
::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
::xs
->
find_ifdef_funheaders xs;
msg_ifdef_funheaders ();
info_ifdef_stmt +>
List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
info_ifdef_stmt2 +>
List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
let all_toks = [xline2;xline3] @ line2 @ line3 in
all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError);
(* ifdef with elseif *)
| Ifdef
([[NotIfdefLine (({col = 0} as _xline1)::line1)];
[NotIfdefLine (({col = 0} as xline2)::line2)];
[NotIfdefLine (({col = 0} as xline3)::line3)];
], info_ifdef_stmt
)
::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
::xs
->
find_ifdef_funheaders xs;
msg_ifdef_funheaders ();
info_ifdef_stmt +>
List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
let all_toks = [xline2;xline3] @ line2 @ line3 in
all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError)
(* recurse *)
| Ifdef (xxs,info_ifdef_stmt)::xs
| Ifdefbool (_, xxs,info_ifdef_stmt)::xs ->
List.iter find_ifdef_funheaders xxs;
find_ifdef_funheaders xs
(* ?? *)
let rec adjust_inifdef_include xs =
xs +> List.iter (function
| NotIfdefLine _ -> ()
| Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) ->
xxs +> List.iter (iter_token_ifdef (fun tokext ->
match tokext.tok with
| Parser_c.TInclude (s1, s2, inifdef_ref, ii) ->
inifdef_ref := true;
| _ -> ()
));
)
let rec find_ifdef_cparen_else xs =
let rec aux xs =
xs +> List.iter (function
| NotIfdefLine _ -> ()
| Ifdef (xxs, info_ifdef_stmt) ->
(match xxs with
| [] -> raise (Impossible 92)
| [first] -> ()
| first::second::rest ->
(* found a closing ')' just after the #else *)
(* Too bad ocaml does not support better list pattern matching
* a la Prolog-III where can match the end of lists.
*)
let condition =
if List.length first = 0 then false
else
let last_line = Common.last first in
match last_line with
| NotIfdefLine xs ->
if List.length xs = 0 then false
else
let last_tok = Common.last xs in
TH.is_cpar last_tok.tok
| Ifdef _ | Ifdefbool _ -> false
in
if condition then begin
msg_ifdef_cparen_else();
(* keep only first, treat the rest as comment *)
info_ifdef_stmt +>
List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
(second::rest) +> List.iter
(iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError));
end
);
List.iter aux xxs
(* no need complex analysis for ifdefbool *)
| Ifdefbool (_, xxs, info_ifdef_stmt) ->
List.iter aux xxs
)
in aux xs
(* ------------------------------------------------------------------------- *)
(* cpp-builtin part2, macro, using standard.h or other defs *)
(* ------------------------------------------------------------------------- *)
(* now in cpp_token_c.ml *)
(* ------------------------------------------------------------------------- *)
(* stringification *)
(* ------------------------------------------------------------------------- *)
let rec find_string_macro_paren xs =
match xs with
| [] -> ()
| Parenthised(xxs, info_parens)::xs ->
xxs +> List.iter (fun xs ->
if xs +> List.exists
(function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) &&
xs +> List.for_all
(function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) ->
true | _ -> false)
then
xs +> List.iter (fun tok ->
match tok with
| PToken({tok = TIdent (s,_)} as id) ->
msg_stringification s;
id.tok <- TMacroString (s, TH.info_of_tok id.tok);
| _ -> ()
)
else
find_string_macro_paren xs
);
find_string_macro_paren xs
| PToken(tok)::xs ->
find_string_macro_paren xs
(* ------------------------------------------------------------------------- *)
(* macro2 *)
(* ------------------------------------------------------------------------- *)
(* don't forget to recurse in each case *)
let rec find_macro_paren xs =
match xs with
| [] -> ()
(* attribute *)
| PToken ({tok = Tattribute _} as id)
::Parenthised (xxs,info_parens)
::xs
->
pr2_cpp ("MACRO: __attribute detected ");
[Parenthised (xxs, info_parens)] +>
iter_token_paren (TV.set_as_comment Token_c.CppAttr);
TV.set_as_comment Token_c.CppAttr id;
find_macro_paren xs
| PToken ({tok = TattributeNoarg _} as id)
::xs
->
pr2_cpp ("MACRO: __attributenoarg detected ");
TV.set_as_comment Token_c.CppAttr id;
find_macro_paren xs
(*
(* attribute cpp, __xxx id *)
| PToken ({tok = TIdent (s,i1)} as id)
::PToken ({tok = TIdent (s2, i2)} as id2)
::xs when s ==~ regexp_annot
->
msg_attribute s;
id.tok <- TMacroAttr (s, i1);
find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)
(* attribute cpp, id __xxx *)
| PToken ({tok = TIdent (s,i1)} as _id)
::PToken ({tok = TIdent (s2, i2)} as id2)
::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
->
msg_attribute s2;
id2.tok <- TMacroAttr (s2, i2);
find_macro_paren xs
| PToken ({tok = (Tstatic _ | Textern _)} as tok1)
::PToken ({tok = TIdent (s,i1)} as attr)
::xs when s ==~ regexp_annot
->
pr2_cpp ("storage attribute: " ^ s);
attr.tok <- TMacroAttrStorage (s,i1);
(* recurse, may have other storage attributes *)
find_macro_paren (PToken (tok1)::xs)
*)
(* storage attribute *)
| PToken ({tok = (Tstatic _ | Textern _)} as tok1)
::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs
->
pr2_cpp ("storage attribute: " ^ s);
attr.tok <- TMacroAttrStorage (s,i1);
(* recurse, may have other storage attributes *)
find_macro_paren (PToken (tok1)::xs)
(* stringification
*
* the order of the matching clause is important
*
*)
(* string macro with params, before case *)
| PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
::Parenthised (xxs, info_parens)
::xs ->
msg_stringification_params s;
id.tok <- TMacroString (s, TH.info_of_tok id.tok);
[Parenthised (xxs, info_parens)] +>
iter_token_paren (TV.set_as_comment Token_c.CppMacro);
find_macro_paren xs
(* after case *)
| PToken ({tok = TIdent (s,_)} as id)
::Parenthised (xxs, info_parens)
::PToken ({tok = (TString _ | TMacroString _)})
::xs ->
msg_stringification_params s;
id.tok <- TMacroString (s, TH.info_of_tok id.tok);
[Parenthised (xxs, info_parens)] +>
iter_token_paren (TV.set_as_comment Token_c.CppMacro);
find_macro_paren xs
(* for the case where the string is not inside a funcall, but
* for instance in an initializer.
*)
(* string macro variable, before case *)
| PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
::xs when not !Flag.c_plus_plus ->
msg_stringification s;
id.tok <- TMacroString (s, TH.info_of_tok id.tok);
find_macro_paren xs
(* after case *)
| PToken ({tok = TIdent (s,_)} as id)
::PToken ({tok = (TString _ | TMacroString _)})
::xs ->
msg_stringification s;
id.tok <- TMacroString (s, TH.info_of_tok id.tok);
find_macro_paren xs
(* recurse *)
| (PToken x)::xs -> find_macro_paren xs
| (Parenthised (xxs, info_parens))::xs ->
xxs +> List.iter find_macro_paren;
find_macro_paren xs
(* don't forget to recurse in each case *)
let rec find_macro_lineparen xs =
match xs with
| [] -> ()
(* linuxext: ex: static [const] DEVICE_ATTR(); *)
| (Line
(
[PToken ({tok = Tstatic _});
PToken ({tok = TIdent (s,_)} as macro);
Parenthised (xxs,info_parens);
PToken ({tok = TPtVirg _});
]
))
::xs
when (s ==~ regexp_macro) ->
msg_declare_macro s;
let info = TH.info_of_tok macro.tok in
macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
find_macro_lineparen (xs)
(* the static const case *)
| (Line
(
[PToken ({tok = Tstatic _});
PToken ({tok = Tconst _} as const);
PToken ({tok = TIdent (s,_)} as macro);
Parenthised (xxs,info_parens);
PToken ({tok = TPtVirg _});
]
(*as line1*)
))
::xs
when (s ==~ regexp_macro) ->
msg_declare_macro s;
let info = TH.info_of_tok macro.tok in
macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
(* need retag this const, otherwise ambiguity in grammar
21: shift/reduce conflict (shift 121, reduce 137) on Tconst
decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
storage_class_spec : Tstatic . (137)
*)
const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
find_macro_lineparen (xs)
(* same but without trailing ';'
*
* I do not put the final ';' because it can be on a multiline and
* because of the way mk_line is coded, we will not have access to
* this ';' on the next line, even if next to the ')' *)
| (Line
([PToken ({tok = Tstatic _});
PToken ({tok = TIdent (s,_)} as macro);
Parenthised (xxs,info_parens);
]
))
::xs
when s ==~ regexp_macro ->
msg_declare_macro s;
let info = TH.info_of_tok macro.tok in
macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
find_macro_lineparen (xs)
(* on multiple lines *)
| (Line
(
(PToken ({tok = Tstatic _})::[]
)))
::(Line
(
[PToken ({tok = TIdent (s,_)} as macro);
Parenthised (xxs,info_parens);
PToken ({tok = TPtVirg _});
]
)
)
::xs
when (s ==~ regexp_macro) ->
msg_declare_macro s;
let info = TH.info_of_tok macro.tok in
macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
find_macro_lineparen (xs)
| (Line (* initializer case *)
(
PToken ({tok = Tstatic _}) ::
PToken ({tok = TIdent (s,_)} as macro) ::
Parenthised (xxs,info_parens) ::
PToken ({tok = TEq _}) :: rest
))
::xs
when (s ==~ regexp_macro) ->
msg_declare_macro s;
let info = TH.info_of_tok macro.tok in
macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
(* continue with the rest of the line *)
find_macro_lineparen ((Line(rest))::xs)
| (Line (* multi-line initializer case *)
(
(PToken ({tok = Tstatic _})::[]
)))
::(Line
(
PToken ({tok = Tstatic _}) ::
PToken ({tok = TIdent (s,_)} as macro) ::
Parenthised (xxs,info_parens) ::
PToken ({tok = TEq _}) :: rest
))
::xs
when (s ==~ regexp_macro) ->
msg_declare_macro s;
let info = TH.info_of_tok macro.tok in
macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
(* continue with the rest of the line *)
find_macro_lineparen ((Line(rest))::xs)
(* linuxext: ex: DECLARE_BITMAP();
*
* Here I use regexp_declare and not regexp_macro because
* Sometimes it can be a FunCallMacro such as DEBUG(foo());
* Here we don't have the preceding 'static' so only way to
* not have positive is to restrict to .*DECLARE.* macros.
*
* but there is a grammar rule for that, so don't need this case anymore
* unless the parameter of the DECLARE_xxx are weird and can not be mapped
* on a argument_list
*)
| (Line
([PToken ({tok = TIdent (s,_)} as macro);
Parenthised (xxs,info_parens);
PToken ({tok = TPtVirg _});
]
))
::xs
when (s ==~ regexp_declare) ->
msg_declare_macro s;
let info = TH.info_of_tok macro.tok in
macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
find_macro_lineparen (xs)
(* toplevel macros.
* module_init(xxx)
*
* Could also transform the TIdent in a TMacroTop but can have false
* positive, so easier to just change the TCPar and so just solve
* the end-of-stream pb of ocamlyacc
*)
| (Line
([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro);
Parenthised (xxs,info_parens);
] as _line1
))
::xs when col1 =|= 0
->
let condition =
(* to reduce number of false positive *)
(match xs with
| (Line (PToken ({col = col2 } as other)::restline2))::_ ->
TH.is_eof other.tok || (col2 =|= 0 &&
(match other.tok with
| TOBrace _ -> false (* otherwise would match funcdecl *)
| TCBrace _ when ctx <> InFunction -> false
| TPtVirg _
| TDotDot _
| TWhy _
-> false
| tok when TH.is_binary_operator tok -> false
| _ -> true
)
)
| _ -> false
)
in
if condition
then begin
msg_macro_toplevel_noptvirg s;
(* just to avoid the end-of-stream pb of ocamlyacc *)
let tcpar = Common.last info_parens in
tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok);
(*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
end;
find_macro_lineparen (xs)
(* macro with parameters
* ex: DEBUG()
* return x;
*)
| (Line
([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
Parenthised (xxs,info_parens);
] as _line1
))
::(Line
(PToken ({col = col2 } as other)::restline2
) as line2)
::xs
(* when s ==~ regexp_macro *)
->
(* This can give a false positive for K&R functions if the function
name is in the same column as the first parameter declaration. *)
let condition =
(col1 =|= col2 &&
(match other.tok with
| TOBrace _ -> false (* otherwise would match funcdecl *)
| TCBrace _ when ctx <> InFunction -> false
| TPtVirg _
| TDotDot _
| TWhy _
-> false
| tok when TH.is_binary_operator tok -> false
| _ -> true
)
)
||
(col2 <= col1 &&
(match other.tok, restline2 with
| TCBrace _, _ when ctx =*= InFunction -> true
| Treturn _, _ -> true
| Tif _, _ -> true
| Telse _, _ -> true
(* case of label, usually put in first line *)
| TIdent _, (PToken ({tok = TDotDot _}))::_ ->
true
| _ -> false
)
)
in
if condition
then
if col1 =|= 0 then ()
else begin
msg_macro_noptvirg s;
macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
[Parenthised (xxs, info_parens)] +>
iter_token_paren (TV.set_as_comment Token_c.CppMacro);
end;
find_macro_lineparen (line2::xs)
(* linuxext:? single macro
* ex: LOCK
* foo();
* UNLOCK
*
* todo: factorize code with previous rule ?
*)
| (Line
([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
] as _line1
))
::(Line
(PToken ({col = col2 } as other)::restline2
) as line2)
::xs ->
(* when s ==~ regexp_macro *)
let condition =
(col1 =|= col2 &&
col1 <> 0 && (* otherwise can match typedef of fundecl*)
(match other.tok with
| TPtVirg _ -> false
| TOr _ -> false
| TCBrace _ when ctx <> InFunction -> false
| TWhy _ -> false
| tok when TH.is_binary_operator tok -> false
| _ -> true
)) ||
(col2 <= col1 &&
(match other.tok with
| TCBrace _ when ctx =*= InFunction -> true
| Treturn _ -> true
| Tif _ -> true
| Telse _ -> true
| _ -> false
))
in
if condition
then begin
msg_macro_noptvirg_single s;
macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
end;
find_macro_lineparen (line2::xs)
| x::xs ->
find_macro_lineparen xs
(* ------------------------------------------------------------------------- *)
(* define tobrace init *)
(* ------------------------------------------------------------------------- *)
let rec find_define_init_brace_paren xs =
let rec aux xs =
match xs with
| [] -> ()
(* mainly for firefox *)
| (PToken {tok = TDefine _})
::(PToken {tok = TIdentDefine (s,_)})
::(PToken ({tok = TOBrace i1} as tokbrace))
::(PToken tok2)
::(PToken tok3)
::xs ->
let is_init =
match tok2.tok, tok3.tok with
| TInt _, TComma _ -> true
| TString _, TComma _ -> true
| TIdent _, TComma _ -> true
| _ -> false
in
if is_init
then begin
pr2_cpp("found define initializer: " ^s);
tokbrace.tok <- TOBraceDefineInit i1;
end;
aux xs
(* mainly for linux, especially in sound/ *)
| (PToken {tok = TDefine _})
::(PToken {tok = TIdentDefine (s,_)})
::(Parenthised(xxx, info_parens))
::(PToken ({tok = TOBrace i1} as tokbrace))
::(PToken tok2)
::(PToken tok3)
::xs ->
let is_init =
match tok2.tok, tok3.tok with
| TInt _, TComma _ -> true
| TDot _, TIdent _ -> true
| TIdent _, TComma _ -> true
| _ -> false
in
if is_init
then begin
pr2_cpp("found define initializer with param: " ^ s);
tokbrace.tok <- TOBraceDefineInit i1;
end;
aux xs
(* recurse *)
| (PToken x)::xs -> aux xs
| (Parenthised (xxs, info_parens))::xs ->
(* not need for tobrace init:
* xxs +> List.iter aux;
*)
aux xs
in
aux xs
(* ------------------------------------------------------------------------- *)
(* action *)
(* ------------------------------------------------------------------------- *)
(* obsolete now with macro expansion ? get some regression if comment.
* todo: if do bad decision here, then it can influence other phases
* and make it hard to parse. So maybe when have a parse error, should
* undo some of the guess those heuristics have done, and restore
* the original token value.
*)
let rec find_actions = function
| [] -> ()
| PToken ({tok = TIdent (s,ii)})
::Parenthised (xxs,info_parens)
::xs ->
find_actions xs;
xxs +> List.iter find_actions;
let modified = find_actions_params xxs in
if modified
then msg_macro_higher_order s
| x::xs ->
find_actions xs
and find_actions_params xxs =
xxs +> List.fold_left (fun acc xs ->
let toks = tokens_of_paren xs in
if toks +> List.exists (fun x -> TH.is_statement x.tok)
(* undo: && List.length toks > 1
* good for sparse, not good for linux
*)
then begin
xs +> iter_token_paren (fun x ->
if TH.is_eof x.tok
then
(* certainly because paren detection had a pb because of
* some ifdef-exp. Do similar additional checking than
* what is done in TV.set_as_comment.
*)
pr2 "PB: weird, I try to tag an EOF token as an action"
else
(* cf tests-bis/no_cpar_macro.c *)
if TH.is_eom x.tok
then
pr2 "PB: weird, I try to tag an EOM token as an action"
else
x.tok <- TAction (TH.info_of_tok x.tok);
);
true (* modified *)
end
else acc
) false
(* ------------------------------------------------------------------------- *)
(* main fix cpp function *)
(* ------------------------------------------------------------------------- *)
let filter_cpp_stuff xs =
List.filter
(function x ->
(match x.tok with
| tok when TH.is_comment tok -> false
(* don't want drop the define, or if drop, have to drop
* also its body otherwise the line heuristics may be lost
* by not finding the TDefine in column 0 but by finding
* a TDefineIdent in a column > 0
*)
| Parser_c.TDefine _ -> true
| tok when TH.is_cpp_instruction tok -> false
| _ -> true
))
xs
let insert_virtual_positions l =
let strlen x = String.length (Ast_c.str_of_info x) in
let rec loop prev offset acc = function
[] -> List.rev acc
| x::xs ->
let ii = TH.info_of_tok x in
let inject pi =
TH.visitor_info_of_tok (function ii -> Ast_c.rewrap_pinfo pi ii) x in
match Ast_c.pinfo_of_info ii with
Ast_c.OriginTok pi ->
let prev = Ast_c.parse_info_of_info ii in
loop prev (strlen ii) (x::acc) xs
| Ast_c.ExpandedTok (pi,_) ->
let x' = inject (Ast_c.ExpandedTok (pi,(prev,offset))) in
loop prev (offset + (strlen ii)) (x'::acc) xs
| Ast_c.FakeTok (s,_) ->
let x' = inject (Ast_c.FakeTok (s,(prev,offset))) in
loop prev (offset + (strlen ii)) (x'::acc) xs
| Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in
let rec skip_fake = function
| [] -> []
| x::xs ->
let ii = TH.info_of_tok x in
match Ast_c.pinfo_of_info ii with
| Ast_c.OriginTok pi ->
let prev = Ast_c.parse_info_of_info ii in
let res = loop prev (strlen ii) [] xs in
x::res
| _ -> x::skip_fake xs in
skip_fake l
(* ------------------------------------------------------------------------- *)
let fix_tokens_cpp2 ~macro_defs tokens =
let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
begin
(* the order is important, if you put the action heuristic first,
* then because of ifdef, can have not closed paren
* and so may believe that higher order macro
* and it will eat too much tokens. So important to do
* first the ifdef.
*
* I recompute multiple times cleaner cos the mutable
* can have be changed and so may have more comments
* in the token original list.
*
*)
commentize_skip_start_to_end !tokens2;
(* ifdef *)
let cleaner = !tokens2 +> List.filter (fun x ->
(* is_comment will also filter the TCommentCpp created in
* commentize_skip_start_to_end *)
not (TH.is_comment x.tok) (* could filter also #define/#include *)
) in
let ifdef_grouped = TV.mk_ifdef cleaner in
set_ifdef_parenthize_info ifdef_grouped;
find_ifdef_funheaders ifdef_grouped;
find_ifdef_bool ifdef_grouped;
find_ifdef_mid ifdef_grouped;
(* change order ? maybe cparen_else heuristic make some of the funheaders
* heuristics irrelevant ?
*)
find_ifdef_cparen_else ifdef_grouped;
adjust_inifdef_include ifdef_grouped;
(* macro 1 *)
let cleaner = !tokens2 +> filter_cpp_stuff in
let paren_grouped = TV.mk_parenthised cleaner in
Cpp_token_c.apply_macro_defs
~msg_apply_known_macro
~msg_apply_known_macro_hint
macro_defs paren_grouped;
(* because the before field is used by apply_macro_defs *)
tokens2 := TV.rebuild_tokens_extented !tokens2;
(* tagging contextual info (InFunc, InStruct, etc). Better to do
* that after the "ifdef-simplification" phase.
*)
let cleaner = !tokens2 +> List.filter (fun x ->
not (TH.is_comment x.tok) (* could filter also #define/#include *)
) in
let brace_grouped = TV.mk_braceised cleaner in
set_context_tag brace_grouped;
(* macro *)
let cleaner = !tokens2 +> filter_cpp_stuff in
let paren_grouped = TV.mk_parenthised cleaner in
let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
find_define_init_brace_paren paren_grouped;
find_string_macro_paren paren_grouped;
find_macro_lineparen line_paren_grouped;
find_macro_paren paren_grouped;
(* obsolete: actions ? not yet *)
let cleaner = !tokens2 +> filter_cpp_stuff in
let paren_grouped = TV.mk_parenthised cleaner in
find_actions paren_grouped;
insert_virtual_positions (!tokens2 +> Common.acc_map (fun x -> x.tok))
end
let time_hack1 ~macro_defs a =
Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 ~macro_defs a)
let fix_tokens_cpp ~macro_defs a =
Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 ~macro_defs a)
let can_be_on_top_level tl =
match tl with
| Tstruct _
| Ttypedef _
| TDefine _
| TIfdef _
| TIfdefelse _
| TIfdefelif _
| TIfdefBool _
| TIfdefMisc _
| TIfdefVersion _
| TEndif _ -> true
| _ -> false
(*****************************************************************************)
(* Lexing with lookahead *)
(*****************************************************************************)
(* Why using yet another parsing_hack technique ? The fix_xxx where do
* some pre-processing on the full list of tokens is not enough ?
* No cos sometimes we need more contextual info, and even if
* set_context() tries to give some contextual info, it's not completely
* accurate so the following code give yet another alternative, yet another
* chance to transform some tokens.
*
* todo?: maybe could try to get rid of this technique. Maybe a better
* set_context() would make possible to move this code using a fix_xx
* technique.
*
* LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
* it is more general to do it via my LALR(k) tech. Because here we can
* transform some token give some context information. So sometimes it
* makes sense to transform a token in one context, sometimes not, and
* lex can not provide us this context information. Note that the order
* in the pattern matching in lookahead is important. Do not cut/paste.
*
* Note that in next there is only "clean" tokens, there is no comment
* or space tokens. This is done by the caller.
*
*)
open Lexer_parser (* for the fields of lexer_hint type *)
let not_struct_enum = function
| (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _)::_ -> false
| _ -> true
let pointer ?(followed_by=fun _ -> true)
?(followed_by_more=fun _ -> true) ts =
let rec loop ts =
match ts with
| TMul _ :: rest -> loop rest
| TAnd _ :: rest when !Flag.c_plus_plus -> loop rest
| t :: ts' -> followed_by t && followed_by_more ts'
| [] -> failwith "unexpected end of token stream" in
match ts with
| TMul _ :: rest -> loop rest
| TAnd _ :: rest when !Flag.c_plus_plus -> loop rest
| _ -> false
let ident = function
TIdent _ -> true
| _ -> false
let is_type = function
| TypedefIdent _
| Tvoid _
| Tchar _
| Tfloat _
| Tdouble _
| Tsize_t _
| Tssize_t _
| Tptrdiff_t _
| Tint _
| Tlong _
| Tshort _ -> true
| _ -> false
let is_cparen = function (TCPar _) -> true | _ -> false
let is_oparen = function (TOPar _) -> true | _ -> false
let rec not_has_type_before f xs =
match xs with
| [] -> raise (Impossible 666)
| x :: xs ->
if f x then
true
else if is_type x then
false
else
not_has_type_before f xs
(* This function is inefficient, because it will look over a K&R header,
or function prototype multiple times. At least when we see a , and are in a
parameter list, we know we will eventually see a close paren, and it
should come fairly soon. *)
let k_and_r l =
let l1 = drop_until is_cparen l in
match l1 with
(TCPar _) :: (TOCro _) :: _ -> false
| (TCPar _) :: _ -> true
| _ -> false
(* (a)(b) is ambiguous, because (a) could be a function name or a cast.
At this point, we just see an ident for a; we don't know if it is eg a local
variable. This function sees at least if b is the only argument, ie there
are no commas at top level *)
let paren_before_comma l =
let rec loop level = function
[] -> false
| (TComma _)::_ when level = 1 -> false
| (TCPar _)::_ when level = 1 -> true
| (TCPar _)::rest -> loop (level-1) rest
| (TOPar _)::rest -> loop (level+1) rest
| x::rest -> loop level rest in
loop 0 l
let lookahead2 ~pass next before =
match (next, before) with
(* c++ hacks *)
(* yy xx( and in function *)
| TOPar i1::_, TIdent(s,i2)::TypedefIdent _::_
when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
pr2_cpp("constructed_object: " ^s);
TOParCplusplusInit i1
| TOPar i1::_, TIdent(s,i2)::ptr
when !Flag.c_plus_plus
&& pointer ~followed_by:(function TypedefIdent _ -> true | _ -> false) ptr
&& (LP.current_context () = (LP.InFunction)) ->
pr2_cpp("constructed_object: " ^s);
TOParCplusplusInit i1
| TypedefIdent(s,i)::TOPar i1::_,_
when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
TIdent(s,i)
(*-------------------------------------------------------------*)
(* typedef inference, parse_typedef_fix3 *)
(*-------------------------------------------------------------*)
(* xx xx *)
| (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
&& ok_typedef s
(* (take_safe 1 !passed_tok <> [TOPar]) -> *)
->
(* parse_typedef_fix3:
* acpi_object acpi_object;
* etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
* Le parser en interne a deja appelé le prochain token pour pouvoir
* decider des choses.
* => special case in lexer_heuristic, again
*)
if !Flag_parsing_c.debug_typedef
then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
LP.disable_typedef();
msg_typedef s i1 1; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* christia *)
(* delete[] *)
| (TOCro i1 :: _, Tdelete _ :: _)
when !Flag.c_plus_plus ->
TCommentCpp (Token_c.CppDirective, i1)
(* delete[] *)
| (TCCro i1 :: _, Tdelete _ :: _)
when !Flag.c_plus_plus ->
TCommentCpp (Token_c.CppDirective, i1)
(* extern "_" tt *)
| ((TString ((s, _), i1) | TMacroString (s, i1)) :: _ , Textern _ :: _)
when !Flag.c_plus_plus ->
TCommentCpp (Token_c.CppDirective, i1)
(* ) const { *)
| (Tconst i1 :: TOBrace _ :: _ , TCPar _ :: _)
when !Flag.c_plus_plus ->
TCommentCpp (Token_c.CppDirective, i1)
(* xx const tt *)
| (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::type_::_ , _)
when not_struct_enum before
&& is_type type_ ->
TCommentCpp (Token_c.CppDirective, i1)
(* xx struct *)
| (TIdent (s, i1)::Tstruct _::_ , _)
when not_struct_enum before ->
TCommentCpp (Token_c.CppDirective, i1)
(* xx tt *)
| (TIdent (s, i1)::type_::_ , _)
when not_struct_enum before
&& is_type type_ ->
TCommentCpp (Token_c.CppDirective, i1)
(* tt xx yy *)
| (TIdent (s, i1)::TIdent (s2, i2)::_ , seen::_)
when not_struct_enum before
&& is_type seen ->
if is_macro s2 then
TIdent (s, i1)
else
TCommentCpp (Token_c.CppDirective, i1)
(* exception to next rule *)
| (TIdent (s2, i2)::TOPar _::_ , TIdent (s1, i1)::seen::_)
when not_struct_enum before
&& is_macro s2 && is_type seen ->
TIdent (s2, i2)
| (TIdent (s2, i2)::_ , TIdent (s, i1)::seen::_)
when not_struct_enum before
&& is_macro s2 && is_type seen ->
TCommentCpp (Token_c.CppDirective, i2)
(* tt xx * *)
| (TIdent (s, i1)::ptr , seen::_)
when not_struct_enum before
&& pointer ptr && is_type seen ->
TCommentCpp (Token_c.CppDirective, i1)
(* tt * xx yy *)
| (TIdent (s, i1)::TIdent(s2, i2)::_ , ptr)
when not_struct_enum before
&& pointer ptr ->
if is_macro s2 then
TIdent (s, i1)
else
TCommentCpp (Token_c.CppDirective, i1)
(* exception to next rule *)
| (TIdent (s2, i2)::TOPar _::_ , TIdent (s1, i1)::ptr)
when not_struct_enum before
&& is_macro s2 && pointer ptr ->
TIdent (s2, i2)
(* tt * xx yy *)
| (TIdent(s2, i2)::_ , TIdent (s, i1)::ptr)
when not_struct_enum before
&& is_macro s2 && pointer ptr ->
TCommentCpp (Token_c.CppDirective, i2)
(* exception to next rule *)
| (TIdent(s2, i2)::TOPar _ :: _ , TIdent(s, i1)::seen::_)
when not_struct_enum before
&& is_macro s2 && is_type seen ->
TIdent(s2, i2)
(* tt xx yy *)
| (TIdent(s2, i2)::_ , TIdent(s, i1)::seen::_)
when not_struct_enum before
&& is_macro s2 && is_type seen ->
TCommentCpp (Token_c.CppDirective, i2)
(* xx * yy AND in paramdecl *)
| (TIdent (s, i1)::ptr , _)
when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
&& ok_typedef s ->
msg_typedef s i1 14; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx MM ( *)
(* exception to next rule *)
| (TIdent (s, i1)::TIdent (s2, i2)::TOPar _::_ , _) when not_struct_enum before
&& ok_typedef s && is_macro s2
->
TIdent (s, i1)
(* xx yy *)
| (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
&& ok_typedef s
->
(* && not_annot s2 BUT lead to false positive*)
msg_typedef s i1 2; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx inline *)
| (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
&& ok_typedef s
->
msg_typedef s i1 3; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* [,(] xx [,)] AND param decl *)
| (TIdent (s, i1)::(((TComma _|TCPar _)::_) as rest) ,
((TComma _ |TOPar _)::_ as bef))
when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
&& k_and_r rest
&& not_has_type_before is_cparen rest
&& not_has_type_before is_oparen bef
->
TKRParam(s,i1)
| (TIdent (s, i1)::((TComma _|TCPar _)::_) , (TComma _ |TOPar _)::_ )
when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
&& ok_typedef s
->
msg_typedef s i1 4; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx* [,)] *)
(* specialcase: [,(] xx* [,)] *)
| (TIdent (s, i1)::ptr , (*(TComma _|TOPar _)::*)_ )
when pointer ~followed_by:(function TComma _ |TCPar _ -> true | _ -> false) ptr
&& not_struct_enum before
(* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
&& ok_typedef s
->
msg_typedef s i1 5; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx** [,)] *)
(* specialcase: [,(] xx** [,)] *)
| (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
when not_struct_enum before
(* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
&& ok_typedef s
->
msg_typedef s i1 6; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx const * USELESS because of next rule ? *)
| (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
when not_struct_enum before
(* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
&& ok_typedef s
->
msg_typedef s i1 7; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx const *)
| (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
when not_struct_enum before
&& ok_typedef s
(* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
->
msg_typedef s i1 8; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx * const *)
| (TIdent (s, i1)::ptr , _ )
when pointer ~followed_by:(function Tconst _ | Tvolatile _ | Trestrict _ -> true | _ -> false) ptr
&& not_struct_enum before
&& ok_typedef s
->
(* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
msg_typedef s i1 9; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* ( const xx) *)
| (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
ok_typedef s ->
msg_typedef s i1 10; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* ( xx ) [sizeof, ~] *)
| (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
when not_struct_enum before
&& ok_typedef s
->
msg_typedef s i1 11; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* [(,] xx [ AND parameterdeclaration *)
| (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
when (LP.current_context() =*= LP.InParameter)
&& ok_typedef s
->
msg_typedef s i1 12; LP.add_typedef_root s;
TypedefIdent (s, i1)
(*------------------------------------------------------------*)
(* if 'x*y' maybe an expr, maybe just a classic multiplication *)
(* but if have a '=', or ',' I think not *)
(*------------------------------------------------------------*)
(* static xx * yy *)
| (TIdent (s, i1)::ptr ,
(Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
&& ok_typedef s
->
msg_typedef s i1 13; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* TODO xx * yy ; AND in start of compound element *)
(* xx * yy, AND in paramdecl *)
| (TIdent (s, i1)::ptr , _)
when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
&& ok_typedef s
->
msg_typedef s i1 14; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx * yy ; AND in Toplevel, except when have = before *)
| (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
TIdent (s, i1)
| (TIdent (s, i1)::ptr , _)
when not_struct_enum before
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TPtVirg _ :: _ -> true | _ -> false) ptr
&& (LP.is_top_or_struct (LP.current_context ()))
->
msg_typedef s i1 15; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx * yy , AND in Toplevel *)
| (TIdent (s, i1)::ptr , _)
when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
&& ok_typedef s
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
->
msg_typedef s i1 16; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx * yy ( AND in Toplevel *)
| (TIdent (s, i1)::ptr , _)
when not_struct_enum before
&& (LP.is_top_or_struct (LP.current_context ()))
&& ok_typedef s
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TOPar _ :: _ -> true | _ -> false) ptr
->
msg_typedef s i1 17; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx * yy [ *)
(* todo? enough ? cos in struct def we can have some expression ! *)
| (TIdent (s, i1)::ptr , _)
when not_struct_enum before
&& (LP.is_top_or_struct (LP.current_context ()))
&& ok_typedef s
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TOCro _ :: _ -> true | _ -> false) ptr
->
msg_typedef s i1 18; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* u16: 10; in struct *)
| (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
when (LP.is_top_or_struct (LP.current_context ()))
&& ok_typedef s
->
msg_typedef s i1 19; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* why need TOPar condition as stated in preceding rule ? really needed ? *)
(* YES cos at toplevel can have some expression !! for instance when *)
(* enter in the dimension of an array *)
(*
| (TIdent s::TMul::TIdent s2::_ , _)
when (take_safe 1 !passed_tok <> [Tstruct] &&
(take_safe 1 !passed_tok <> [Tenum]))
&&
!LP._lexer_hint = Some LP.Toplevel ->
msg_typedef s 20; LP.add_typedef_root s;
TypedefIdent s
*)
(* xx * yy = *)
| (TIdent (s, i1)::ptr , _)
when not_struct_enum before
&& ok_typedef s
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TEq _ :: _ -> true | _ -> false) ptr
->
msg_typedef s i1 21; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx * yy) AND in paramdecl *)
| (TIdent (s, i1)::ptr , _)
when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
&& ok_typedef s
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TCPar _ :: _ -> true | _ -> false) ptr
->
msg_typedef s i1 22; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx * yy; *) (* wrong ? *)
| (TIdent (s, i1)::ptr ,
(TOBrace _| TPtVirg _)::_) when not_struct_enum before
&& ok_typedef s
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TPtVirg _ :: _ -> true | _ -> false) ptr
->
msg_typedef s i1 23; LP.add_typedef_root s;
msg_maybe_dangereous_typedef s;
TypedefIdent (s, i1)
(* xx * yy, and ';' before xx *) (* wrong ? *)
| (TIdent (s, i1)::ptr ,
(TOBrace _| TPtVirg _)::_) when
ok_typedef s
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false)
~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
->
msg_typedef s i1 24; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx_t * yy *)
| (TIdent (s, i1)::ptr , _)
when s ==~ regexp_typedef && not_struct_enum before
(* struct user_info_t sometimes *)
&& ok_typedef s
&& pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
->
msg_typedef s i1 25; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx ** yy *) (* wrong ? *)
| (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
when not_struct_enum before
&& (LP.current_context() =*= LP.InParameter)
&& ok_typedef s
->
msg_typedef s i1 26; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx ** yy *) (* wrong ? *)
| (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , (TOBrace _ | TPtVirg _)::_)
when not_struct_enum before
(* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
&& ok_typedef s
(* christia : this code catches 'a * *b' which is wrong
*)
->
msg_typedef s i1 26; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx *** yy *)
| (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
when not_struct_enum before
&& ok_typedef s
(* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
->
msg_typedef s i1 27; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* xx ** ) *)
| (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
when not_struct_enum before
(* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
&& ok_typedef s
->
msg_typedef s i1 28; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* ----------------------------------- *)
(* old: why not do like for other rules and start with TIdent ?
* why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
* new: prefer now start with TIdent because otherwise the add_typedef_root
* may have no effect if in second pass or if have disable the add_typedef.
*)
(* (xx) yy *)
| (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
(TOPar info)::x::_)
when not (TH.is_stuff_taking_parenthized x) (* &&
Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 - why useful?
*)
&& ok_typedef s
&& not (ident x) (* possible K&R declaration *)
->
msg_typedef s i1 29; LP.add_typedef_root s;
(*TOPar info*)
TypedefIdent (s, i1)
(* (xx) ( yy)
* but false positif: typedef int (xxx_t)(...), so do specialisation below.
*)
(*
| (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
when not (TH.is_stuff_taking_parenthized x)
&& ok_typedef s
->
msg_typedef s 30; LP.add_typedef_root s;
(* TOPar info *)
TypedefIdent (s, i1)
*)
(* special case: = (xx) ( yy) *)
| (TIdent (s, i1)::TCPar _::((TOPar _::_) as rest) ,
(TOPar info)::(TEq _ |TEqEq _)::_)
when ok_typedef s && paren_before_comma rest
->
msg_typedef s i1 31; LP.add_typedef_root s;
(* TOPar info *)
TypedefIdent (s, i1)
(* (xx * ) yy *)
| (TIdent (s, i1)::ptr, (TOPar info)::_)
when ok_typedef s
&& pointer ~followed_by:(function TCPar _ -> true | _ -> false)
~followed_by_more:(function TIdent _ :: _ -> true | _ -> false) ptr
->
msg_typedef s i1 32; LP.add_typedef_root s;
(*TOPar info*)
TypedefIdent (s,i1)
(* (xx){ ... } constructor *)
| (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
&& ok_typedef s
->
msg_typedef s i1 33; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* can have sizeof on expression
| (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
msg_typedef s; LP.add_typedef_root s;
Tsizeof
*)
(* ----------------------------------- *)
(* x ( *y )(params), function pointer *)
| (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
when not_struct_enum before
&& ok_typedef s
->
msg_typedef s i1 34; LP.add_typedef_root s;
TypedefIdent (s, i1)
(* x* ( *y )(params), function pointer 2 *)
| (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
when not_struct_enum before
&& ok_typedef s
->
msg_typedef s i1 35; LP.add_typedef_root s;
TypedefIdent (s, i1)
(*-------------------------------------------------------------*)
(* CPP *)
(*-------------------------------------------------------------*)
| ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
as x)
::_, _
->
(*
if not !Flag_parsing_c.ifdef_to_if
then TCommentCpp (Ast_c.CppIfDirective, ii)
else
*)
(* not !LP._lexer_hint.toplevel *)
if !Flag_parsing_c.ifdef_directive_passing
|| (pass >= 2)
then begin
if (LP.current_context () =*= LP.InInitializer)
then begin
pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
incr Stat.nIfdefInitializer;
end else begin
pr2_cpp("IFDEF: or related inside function. I treat it as comment");
incr Stat.nIfdefPassing;
end;
let x =
match x with
TIfdef _ | TIfdefMisc _ | TIfdefVersion _ -> Token_c.IfDef
| TIfdefBool _ -> Token_c.IfDef0
| TIfdefelse _ | TIfdefelif _ -> Token_c.Else
| TEndif _ -> Token_c.Endif
| _ -> Token_c.Other in (* not possible here *)
TCommentCpp (Token_c.CppIfDirective x, ii)
end
else x
| (TUndef (ii) as x)::_, _
->
if (pass >= 2)
then begin
pr2_cpp("UNDEF: I treat it as comment");
TCommentCpp (Token_c.CppDirective, ii)
end
else x
| (TCppDirectiveOther (ii) as x)::_, _
->
if (pass >= 2)
then begin
pr2_cpp ("OTHER directive: I treat it as comment");
TCommentCpp (Token_c.CppDirective, ii)
end
else x
(* If ident contain a for_each, then certainly a macro. But to be
* sure should look if there is a '{' after the ')', but it requires
* to count the '('. Because this can be expensive, we do that only
* when the token contains "for_each".
*)
| (TIdent (s, i1)::TOPar _::rest, _)
when not (LP.current_context () =*= LP.InTopLevel)
(* otherwise a function such as static void loopback_enable(int i) {
* will be considered as a loop
*)
->
if s ==~ regexp_foreach &&
is_really_foreach (Common.take_safe forLOOKAHEAD rest)
then begin
msg_foreach s;
TMacroIterator (s, i1)
end
else TIdent (s, i1)
(* (* christia: here insert support for macros on top level *)
| TIdent (s, ii) :: tl :: _, _ when
can_be_on_top_level tl && LP.current_context () = InTopLevel ->
pr2_cpp ("'" ^ s ^ "' looks like a macro, I treat it as comment");
TCommentCpp (Token_c.CppDirective, ii)
*)
(*-------------------------------------------------------------*)
| v::xs, _ -> v
| _ -> raise (Impossible 93)
let lookahead ~pass a b =
Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b)