3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
18 module TH
= Token_helpers
19 module TV
= Token_views_c
20 module LP
= Lexer_parser
22 module Stat
= Parsing_stat
28 (*****************************************************************************)
29 (* Some debugging functions *)
30 (*****************************************************************************)
(* Module-local printing helpers, built once by [Common.mk_pr2_wrappers]
 * from the [Flag_parsing_c.verbose_parsing] flag.
 * NOTE(review): presumably both helpers only print when that flag is
 * set -- confirm against Common. *)
let pr2, pr2_once =
  let verbosity_flag = Flag_parsing_c.verbose_parsing in
  Common.mk_pr2_wrappers verbosity_flag
35 if !Flag_parsing_c.debug_cpp
36 then Common.pr2_once
("CPP-" ^ s
)
39 let msg_gen cond is_known printer s
=
42 if not
(!Flag_parsing_c.filter_msg
)
49 (* In the following, there are some hardcoded names of types or macros
50 * but they are not used by our heuristics! They are just here to
51 * enable to detect false positive by printing only the typedef/macros
52 * that we don't know yet. If we print everything, then we can easily
53 * get lost with too much verbose tracing information. So those
54 * functions "filter" some messages. So our heuristics are still good,
55 * there is no more (or not that much) hardcoded linux stuff.
61 | "u_char" | "u_short" | "u_int" | "u_long"
62 | "u8" | "u16" | "u32" | "u64"
63 | "s8" | "s16" | "s32" | "s64"
64 | "__u8" | "__u16" | "__u32" | "__u64"
75 | s
when s
=~
".*_t$" -> true
80 (* note: can't use partial application with let msg_typedef =
81 * because it would compute msg_typedef at compile time when
82 * the flag debug_typedef is always false
84 let msg_typedef s ii n
=
85 incr
Stat.nTypedefInfer
;
86 msg_gen (!Flag_parsing_c.debug_typedef
)
90 (Printf.sprintf
"TYPEDEF: promoting:(%d) %s on line %d" n s
91 (Ast_c.line_of_info ii
))
92 (*(Printf.sprintf "TYPEDEF: promoting: %s on line %d" s
93 (Ast_c.line_of_info ii))*)
97 let msg_maybe_dangereous_typedef s
=
98 if not
(is_known_typdef s
)
101 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s
)
105 let msg_declare_macro s
=
106 incr
Stat.nMacroDecl
;
107 msg_gen (!Flag_parsing_c.debug_cpp
)
110 | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
111 | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
112 | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
113 | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
114 | "SENSOR_DEVICE_ATTR"
116 | "DECLARE_WORK" | "DECLARE_TASKLET"
117 | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
122 | s when s =~ "^DECLARE_.*" -> true
123 | s when s =~ ".*_ATTR$" -> true
124 | s when s =~ "^DEFINE_.*" -> true
130 (fun s
-> pr2_cpp ("MACRO: found declare-macro: " ^ s
))
135 incr
Stat.nIteratorHeuristic
;
136 pr2_cpp ("MACRO: found foreach: " ^ s
)
(* Report, through [pr2_cpp], that the macro named [s] was recognised as
 * a debug macro. No statistics counter is touched here. *)
let msg_debug_macro s =
  let report = "MACRO: found debug-macro: " ^ s in
  pr2_cpp report
(* Bump [Stat.nMacroStmt], then report through [pr2_cpp] that [s] is a
 * macro with parameters and no terminating semicolon ("noptvirg"). *)
let msg_macro_noptvirg s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp (Printf.sprintf "MACRO: found macro with param noptvirg: %s" s)
(* Bump [Stat.nMacroStmt], then report through [pr2_cpp] that [s] is a
 * toplevel macro without a terminating semicolon. *)
let msg_macro_toplevel_noptvirg s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp (Printf.sprintf "MACRO: found toplevel macro noptvirg: %s" s)
(* Bump [Stat.nMacroStmt], then report through [pr2_cpp] that [s] is a
 * single (parameterless) macro without a terminating semicolon. *)
let msg_macro_noptvirg_single s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp (Printf.sprintf "MACRO: found single-macro noptvirg: %s" s)
160 let msg_macro_higher_order s
=
161 incr
Stat.nMacroHigherOrder
;
162 msg_gen (!Flag_parsing_c.debug_cpp
)
172 (fun s
-> pr2_cpp ("MACRO: found higher ordre macro : " ^ s
))
176 let msg_stringification s
=
177 incr
Stat.nMacroString
;
178 msg_gen (!Flag_parsing_c.debug_cpp
)
186 (* s when s =~ ".*STR.*" -> true *)
190 (fun s
-> pr2_cpp ("MACRO: found string-macro " ^ s
))
(* Bump [Stat.nMacroString], then report through [pr2_cpp] that [s] is a
 * string-macro that takes parameters. *)
let msg_stringification_params s =
  let () = incr Stat.nMacroString in
  let report = "MACRO: string-macro with params : " ^ s in
  pr2_cpp report
(* Bump [Stat.nMacroExpand], then report through [pr2_cpp] that the
 * already-known macro [s] was found. *)
let msg_apply_known_macro s =
  let () = incr Stat.nMacroExpand in
  let report = "MACRO: found known macro = " ^ s in
  pr2_cpp report
(* Bump [Stat.nMacroHint], then report through [pr2_cpp] that the known
 * macro hint [s] was found. *)
let msg_apply_known_macro_hint s =
  let () = incr Stat.nMacroHint in
  let report = "MACRO: found known macro hint = " ^ s in
  pr2_cpp report
210 let msg_ifdef_bool_passing is_ifdef_positif
=
211 incr
Stat.nIfdefZero
; (* of Version ? *)
213 then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION"
214 else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
(* Bump [Stat.nIfdefExprPassing], then report the "ifdef-mid-something"
 * situation through [pr2_cpp]. *)
let msg_ifdef_mid_something () =
  let () = incr Stat.nIfdefExprPassing in
  pr2_cpp "found ifdef-mid-something"
221 let msg_ifdef_funheaders () =
222 incr
Stat.nIfdefFunheader
;
(* Bump [Stat.nIfdefPassing], then report the "ifdef-cparen-else"
 * situation through [pr2_cpp]. *)
let msg_ifdef_cparen_else () =
  let () = incr Stat.nIfdefPassing in
  pr2_cpp "found ifdef-cparen-else"
230 let msg_attribute s
=
231 incr
Stat.nMacroAttribute
;
236 (*****************************************************************************)
237 (* The regexp and basic view definitions *)
238 (*****************************************************************************)
240 (* opti: better to build them once and for all, especially regexp_foreach *)
242 let regexp_macro = Str.regexp
246 let regexp_annot = Str.regexp
250 let regexp_declare = Str.regexp
(* Case-insensitive regexp, compiled once, for identifiers that look
 * like loop/iterator macros: any prefix followed by one of for_each,
 * for_all, iterate, loop, walk, scan, each or for (the underscore in
 * for_each / for_all is optional). *)
let regexp_foreach =
  let foreach_pattern =
    ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
  in
  Str.regexp_case_fold foreach_pattern
257 let regexp_typedef = Str.regexp
260 let false_typedef = [
(* [ok_typedef s] is [true] unless [s] appears in [false_typedef]. *)
let ok_typedef s =
  if List.mem s false_typedef then false else true
268 not
(s
==~
regexp_annot)
273 (*****************************************************************************)
275 (*****************************************************************************)
277 (* ------------------------------------------------------------------------- *)
278 (* the pair is the status of '()' and '{}', ex: (-1,0)
279 * if too much ')' and good '{}'
280 * could do for [] too ?
281 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
282 * then if have ifdef, then certainly can lead to a problem.
284 let (count_open_close_stuff_ifdef_clause
: TV.ifdef_grouped list
-> (int * int))=
286 let cnt_paren, cnt_brace
= ref 0, ref 0 in
287 xs
+> TV.iter_token_ifdef
(fun x
->
289 | x
when TH.is_opar x
-> incr
cnt_paren
290 | TOBrace _
-> incr cnt_brace
291 | x
when TH.is_cpar x
-> decr
cnt_paren
292 | TCBrace _
-> decr cnt_brace
296 !cnt_paren, !cnt_brace
299 (* ------------------------------------------------------------------------- *)
(* Look-ahead bound for the foreach heuristic.
 * NOTE(review): the use site is not visible in this excerpt; presumably
 * it caps how many tokens [is_really_foreach] inspects -- confirm. *)
let forLOOKAHEAD : int = 30
303 (* look if there is a '{' just after the closing ')', and handling the
304 * possibility to have nested expressions inside nested parenthesis
306 * todo: use indentation instead of premier(statement) ?
308 let rec is_really_foreach xs
=
309 let rec is_foreach_aux = function
311 | TCPar _
::TOBrace _
::xs
-> true, xs
312 (* the following attempts to handle the cases where there is a
313 single statement in the body of the loop. undoubtedly more
315 todo: premier(statement) - suivant(funcall)
317 | TCPar _
::TIdent _
::xs
-> true, xs
318 | TCPar _
::Tif _
::xs
-> true, xs
319 | TCPar _
::Twhile _
::xs
-> true, xs
320 | TCPar _
::Tfor _
::xs
-> true, xs
321 | TCPar _
::Tswitch _
::xs
-> true, xs
322 | TCPar _
::Treturn _
::xs
-> true, xs
325 | TCPar _
::xs
-> false, xs
327 let (_
, xs'
) = is_foreach_aux xs
in
329 | x
::xs
-> is_foreach_aux xs
331 is_foreach_aux xs
+> fst
334 (* ------------------------------------------------------------------------- *)
335 let set_ifdef_token_parenthize_info cnt x
=
338 | TIfdefelse
(tag
, _
)
339 | TIfdefelif
(tag
, _
)
342 | TIfdefBool
(_
, tag
, _
)
343 | TIfdefMisc
(_
, tag
, _
)
344 | TIfdefVersion
(_
, tag
, _
)
348 | _
-> raise Impossible
(* Global counter, starting at 0. [set_ifdef_parenthize_info] increments
 * it once per [Ifdef]/[Ifdefbool] group it visits and passes the current
 * value, paired with the number of directives in the group, to
 * [set_ifdef_token_parenthize_info]. *)
let ifdef_paren_cnt : int ref = ref 0
355 let rec set_ifdef_parenthize_info xs
=
356 xs
+> List.iter
(function
357 | NotIfdefLine xs
-> ()
358 | Ifdefbool
(_
, xxs
, info_ifdef
)
359 | Ifdef
(xxs
, info_ifdef
) ->
361 incr
ifdef_paren_cnt;
362 let total_directives = List.length info_ifdef
in
364 info_ifdef
+> List.iter
(fun x
->
365 set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives)
367 xxs
+> List.iter
set_ifdef_parenthize_info
371 (*****************************************************************************)
372 (* The parsing hack for #define *)
373 (*****************************************************************************)
375 (* To parse macro definitions I need to do some tricks
376 * as some information can be obtained only at the lexing level. For instance
377 * the space after the name of the macro in '#define foo (x)' is meaningful
378 * but the grammar cannot get this information. So define_ident below
379 * looks at such a space and generates a special TOParDefine. In a similar
380 * way macro definitions can contain some backslashes and newlines
381 * and the grammar needs to know where the macro ends (which is
382 * a line-level and so low token-level information). Hence the
383 * function 'define_line' below and the TDefEol.
385 * update: TDefEol is handled in a special way at different places,
386 * a little bit like EOF, especially for error recovery, so this
387 * is an important token that should not be retagged!
390 * ugly hack, a better solution perhaps would be to erase TDefEOL
391 * from the Ast and list of tokens in parse_c.
393 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
395 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
396 * is where is the \n TCommentSpace. Normally there is always a last token
397 * to synchronize on, either EOF or the token of the next toplevel.
398 * In the case of the #define we got in list of token
399 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
400 * not synchronize on it and so we will not print the "\n".
401 * A solution would be to put the TDefEOL before the "\n".
402 * (jll: tried to do this, see the comment "Put end of line..." below)
404 * todo?: could put a ExpandedTok for that ?
406 let mark_end_define ii
=
408 { Ast_c.pinfo
= Ast_c.OriginTok
{ (Ast_c.parse_info_of_info
ii) with
410 Common.charpos
= Ast_c.pos_of_info
ii + 1
412 cocci_tag
= ref Ast_c.emptyAnnot
;
413 comments_tag
= ref Ast_c.emptyComments
;
418 (* put the TDefEOL at the good place *)
419 let rec define_line_1 acc xs
=
423 let line = Ast_c.line_of_info
ii in
424 let acc = (TDefine
ii) :: acc in
425 define_line_2
acc line ii xs
427 let line = Ast_c.line_of_info
ii in
428 let acc = (TUndef
ii) :: acc in
429 define_line_2
acc line ii xs
430 | TCppEscapedNewline
ii::xs
->
431 pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
432 pr2 (Ast_c.strloc_of_info
ii);
433 let acc = (TCommentSpace
ii) :: acc in
435 | x
::xs
-> define_line_1 (x
::acc) xs
437 and define_line_2
acc line lastinfo xs
=
440 (* should not happened, should meet EOF before *)
442 List.rev
(mark_end_define lastinfo
::acc)
444 let line'
= TH.line_of_tok x
in
445 let info = TH.info_of_tok x
in
449 let acc = (mark_end_define lastinfo
) :: acc in
450 let acc = (EOF
ii) :: acc in
452 | TCppEscapedNewline
ii ->
453 if (line'
<> line) then pr2 "PB: WEIRD: not same line number";
454 let acc = (TCommentSpace
ii) :: acc in
455 define_line_2
acc (line+1) info xs
458 then define_line_2
(x
::acc) line info xs
460 (* Put end of line token before the newline. A newline at least
461 must be there because the line changed and because we saw a
462 #define previously to get to this function at all *)
464 ((List.hd
acc)::(mark_end_define lastinfo
::(List.tl
acc)))
468 let rec define_ident acc xs
=
472 let acc = TUndef
ii :: acc in
474 TCommentSpace i1
::TIdent
(s
,i2
)::xs
->
475 let acc = (TCommentSpace i1
) :: acc in
476 let acc = (TIdentDefine
(s
,i2
)) :: acc in
479 pr2 "WEIRD: weird #define body";
483 let acc = TDefine
ii :: acc in
485 | TCommentSpace i1
::TIdent
(s
,i2
)::TOPar
(i3
)::xs
->
486 (* Change also the kind of TIdent to avoid bad interaction
487 * with other parsing_hack tricks. For instant if keep TIdent then
488 * the stringication algo can believe the TIdent is a string-macro.
489 * So simpler to change the kind of the ident too.
491 (* if TOParDefine sticked to the ident, then
492 * it's a macro-function. Change token to avoid ambiguity
493 * between #define foo(x) and #define foo (x)
495 let acc = (TCommentSpace i1
) :: acc in
496 let acc = (TIdentDefine
(s
,i2
)) :: acc in
497 let acc = (TOParDefine i3
) :: acc in
500 | TCommentSpace i1
::TIdent
(s
,i2
)::xs
->
501 let acc = (TCommentSpace i1
) :: acc in
502 let acc = (TIdentDefine
(s
,i2
)) :: acc in
505 (* bugfix: ident of macro (as well as params, cf below) can be tricky
506 * note, do we need to subst in the body of the define ? no cos
507 * here the issue is the name of the macro, as in #define inline,
508 * so obviously the name of this macro will not be used in its
509 * body (it would be a recursive macro, which is forbidden).
512 | TCommentSpace i1
::t
::xs
->
514 let s = TH.str_of_tok t
in
515 let ii = TH.info_of_tok t
in
516 if s ==~
Common.regexp_alpha
518 pr2 (spf
"remapping: %s to an ident in macro name" s);
519 let acc = (TCommentSpace i1
) :: acc in
520 let acc = (TIdentDefine
(s,ii)) :: acc in
524 pr2 "WEIRD: weird #define body";
529 pr2 "WEIRD: weird #define body";
533 let acc = x
:: acc in
(* Two-pass #define fix-up over the token list [xs]: [define_line_1]
 * runs first (starting from an empty accumulator), and its result is
 * then handed to [define_ident], also with an empty accumulator. *)
let fix_tokens_define2 xs =
  let after_define_line = define_line_1 [] xs in
  define_ident [] after_define_line
(* Entry point for the #define fix-up: runs [fix_tokens_define2] on [a]
 * inside [Common.profile_code], under the profiling label
 * "C parsing.fix_define". *)
let fix_tokens_define a =
  let run () = fix_tokens_define2 a in
  Common.profile_code "C parsing.fix_define" run
548 (* ------------------------------------------------------------------------- *)
549 (* Other parsing hacks related to cpp, Include/Define hacks *)
550 (* ------------------------------------------------------------------------- *)
552 (* Sometimes I prefer to generate a single token for a list of things in the
553 * lexer so that if I have to pass them, like for passing TInclude then
554 * it's easy. Also if I don't do a single token, then I need to
555 * parse the rest which may not need special stuff, like detecting
556 * end of line which the parser is not really ready for. So for instance
557 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
558 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
559 * but this kind of token is valid only after a #include and the
560 * lexing and parsing rules are different for such tokens so not that
561 * easy to parse such things in parser_c.mly. Hence the following hacks.
563 * less?: maybe could get rid of this like I get rid of some of fix_define.
568 (* used to generate new token from existing one *)
569 let new_info posadd str
ii =
571 Ast_c.OriginTok
{ (Ast_c.parse_info_of_info
ii) with
572 charpos
= Ast_c.pos_of_info
ii + posadd
;
574 column
= Ast_c.col_of_info
ii + posadd
;
576 (* must generate a new ref each time, otherwise share *)
577 cocci_tag
= ref Ast_c.emptyAnnot
;
578 comments_tag
= ref Ast_c.emptyComments
;
582 let rec comment_until_defeol xs
=
585 (* job not done in Cpp_token_c.define_parse ? *)
586 failwith
"cant find end of define token TDefEOL"
589 | Parser_c.TDefEOL i
->
590 Parser_c.TCommentCpp
(Token_c.CppDirective
, TH.info_of_tok x
)
594 (* bugfix: otherwise may lose a TComment token *)
595 if TH.is_real_comment
x
597 else Parser_c.TCommentCpp
(Token_c.CppPassingNormal
(*good?*), TH.info_of_tok
x)
599 x'
::comment_until_defeol xs
602 let drop_until_defeol xs
=
604 (Common.drop_until
(function Parser_c.TDefEOL _
-> true | _
-> false) xs
)
608 (* ------------------------------------------------------------------------- *)
609 (* returns a pair (replaced token, list of next tokens) *)
610 (* ------------------------------------------------------------------------- *)
612 let tokens_include (info, includes
, filename
, inifdef
) =
613 Parser_c.TIncludeStart
(Ast_c.rewrap_str includes
info, inifdef
),
614 [Parser_c.TIncludeFilename
615 (filename
, (new_info (String.length includes
) filename
info))
621 (*****************************************************************************)
622 (* CPP handling: macros, ifdefs, macros defs *)
623 (*****************************************************************************)
625 (* ------------------------------------------------------------------------- *)
626 (* special skip_start skip_end handling *)
627 (* ------------------------------------------------------------------------- *)
629 (* note: after this normally the token list should not contain any more the
630 * TCommentSkipTagStart and End tokens.
632 let rec commentize_skip_start_to_end xs
=
637 | {tok
= TCommentSkipTagStart
info} ->
639 let (before
, x2
, after
) =
640 xs
+> Common.split_when
(function
641 | {tok
= TCommentSkipTagEnd _
} -> true
645 let topass = x::x2
::before
in
646 topass +> List.iter
(fun tok
->
647 set_as_comment
Token_c.CppPassingExplicit tok
649 commentize_skip_start_to_end after
651 failwith
"could not find end of skip_start special comment"
653 | {tok
= TCommentSkipTagEnd
info} ->
654 failwith
"found skip_end comment but no skip_start"
656 commentize_skip_start_to_end xs
662 (* ------------------------------------------------------------------------- *)
663 (* ifdef keeping/passing *)
664 (* ------------------------------------------------------------------------- *)
666 (* #if 0, #if 1, #if LINUX_VERSION handling *)
667 let rec find_ifdef_bool xs
=
668 xs
+> List.iter
(function
669 | NotIfdefLine _
-> ()
670 | Ifdefbool
(is_ifdef_positif
, xxs
, info_ifdef_stmt
) ->
672 msg_ifdef_bool_passing is_ifdef_positif
;
675 | [] -> raise Impossible
676 | firstclause
::xxs
->
677 info_ifdef_stmt
+> List.iter
(set_as_comment
Token_c.CppDirective
);
680 then xxs
+> List.iter
681 (iter_token_ifdef
(set_as_comment
Token_c.CppPassingNormal
))
683 firstclause
+> iter_token_ifdef
(set_as_comment
Token_c.CppPassingNormal
);
684 (match List.rev xxs
with
688 (iter_token_ifdef
(set_as_comment
Token_c.CppPassingNormal
))
689 | [] -> (* not #else *) ()
694 | Ifdef
(xxs
, info_ifdef_stmt
) -> xxs
+> List.iter
find_ifdef_bool
(* Size limit used by [find_ifdef_mid]: an ifdef is only analysed when
 * every one of its clauses has at most this many elements. *)
let thresholdIfdefSizeMid : int = 6
701 (* infer ifdef involving not-closed expressions/statements *)
702 let rec find_ifdef_mid xs
=
703 xs
+> List.iter
(function
704 | NotIfdefLine _
-> ()
705 | Ifdef
(xxs
, info_ifdef_stmt
) ->
707 | [] -> raise Impossible
709 | first
::second
::rest
->
710 (* don't analyse big ifdef *)
711 if xxs
+> List.for_all
712 (fun xs
-> List.length xs
<= thresholdIfdefSizeMid) &&
713 (* don't want nested ifdef *)
714 xxs
+> List.for_all
(fun xs
->
716 (function NotIfdefLine _
-> true | _
-> false)
720 let counts = xxs
+> List.map count_open_close_stuff_ifdef_clause
in
721 let cnt1, cnt2
= List.hd
counts in
722 if cnt1 <> 0 || cnt2
<> 0 &&
723 counts +> List.for_all
(fun x -> x =*= (cnt1, cnt2
))
725 if counts +> List.exists (fun (cnt1, cnt2) ->
726 cnt1 <> 0 || cnt2 <> 0
730 msg_ifdef_mid_something();
732 (* keep only first, treat the rest as comment *)
733 info_ifdef_stmt
+> List.iter
(set_as_comment
Token_c.CppDirective
);
734 (second
::rest
) +> List.iter
735 (iter_token_ifdef
(set_as_comment
Token_c.CppPassingCosWouldGetError
));
739 List.iter
find_ifdef_mid xxs
741 (* no need complex analysis for ifdefbool *)
742 | Ifdefbool
(_
, xxs
, info_ifdef_stmt
) ->
743 List.iter
find_ifdef_mid xxs
(* Size limit used by [find_ifdef_funheaders]: the two-clause
 * function-header pattern only applies when the remainder of each
 * clause has at most this many elements. *)
let thresholdFunheaderLimit : int = 4
751 (* ifdef defining alternate function header, type *)
752 let rec find_ifdef_funheaders = function
754 | NotIfdefLine _
::xs
-> find_ifdef_funheaders xs
756 (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
758 ([(NotIfdefLine
(({col
= 0} as _xline1
)::line1
))::ifdefblock1
;
759 (NotIfdefLine
(({col
= 0} as xline2
)::line2
))::ifdefblock2
762 ::NotIfdefLine
(({tok
= TOBrace i
; col
= 0})::line3
)
764 when List.length ifdefblock1
<= thresholdFunheaderLimit &&
765 List.length ifdefblock2
<= thresholdFunheaderLimit
767 find_ifdef_funheaders xs
;
769 msg_ifdef_funheaders ();
770 info_ifdef_stmt
+> List.iter
(set_as_comment
Token_c.CppDirective
);
771 let all_toks = [xline2
] @ line2
in
772 all_toks +> List.iter
(set_as_comment
Token_c.CppPassingCosWouldGetError
) ;
773 ifdefblock2
+> iter_token_ifdef
(set_as_comment
Token_c.CppPassingCosWouldGetError
);
775 (* ifdef with nested ifdef *)
777 ([[NotIfdefLine
(({col
= 0} as _xline1
)::line1
)];
779 ([[NotIfdefLine
(({col
= 0} as xline2
)::line2
)];
780 [NotIfdefLine
(({col
= 0} as xline3
)::line3
)];
786 ::NotIfdefLine
(({tok
= TOBrace i
; col
= 0})::line4
)
789 find_ifdef_funheaders xs
;
791 msg_ifdef_funheaders ();
792 info_ifdef_stmt
+> List.iter
(set_as_comment
Token_c.CppDirective
);
793 info_ifdef_stmt2
+> List.iter
(set_as_comment
Token_c.CppDirective
);
794 let all_toks = [xline2
;xline3
] @ line2
@ line3
in
795 all_toks +> List.iter
(set_as_comment
Token_c.CppPassingCosWouldGetError
);
797 (* ifdef with elseif *)
799 ([[NotIfdefLine
(({col
= 0} as _xline1
)::line1
)];
800 [NotIfdefLine
(({col
= 0} as xline2
)::line2
)];
801 [NotIfdefLine
(({col
= 0} as xline3
)::line3
)];
804 ::NotIfdefLine
(({tok
= TOBrace i
; col
= 0})::line4
)
807 find_ifdef_funheaders xs
;
809 msg_ifdef_funheaders ();
810 info_ifdef_stmt
+> List.iter
(set_as_comment
Token_c.CppDirective
);
811 let all_toks = [xline2
;xline3
] @ line2
@ line3
in
812 all_toks +> List.iter
(set_as_comment
Token_c.CppPassingCosWouldGetError
)
815 | Ifdef
(xxs
,info_ifdef_stmt
)::xs
816 | Ifdefbool
(_
, xxs
,info_ifdef_stmt
)::xs
->
817 List.iter
find_ifdef_funheaders xxs
;
818 find_ifdef_funheaders xs
823 let rec adjust_inifdef_include xs
=
824 xs
+> List.iter
(function
825 | NotIfdefLine _
-> ()
826 | Ifdef
(xxs
, info_ifdef_stmt
) | Ifdefbool
(_
, xxs
, info_ifdef_stmt
) ->
827 xxs
+> List.iter
(iter_token_ifdef
(fun tokext
->
828 match tokext
.tok
with
829 | Parser_c.TInclude
(s1
, s2
, inifdef_ref
, ii) ->
841 let rec find_ifdef_cparen_else xs
=
843 xs
+> List.iter
(function
844 | NotIfdefLine _
-> ()
845 | Ifdef
(xxs
, info_ifdef_stmt
) ->
847 | [] -> raise Impossible
849 | first
::second
::rest
->
851 (* found a closing ')' just after the #else *)
853 (* Too bad ocaml does not support better list pattern matching
854 * a la Prolog-III where can match the end of lists.
857 if List.length first
= 0 then false
859 let last_line = Common.last first
in
862 if List.length xs
= 0 then false
864 let last_tok = Common.last xs
in
865 TH.is_cpar
last_tok.tok
866 | Ifdef _
| Ifdefbool _
-> false
868 if condition then begin
869 msg_ifdef_cparen_else();
871 (* keep only first, treat the rest as comment *)
872 info_ifdef_stmt
+> List.iter
(set_as_comment
Token_c.CppDirective
);
873 (second
::rest
) +> List.iter
874 (iter_token_ifdef
(set_as_comment
Token_c.CppPassingCosWouldGetError
));
880 (* no need complex analysis for ifdefbool *)
881 | Ifdefbool
(_
, xxs
, info_ifdef_stmt
) ->
887 (* ------------------------------------------------------------------------- *)
888 (* cpp-builtin part2, macro, using standard.h or other defs *)
889 (* ------------------------------------------------------------------------- *)
891 (* now in cpp_token_c.ml *)
893 (* ------------------------------------------------------------------------- *)
894 (* stringification *)
895 (* ------------------------------------------------------------------------- *)
897 let rec find_string_macro_paren xs
=
900 | Parenthised
(xxs
, info_parens
)::xs
->
901 xxs
+> List.iter
(fun xs
->
903 (function PToken
({tok
= (TString _
| TMacroString _
)}) -> true | _
-> false) &&
905 (function PToken
({tok
= (TString _
| TMacroString _
)}) | PToken
({tok
= TIdent _
}) ->
908 xs
+> List.iter
(fun tok
->
910 | PToken
({tok
= TIdent
(s,_
)} as id
) ->
911 msg_stringification s;
912 id
.tok
<- TMacroString
(s, TH.info_of_tok id
.tok
);
916 find_string_macro_paren xs
918 find_string_macro_paren xs
920 find_string_macro_paren xs
923 (* ------------------------------------------------------------------------- *)
925 (* ------------------------------------------------------------------------- *)
927 (* don't forget to recurse in each case *)
928 let rec find_macro_paren xs
=
933 | PToken
({tok
= Tattribute _
} as id
)
934 ::Parenthised
(xxs
,info_parens
)
937 pr2_cpp ("MACRO: __attribute detected ");
938 [Parenthised
(xxs
, info_parens
)] +>
939 iter_token_paren
(set_as_comment
Token_c.CppAttr
);
940 set_as_comment
Token_c.CppAttr id
;
943 | PToken
({tok
= TattributeNoarg _
} as id
)
946 pr2_cpp ("MACRO: __attributenoarg detected ");
947 set_as_comment
Token_c.CppAttr id
;
951 (* attribute cpp, __xxx id *)
952 | PToken
({tok
= TIdent
(s,i1
)} as id
)
953 ::PToken
({tok
= TIdent
(s2
, i2
)} as id2
)
954 ::xs
when s ==~
regexp_annot
957 id
.tok
<- TMacroAttr
(s, i1
);
958 find_macro_paren ((PToken id2
)::xs
); (* recurse also on id2 ? *)
960 (* attribute cpp, id __xxx *)
961 | PToken
({tok
= TIdent
(s,i1
)} as _id
)
962 ::PToken
({tok
= TIdent
(s2
, i2
)} as id2
)
963 ::xs
when s2
==~
regexp_annot && (not
(s ==~
regexp_typedef))
966 id2
.tok
<- TMacroAttr
(s2
, i2
);
969 | PToken
({tok
= (Tstatic _
| Textern _
)} as tok1
)
970 ::PToken
({tok
= TIdent
(s,i1
)} as attr
)
971 ::xs
when s ==~
regexp_annot
973 pr2_cpp ("storage attribute: " ^
s);
974 attr
.tok
<- TMacroAttrStorage
(s,i1
);
975 (* recurse, may have other storage attributes *)
976 find_macro_paren (PToken
(tok1
)::xs
)
980 (* storage attribute *)
981 | PToken
({tok
= (Tstatic _
| Textern _
)} as tok1
)
982 ::PToken
({tok
= TMacroAttr
(s,i1
)} as attr
)::xs
984 pr2_cpp ("storage attribute: " ^
s);
985 attr
.tok
<- TMacroAttrStorage
(s,i1
);
986 (* recurse, may have other storage attributes *)
987 find_macro_paren (PToken
(tok1
)::xs
)
992 * the order of the matching clause is important
996 (* string macro with params, before case *)
997 | PToken
({tok
= (TString _
| TMacroString _
)})::PToken
({tok
= TIdent
(s,_
)} as id
)
998 ::Parenthised
(xxs
, info_parens
)
1001 msg_stringification_params s;
1002 id
.tok
<- TMacroString
(s, TH.info_of_tok id
.tok
);
1003 [Parenthised
(xxs
, info_parens
)] +>
1004 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
1008 | PToken
({tok
= TIdent
(s,_
)} as id
)
1009 ::Parenthised
(xxs
, info_parens
)
1010 ::PToken
({tok
= (TString _
| TMacroString _
)})
1013 msg_stringification_params s;
1014 id
.tok
<- TMacroString
(s, TH.info_of_tok id
.tok
);
1015 [Parenthised
(xxs
, info_parens
)] +>
1016 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
1020 (* for the case where the string is not inside a funcall, but
1021 * for instance in an initializer.
1024 (* string macro variable, before case *)
1025 | PToken
({tok
= (TString _
| TMacroString _
)})::PToken
({tok
= TIdent
(s,_
)} as id
)
1028 msg_stringification s;
1029 id
.tok
<- TMacroString
(s, TH.info_of_tok id
.tok
);
1033 | PToken
({tok
= TIdent
(s,_
)} as id
)
1034 ::PToken
({tok
= (TString _
| TMacroString _
)})
1037 msg_stringification s;
1038 id
.tok
<- TMacroString
(s, TH.info_of_tok id
.tok
);
1046 | (PToken
x)::xs
-> find_macro_paren xs
1047 | (Parenthised
(xxs
, info_parens
))::xs
->
1048 xxs
+> List.iter
find_macro_paren;
1055 (* don't forget to recurse in each case *)
1056 let rec find_macro_lineparen xs
=
1060 (* linuxext: ex: static [const] DEVICE_ATTR(); *)
1063 [PToken
({tok
= Tstatic _
});
1064 PToken
({tok
= TIdent
(s,_
)} as macro
);
1065 Parenthised
(xxs
,info_parens
);
1066 PToken
({tok
= TPtVirg _
});
1070 when (s ==~
regexp_macro) ->
1072 msg_declare_macro s;
1073 let info = TH.info_of_tok macro
.tok
in
1074 macro
.tok
<- TMacroDecl
(Ast_c.str_of_info
info, info);
1076 find_macro_lineparen (xs
)
1078 (* the static const case *)
1081 [PToken
({tok
= Tstatic _
});
1082 PToken
({tok
= Tconst _
} as const
);
1083 PToken
({tok
= TIdent
(s,_
)} as macro
);
1084 Parenthised
(xxs
,info_parens
);
1085 PToken
({tok
= TPtVirg _
});
1091 when (s ==~
regexp_macro) ->
1093 msg_declare_macro s;
1094 let info = TH.info_of_tok macro
.tok
in
1095 macro
.tok
<- TMacroDecl
(Ast_c.str_of_info
info, info);
1097 (* need retag this const, otherwise ambiguity in grammar
1098 21: shift/reduce conflict (shift 121, reduce 137) on Tconst
1099 decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
1100 decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
1101 storage_class_spec : Tstatic . (137)
1103 const
.tok
<- TMacroDeclConst
(TH.info_of_tok const
.tok
);
1105 find_macro_lineparen (xs
)
1108 (* same but without trailing ';'
1110 * I do not put the final ';' because it can be on a multiline and
1111 * because of the way mk_line is coded, we will not have access to
1112 * this ';' on the next line, even if next to the ')' *)
1114 ([PToken
({tok
= Tstatic _
});
1115 PToken
({tok
= TIdent
(s,_
)} as macro
);
1116 Parenthised
(xxs
,info_parens
);
1120 when s ==~
regexp_macro ->
1122 msg_declare_macro s;
1123 let info = TH.info_of_tok macro
.tok
in
1124 macro
.tok
<- TMacroDecl
(Ast_c.str_of_info
info, info);
1126 find_macro_lineparen (xs
)
1131 (* on multiple lines *)
1134 (PToken
({tok
= Tstatic _
})::[]
1138 [PToken
({tok
= TIdent
(s,_
)} as macro
);
1139 Parenthised
(xxs
,info_parens
);
1140 PToken
({tok
= TPtVirg _
});
1145 when (s ==~
regexp_macro) ->
1147 msg_declare_macro s;
1148 let info = TH.info_of_tok macro
.tok
in
1149 macro
.tok
<- TMacroDecl
(Ast_c.str_of_info
info, info);
1151 find_macro_lineparen (xs
)
1154 (* linuxext: ex: DECLARE_BITMAP();
1156 * Here I use regexp_declare and not regexp_macro because
1157 * Sometimes it can be a FunCallMacro such as DEBUG(foo());
1158 * Here we don't have the preceding 'static' so only way to
1159 * not have positive is to restrict to .*DECLARE.* macros.
1161 * but there is a grammar rule for that, so don't need this case anymore
1162 * unless the parameter of the DECLARE_xxx are weird and can not be mapped
1163 * on a argument_list
1167 ([PToken
({tok
= TIdent
(s,_
)} as macro
);
1168 Parenthised
(xxs
,info_parens
);
1169 PToken
({tok
= TPtVirg _
});
1173 when (s ==~
regexp_declare) ->
1175 msg_declare_macro s;
1176 let info = TH.info_of_tok macro
.tok
in
1177 macro
.tok
<- TMacroDecl
(Ast_c.str_of_info
info, info);
1179 find_macro_lineparen (xs
)
1185 * Could also transform the TIdent in a TMacroTop but can have false
1186 * positive, so easier to just change the TCPar and so just solve
1187 * the end-of-stream pb of ocamlyacc
1190 ([PToken
({tok
= TIdent
(s,ii); col
= col1
; where
= ctx
} as _macro
);
1191 Parenthised
(xxs
,info_parens
);
1194 ::xs
when col1
=|= 0
1197 (* to reduce number of false positive *)
1199 | (Line
(PToken
({col
= col2
} as other
)::restline2
))::_
->
1200 TH.is_eof other
.tok
|| (col2
=|= 0 &&
1201 (match other
.tok
with
1202 | TOBrace _
-> false (* otherwise would match funcdecl *)
1203 | TCBrace _
when ctx
<> InFunction
-> false
1207 | tok
when TH.is_binary_operator tok
-> false
1218 msg_macro_toplevel_noptvirg s;
1219 (* just to avoid the end-of-stream pb of ocamlyacc *)
1220 let tcpar = Common.last info_parens
in
1221 tcpar.tok
<- TCParEOL
(TH.info_of_tok
tcpar.tok
);
1223 (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
1227 find_macro_lineparen (xs
)
1231 (* macro with parameters
1236 ([PToken
({tok
= TIdent
(s,ii); col
= col1
; where
= ctx
} as macro
);
1237 Parenthised
(xxs
,info_parens
);
1241 (PToken
({col
= col2
} as other
)::restline2
1244 (* when s ==~ regexp_macro *)
1246 (* This can give a false positive for K&R functions if the function
1247 name is in the same column as the first parameter declaration. *)
1250 (match other
.tok
with
1251 | TOBrace _
-> false (* otherwise would match funcdecl *)
1252 | TCBrace _
when ctx
<> InFunction
-> false
1256 | tok
when TH.is_binary_operator tok
-> false
1263 (match other
.tok
, restline2
with
1264 | TCBrace _
, _
when ctx
=*= InFunction
-> true
1265 | Treturn _
, _
-> true
1267 | Telse _
, _
-> true
1269 (* case of label, usually put in first line *)
1270 | TIdent _
, (PToken
({tok
= TDotDot _
}))::_
->
1282 if col1
=|= 0 then ()
1284 msg_macro_noptvirg s;
1285 macro
.tok
<- TMacroStmt
(s, TH.info_of_tok macro
.tok
);
1286 [Parenthised
(xxs
, info_parens
)] +>
1287 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
1290 find_macro_lineparen (line2
::xs
)
1292 (* linuxext:? single macro
1297 * todo: factorize code with previous rule ?
1300 ([PToken
({tok
= TIdent
(s,ii); col
= col1
; where
= ctx
} as macro
);
1304 (PToken
({col
= col2
} as other
)::restline2
1307 (* when s ==~ regexp_macro *)
1311 col1
<> 0 && (* otherwise can match typedef of fundecl*)
1312 (match other
.tok
with
1313 | TPtVirg _
-> false
1315 | TCBrace _
when ctx
<> InFunction
-> false
1316 | tok
when TH.is_binary_operator tok
-> false
1321 (match other
.tok
with
1322 | TCBrace _
when ctx
=*= InFunction
-> true
1332 msg_macro_noptvirg_single s;
1333 macro
.tok
<- TMacroStmt
(s, TH.info_of_tok macro
.tok
);
1335 find_macro_lineparen (line2
::xs
)
1338 find_macro_lineparen xs
1342 (* ------------------------------------------------------------------------- *)
1343 (* define tobrace init *)
1344 (* ------------------------------------------------------------------------- *)
(* find_define_init_brace_paren: looks in the token list xs for the
 * sequences
 *    TDefine, TIdentDefine, TOBrace
 * and
 *    TDefine, TIdentDefine, Parenthised group, TOBrace
 * i.e. a define (without or with parameters) whose body starts with an
 * opening brace. A test on the pair (tok2.tok, tok3.tok) then decides
 * whether this looks like an initializer (for instance an integer, a
 * string or an identifier followed by a comma). In that case a message
 * is logged with pr2_cpp and the brace token is mutated in place into
 * TOBraceDefineInit, keeping its original info i1.
 * Other PToken elements are skipped by calling aux on the rest.
 *
 * NOTE(review): the lines binding aux, tok2 and tok3, and the end of
 * the definition, are not visible in this excerpt; tok2 and tok3 are
 * presumably the two tokens that follow the brace -- to be confirmed.
 *)
1346 let rec find_define_init_brace_paren xs
=
1351 (* mainly for firefox *)
1352 | (PToken
{tok
= TDefine _
})
1353 ::(PToken
{tok
= TIdentDefine
(s,_
)})
1354 ::(PToken
({tok
= TOBrace i1
} as tokbrace
))
1359 match tok2
.tok
, tok3
.tok
with
1360 | TInt _
, TComma _
-> true
1361 | TString _
, TComma _
-> true
1362 | TIdent _
, TComma _
-> true
1368 pr2_cpp("found define initializer: " ^
s);
1369 tokbrace
.tok
<- TOBraceDefineInit i1
;
1374 (* mainly for linux, especially in sound/ *)
1375 | (PToken
{tok
= TDefine _
})
1376 ::(PToken
{tok
= TIdentDefine
(s,_
)})
1377 ::(Parenthised
(xxx
, info_parens
))
1378 ::(PToken
({tok
= TOBrace i1
} as tokbrace
))
1383 match tok2
.tok
, tok3
.tok
with
1384 | TInt _
, TComma _
-> true
1385 | TDot _
, TIdent _
-> true
1386 | TIdent _
, TComma _
-> true
1392 pr2_cpp("found define initializer with param: " ^
s);
1393 tokbrace
.tok
<- TOBraceDefineInit i1
;
1401 | (PToken
x)::xs
-> aux xs
1402 | (Parenthised
(xxs
, info_parens
))::xs
->
1403 (* not need for tobrace init:
1404 * xxs +> List.iter aux;
1411 (* ------------------------------------------------------------------------- *)
1413 (* ------------------------------------------------------------------------- *)
1415 (* Obsolete now with macro expansion? We get some regressions if this is commented out.
1416 * todo: if we make a bad decision here, then it can influence other phases
1417 * and make the code hard to parse. So maybe when we have a parse error, we should
1418 * undo some of the guesses these heuristics have made, and restore
1419 * the original token value.
(* find_actions / find_actions_params: heuristic for macros that take
 * statements as arguments.
 *
 * find_actions: for an identifier s followed by a Parenthised group, it
 * first recurses into every argument of the group, then calls
 * find_actions_params on the group; msg_macro_higher_order s is called
 * depending on the result (named modified).
 *
 * find_actions_params: folds over the arguments xxs. For an argument
 * whose tokens contain at least one statement token (TH.is_statement),
 * the tokens of that argument are visited with iter_token_paren and
 * mutated in place into TAction, keeping their info. Two cases only
 * log a "PB: weird" message (EOF and EOM tokens).
 *
 * NOTE(review): the tests selecting the EOF/EOM cases, the condition
 * guarding msg_macro_higher_order and the value accumulated by the fold
 * are on lines not visible in this excerpt.
 *)
1422 let rec find_actions = function
1425 | PToken
({tok
= TIdent
(s,ii)})
1426 ::Parenthised
(xxs
,info_parens
)
1429 xxs
+> List.iter
find_actions;
1430 let modified = find_actions_params xxs
in
1432 then msg_macro_higher_order s
1437 and find_actions_params xxs
=
1438 xxs
+> List.fold_left
(fun acc xs
->
1439 let toks = tokens_of_paren xs
in
1440 if toks +> List.exists
(fun x -> TH.is_statement
x.tok
)
1441 (* undo: && List.length toks > 1
1442 * good for sparse, not good for linux
1445 xs
+> iter_token_paren
(fun x ->
1448 (* certainly because paren detection had a pb because of
1449 * some ifdef-exp. Do similar additional checking than
1450 * what is done in set_as_comment.
1452 pr2 "PB: weird, I try to tag an EOF token as an action"
1454 (* cf tests-bis/no_cpar_macro.c *)
1457 pr2 "PB: weird, I try to tag an EOM token as an action"
1459 x.tok
<- TAction
(TH.info_of_tok
x.tok
);
1468 (* ------------------------------------------------------------------------- *)
1469 (* main fix cpp function *)
1470 (* ------------------------------------------------------------------------- *)
(* filter_cpp_stuff xs: selects the tokens of xs that the line/paren
 * heuristics should see. In the visible arms a token is rejected when
 * TH.is_comment holds, a TDefine token is kept, and any other token for
 * which TH.is_cpp_instruction holds is rejected. TDefine is tested
 * before the generic cpp-instruction arm so that it is not dropped.
 *
 * NOTE(review): the call that applies this predicate and the arm for
 * the remaining tokens are on lines not visible in this excerpt.
 *)
1472 let filter_cpp_stuff xs
=
1476 | tok
when TH.is_comment tok
-> false
1477 (* don't want drop the define, or if drop, have to drop
1478 * also its body otherwise the line heuristics may be lost
1479 * by not finding the TDefine in column 0 but by finding
1480 * a TDefineIdent in a column > 0
1482 | Parser_c.TDefine _
-> true
1483 | tok
when TH.is_cpp_instruction tok
-> false
(* insert_virtual_positions l: rewrites the position information of the
 * tokens in l that do not come from the original file.
 *
 * strlen gives the length of the string attached to a token info.
 *
 * loop prev offset acc walks the tokens and dispatches on the pinfo of
 * each one:
 *  - OriginTok: the token is kept as is; its parse info becomes the new
 *    prev and the offset restarts at the length of the token;
 *  - ExpandedTok and FakeTok: the token is rebuilt (through inject) with
 *    the pair (prev, offset) as its second component, and the offset is
 *    increased by the length of the token;
 *  - AbstractLineTok: raises Failure with "abstract not expected".
 *
 * skip_fake copies tokens unchanged until the first OriginTok is met,
 * then starts loop from that token.
 *
 * NOTE(review): the empty-list arms, the header of inject, the result
 * built from res and the final call are on lines not visible here.
 *)
1488 let insert_virtual_positions l
=
1489 let strlen x = String.length
(Ast_c.str_of_info
x) in
1490 let rec loop prev offset
acc = function
1493 let ii = TH.info_of_tok
x in
1495 TH.visitor_info_of_tok
(function ii -> Ast_c.rewrap_pinfo pi
ii) x in
1496 match Ast_c.pinfo_of_info
ii with
1497 Ast_c.OriginTok pi
->
1498 let prev = Ast_c.parse_info_of_info
ii in
1499 loop prev (strlen ii) (x::acc) xs
1500 | Ast_c.ExpandedTok
(pi
,_
) ->
1501 let x'
= inject (Ast_c.ExpandedTok
(pi
,(prev,offset
))) in
1502 loop prev (offset
+ (strlen ii)) (x'
::acc) xs
1503 | Ast_c.FakeTok
(s,_
) ->
1504 let x'
= inject (Ast_c.FakeTok
(s,(prev,offset
))) in
1505 loop prev (offset
+ (strlen ii)) (x'
::acc) xs
1506 | Ast_c.AbstractLineTok _
-> failwith
"abstract not expected" in
1507 let rec skip_fake = function
1510 let ii = TH.info_of_tok
x in
1511 match Ast_c.pinfo_of_info
ii with
1512 | Ast_c.OriginTok pi
->
1513 let prev = Ast_c.parse_info_of_info
ii in
1514 let res = loop prev (strlen ii) [] xs
in
1516 | _
-> x::skip_fake xs
in
1519 (* ------------------------------------------------------------------------- *)
(* fix_tokens_cpp2 ~macro_defs tokens: main driver of the cpp-related
 * token heuristics. The tokens are wrapped into extended tokens kept in
 * the reference tokens2, and the heuristics mutate those tokens in
 * place. The visible phases run in this order:
 *  1. commentize_skip_start_to_end on the whole list;
 *  2. ifdef heuristics (parenthize info, funheaders, bool, mid,
 *     cparen_else, inifdef include) on the TV.mk_ifdef view of the
 *     non-comment tokens;
 *  3. Cpp_token_c.apply_macro_defs with macro_defs on a parenthised
 *     view, followed by TV.rebuild_tokens_extented;
 *  4. set_context_tag on the TV.mk_braceised view;
 *  5. define-init, string-macro, macro-lineparen and macro-paren
 *     heuristics on parenthised and line-parenthised views;
 *  6. find_actions on a fresh parenthised view;
 *  7. insert_virtual_positions on the plain tokens, which is the result.
 * The filtered view named cleaner is recomputed before each phase
 * because earlier phases may have turned more tokens into comments.
 *
 * NOTE(review): some lines of this definition (closing of comments and
 * of the filter closures, possibly further phases) are not visible in
 * this excerpt.
 *)
1521 let fix_tokens_cpp2 ~macro_defs tokens
=
1522 let tokens2 = ref (tokens
+> Common.acc_map
TV.mk_token_extended
) in
1525 (* the order is important, if you put the action heuristic first,
1526 * then because of ifdef, can have not closed paren
1527 * and so may believe that higher order macro
1528 * and it will eat too much tokens. So important to do
1531 * I recompute multiple times cleaner cos the mutable
1532 * can have be changed and so may have more comments
1533 * in the token original list.
1537 commentize_skip_start_to_end !tokens2;
1540 let cleaner = !tokens2 +> List.filter
(fun x ->
1541 (* is_comment will also filter the TCommentCpp created in
1542 * commentize_skip_start_to_end *)
1543 not
(TH.is_comment
x.tok
) (* could filter also #define/#include *)
1545 let ifdef_grouped = TV.mk_ifdef
cleaner in
1546 set_ifdef_parenthize_info ifdef_grouped;
1548 find_ifdef_funheaders ifdef_grouped;
1549 find_ifdef_bool ifdef_grouped;
1550 find_ifdef_mid ifdef_grouped;
1551 (* change order ? maybe cparen_else heuristic make some of the funheaders
1552 * heuristics irrelevant ?
1554 find_ifdef_cparen_else ifdef_grouped;
1555 adjust_inifdef_include ifdef_grouped;
1559 let cleaner = !tokens2 +> filter_cpp_stuff in
1561 let paren_grouped = TV.mk_parenthised
cleaner in
1562 Cpp_token_c.apply_macro_defs
1563 ~
msg_apply_known_macro
1564 ~
msg_apply_known_macro_hint
1565 macro_defs
paren_grouped;
1566 (* because the before field is used by apply_macro_defs *)
1567 tokens2 := TV.rebuild_tokens_extented
!tokens2;
1569 (* tagging contextual info (InFunc, InStruct, etc). Better to do
1570 * that after the "ifdef-simplification" phase.
1572 let cleaner = !tokens2 +> List.filter
(fun x ->
1573 not
(TH.is_comment
x.tok
) (* could filter also #define/#include *)
1576 let brace_grouped = TV.mk_braceised
cleaner in
1577 set_context_tag
brace_grouped;
1580 let cleaner = !tokens2 +> filter_cpp_stuff in
1582 let paren_grouped = TV.mk_parenthised
cleaner in
1583 let line_paren_grouped = TV.mk_line_parenthised
paren_grouped in
1584 find_define_init_brace_paren paren_grouped;
1585 find_string_macro_paren paren_grouped;
1586 find_macro_lineparen line_paren_grouped;
1587 find_macro_paren paren_grouped;
1590 (* obsolete: actions ? not yet *)
1591 let cleaner = !tokens2 +> filter_cpp_stuff in
1592 let paren_grouped = TV.mk_parenthised
cleaner in
1593 find_actions paren_grouped;
1597 insert_virtual_positions (!tokens2 +> Common.acc_map
(fun x -> x.tok
))
(* time_hack1 ~macro_defs a: runs fix_tokens_cpp2 on a through
 * Common.profile_code_exclusif, under the label "HACK", and returns
 * whatever fix_tokens_cpp2 returns.
 *)
let time_hack1 ~macro_defs a =
  let run_fix () = fix_tokens_cpp2 ~macro_defs a in
  Common.profile_code_exclusif "HACK" run_fix
(* fix_tokens_cpp ~macro_defs a: entry point of the cpp token fixing
 * pass. It delegates to time_hack1 and records the call with
 * Common.profile_code under the label "C parsing.fix_cpp".
 *)
let fix_tokens_cpp ~macro_defs a =
  let fix_pass () = time_hack1 ~macro_defs a in
  Common.profile_code "C parsing.fix_cpp" fix_pass
1609 (*****************************************************************************)
1610 (* Lexing with lookahead *)
1611 (*****************************************************************************)
1613 (* Why use yet another parsing_hack technique? Are the fix_xxx functions, which do
1614 * some pre-processing on the full list of tokens, not enough?
1615 * No, because sometimes we need more contextual info, and even if
1616 * set_context() tries to give some contextual info, it is not completely
1617 * accurate, so the following code gives yet another alternative, yet another
1618 * chance to transform some tokens.
1620 * todo?: maybe could try to get rid of this technique. Maybe a better
1621 * set_context() would make possible to move this code using a fix_xx
1624 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
1625 * it is more general to do it via my LALR(k) technique, because here we can
1626 * transform some tokens given some context information. So sometimes it
1627 * makes sense to transform a token in one context, sometimes not, and
1628 * lex cannot provide us with this context information. Note that the order
1629 * of the patterns in the lookahead pattern matching is important. Do not cut/paste.
1631 * Note that in next there are only "clean" tokens; there are no comment
1632 * or space tokens. This is done by the caller.
1636 open Lexer_parser
(* for the fields of lexer_hint type *)
(* not_struct_enum: predicate on a token list (the typedef rules pass
 * the already-seen tokens, before). It answers false when the first
 * token is a struct, union or enum keyword, so that an identifier
 * following such a keyword is not inferred to be a typedef name.
 *
 * NOTE(review): the arm for every other list is not visible in this
 * excerpt; it presumably answers true -- to be confirmed.
 *)
1638 let not_struct_enum = function
1639 | (Parser_c.Tstruct _
| Parser_c.Tunion _
| Parser_c.Tenum _
)::_
-> false
(* pointer: predicate on a single token, used by the typedef rules to
 * recognise the token that follows a type name in a pointer-like
 * declaration. The only visible arm accepts TAnd when Flag.c_plus_plus
 * is set.
 *
 * NOTE(review): the other arms are on lines not visible in this excerpt
 * (presumably TMul is accepted and everything else rejected) -- to be
 * confirmed.
 *)
1642 let pointer = function
1644 | TAnd _
when !Flag.c_plus_plus
-> true
1647 let ident_or_star = function
1651 (* This function is inefficient, because it will look over a K&R header,
1652 or function prototype multiple times. At least when we see a , and are in a
1653 parameter list, we know we will eventually see a close paren, and it
1654 should come fairly soon. *)
1656 let l1 = drop_until
(function (TCPar _
) -> true | _
-> false) l
in
1658 (TCPar _
) :: (TOCro _
) :: _
-> false
1659 | (TCPar _
) :: _
-> true
1662 (* (a)(b) is ambiguous, because (a) could be a function name or a cast.
1663 At this point, we just see an ident for a; we don't know if it is eg a local
1664 variable. This function sees at least if b is the only argument, ie there
1665 are no commas at top level *)
(* paren_before_comma l: scans the tokens of l while tracking the
 * parenthesis nesting level in the inner function loop:
 *  - a comma met at level 1 gives false;
 *  - a closing paren met at level 1 gives true;
 *  - any other closing paren decreases the level, an opening paren
 *    increases it, and every other token is skipped.
 * So the result tells whether the outermost parenthesised group ends
 * before any comma appears directly inside it.
 *
 * NOTE(review): the arm for the empty list and the initial call of loop
 * are on lines not visible in this excerpt.
 *)
1666 let paren_before_comma l
=
1667 let rec loop level
= function
1669 | (TComma _
)::_
when level
= 1 -> false
1670 | (TCPar _
)::_
when level
= 1 -> true
1671 | (TCPar _
)::rest
-> loop (level
-1) rest
1672 | (TOPar _
)::rest
-> loop (level
+1) rest
1673 | x::rest
-> loop level rest
in
(* lookahead2 ~pass next before: token-rewriting heuristic driven by a
 * pattern match on the pair (next, before).
 *
 * The visible rules do the following:
 *  - with Flag.c_plus_plus set and inside a function, an opening paren
 *    met after an identifier that itself follows a TypedefIdent is
 *    turned into TOParCplusplusInit;
 *  - typedef inference: when a TIdent at the head of next appears in a
 *    position that looks like a type (followed by another identifier, a
 *    pointer token, a qualifier, used as a cast, ...), msg_typedef is
 *    called with the number of the rule, the name is registered with
 *    LP.add_typedef_root and TypedefIdent (s, i1) is returned;
 *  - ifdef-related tokens, TUndef and TCppDirectiveOther are turned
 *    into TCommentCpp with the CppDirective kind;
 *  - an identifier followed by an opening paren, outside of top level,
 *    that matches regexp_foreach and passes is_really_foreach becomes
 *    TMacroIterator.
 * The last visible arm raises Impossible.
 *
 * The order of the rules matters: earlier patterns take precedence.
 *
 * NOTE(review): many lines of this definition (arrows, parts of guards,
 * some results) are not visible in this excerpt, so the summary above
 * only covers what can be read here.
 * NOTE(review): the guard of rule 23 combines its conditions with the
 * deprecated & operator while every other guard uses &&.
 *)
1676 let lookahead2 ~pass next before
=
1677 match (next
, before
) with
1680 (* yy xx( and in function *)
1681 | TOPar i1
::_
, TIdent
(s,i2
)::TypedefIdent _
::_
1682 when !Flag.c_plus_plus
&& (LP.current_context
() = (LP.InFunction
)) ->
1683 pr2_cpp("constructed_object: " ^
s);
1684 TOParCplusplusInit i1
1685 | TypedefIdent
(s,i
)::TOPar i1
::_
,_
1686 when !Flag.c_plus_plus
&& (LP.current_context
() = (LP.InFunction
)) ->
1689 (*-------------------------------------------------------------*)
1690 (* typedef inference, parse_typedef_fix3 *)
1691 (*-------------------------------------------------------------*)
1693 | (TIdent
(s,i1
)::TIdent
(s2
,i2
)::_
, _
) when not_struct_enum before
&& s =$
= s2
1695 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
1697 (* parse_typedef_fix3:
1698 * acpi_object acpi_object;
1699 * was parsed wrongly, because there is no time to call dt() in the type_spec.
1700 * Internally the parser has already requested the next token in order to
1701 * decide things.
1702 * => special case in lexer_heuristic, again
1704 if !Flag_parsing_c.debug_typedef
1705 then pr2 ("TYPEDEF: disable typedef cos special case: " ^
s);
1707 LP.disable_typedef
();
1709 msg_typedef s i1
1; LP.add_typedef_root
s;
1710 TypedefIdent
(s, i1
)
1713 | (TIdent
(s, i1
)::TIdent
(s2
, i2
)::_
, _
) when not_struct_enum before
1716 (* && not_annot s2 BUT lead to false positive*)
1718 msg_typedef s i1
2; LP.add_typedef_root
s;
1719 TypedefIdent
(s, i1
)
1723 | (TIdent
(s, i1
)::Tinline i2
::_
, _
) when not_struct_enum before
1726 msg_typedef s i1
3; LP.add_typedef_root
s;
1727 TypedefIdent
(s, i1
)
1730 (* [,(] xx [,)] AND param decl *)
1731 | (TIdent
(s, i1
)::(((TComma _
|TCPar _
)::_
) as rest
) ,
1732 (TComma _
|TOPar _
)::_
)
1733 when not_struct_enum before
&& (LP.current_context
() =*= LP.InParameter
)
1738 | (TIdent
(s, i1
)::((TComma _
|TCPar _
)::_
) , (TComma _
|TOPar _
)::_
)
1739 when not_struct_enum before
&& (LP.current_context
() =*= LP.InParameter
)
1742 msg_typedef s i1
4; LP.add_typedef_root
s;
1743 TypedefIdent
(s, i1
)
1746 (* specialcase: [,(] xx* [,)] *)
1747 | (TIdent
(s, i1
)::ptr
::(TComma _
|TCPar _
)::_
, (*(TComma _|TOPar _)::*)_
)
1748 when pointer ptr
&& not_struct_enum before
1749 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1752 msg_typedef s i1
5; LP.add_typedef_root
s;
1753 TypedefIdent
(s, i1
)
1757 (* specialcase: [,(] xx** [,)] *)
1758 | (TIdent
(s, i1
)::TMul _
::TMul _
::(TComma _
|TCPar _
)::_
, (*(TComma _|TOPar _)::*)_
)
1759 when not_struct_enum before
1760 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1763 msg_typedef s i1
6; LP.add_typedef_root
s;
1764 TypedefIdent
(s, i1
)
1768 (* xx const * USELESS because of next rule ? *)
1769 | (TIdent
(s, i1
)::(Tconst _
|Tvolatile _
|Trestrict _
)::TMul _
::_
, _
)
1770 when not_struct_enum before
1771 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1775 msg_typedef s i1
7; LP.add_typedef_root
s;
1776 TypedefIdent
(s, i1
)
1779 | (TIdent
(s, i1
)::(Tconst _
|Tvolatile _
|Trestrict _
)::_
, _
)
1780 when not_struct_enum before
1782 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1785 msg_typedef s i1
8; LP.add_typedef_root
s;
1786 TypedefIdent
(s, i1
)
1790 | (TIdent
(s, i1
)::ptr
::(Tconst _
| Tvolatile _
|Trestrict _
)::_
, _
)
1791 when pointer ptr
&& not_struct_enum before
1794 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1796 msg_typedef s i1
9; LP.add_typedef_root
s;
1797 TypedefIdent
(s, i1
)
1801 | (TIdent
(s, i1
)::TCPar _
::_
, (Tconst _
| Tvolatile _
|Trestrict _
)::TOPar _
::_
) when
1803 msg_typedef s i1
10; LP.add_typedef_root
s;
1804 TypedefIdent
(s, i1
)
1808 (* ( xx ) [sizeof, ~] *)
1809 | (TIdent
(s, i1
)::TCPar _
::(Tsizeof _
|TTilde _
)::_
, TOPar _
::_
)
1810 when not_struct_enum before
1813 msg_typedef s i1
11; LP.add_typedef_root
s;
1814 TypedefIdent
(s, i1
)
1816 (* [(,] xx [ AND parameterdeclaration *)
1817 | (TIdent
(s, i1
)::TOCro _
::_
, (TComma _
|TOPar _
)::_
)
1818 when (LP.current_context
() =*= LP.InParameter
)
1821 msg_typedef s i1
12; LP.add_typedef_root
s;
1822 TypedefIdent
(s, i1
)
1824 (*------------------------------------------------------------*)
1825 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
1826 (* but if have a '=', or ',' I think not *)
1827 (*------------------------------------------------------------*)
1829 (* static xx * yy *)
1830 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::_
,
1831 (Tregister _
|Tstatic _
|Tvolatile _
|Tconst _
|Trestrict _
)::_
) when
1832 pointer ptr
&& ok_typedef s
1834 msg_typedef s i1
13; LP.add_typedef_root
s;
1835 TypedefIdent
(s, i1
)
1837 (* TODO xx * yy ; AND in start of compound element *)
1840 (* xx * yy, AND in paramdecl *)
1841 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TComma _
::_
, _
)
1842 when not_struct_enum before
&& (LP.current_context
() =*= LP.InParameter
)
1843 && pointer ptr
&& ok_typedef s
1846 msg_typedef s i1
14; LP.add_typedef_root
s;
1847 TypedefIdent
(s, i1
)
1850 (* xx * yy ; AND in Toplevel, except when have = before *)
1851 | (TIdent
(s, i1
)::TMul _
::TIdent
(s2
, i2
)::TPtVirg _
::_
, TEq _
::_
) ->
1853 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TPtVirg _
::_
, _
)
1854 when not_struct_enum before
&& pointer ptr
&&
1855 (LP.is_top_or_struct
(LP.current_context
()))
1857 msg_typedef s i1
15; LP.add_typedef_root
s;
1858 TypedefIdent
(s, i1
)
1860 (* xx * yy , AND in Toplevel *)
1861 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TComma _
::_
, _
)
1862 when not_struct_enum before
&& (LP.current_context
() =*= LP.InTopLevel
)
1863 && ok_typedef s && pointer ptr
1866 msg_typedef s i1
16; LP.add_typedef_root
s;
1867 TypedefIdent
(s, i1
)
1869 (* xx * yy ( AND in Toplevel *)
1870 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TOPar _
::_
, _
)
1871 when not_struct_enum before
1872 && (LP.is_top_or_struct
(LP.current_context
()))
1873 && ok_typedef s && pointer ptr
1875 msg_typedef s i1
17; LP.add_typedef_root
s;
1876 TypedefIdent
(s, i1
)
1879 (* todo? enough ? cos in struct def we can have some expression ! *)
1880 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TOCro _
::_
, _
)
1881 when not_struct_enum before
&&
1882 (LP.is_top_or_struct
(LP.current_context
()))
1883 && ok_typedef s && pointer ptr
1885 msg_typedef s i1
18; LP.add_typedef_root
s;
1886 TypedefIdent
(s, i1
)
1888 (* u16: 10; in struct *)
1889 | (TIdent
(s, i1
)::TDotDot _
::_
, (TOBrace _
| TPtVirg _
)::_
)
1890 when (LP.is_top_or_struct
(LP.current_context
()))
1893 msg_typedef s i1
19; LP.add_typedef_root
s;
1894 TypedefIdent
(s, i1
)
1897 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
1898 (* YES cos at toplevel can have some expression !! for instance when *)
1899 (* enter in the dimension of an array *)
1901 | (TIdent s::TMul::TIdent s2::_ , _)
1902 when (take_safe 1 !passed_tok <> [Tstruct] &&
1903 (take_safe 1 !passed_tok <> [Tenum]))
1905 !LP._lexer_hint = Some LP.Toplevel ->
1906 msg_typedef s 20; LP.add_typedef_root s;
1911 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TEq _
::_
, _
)
1912 when not_struct_enum before
1913 && ok_typedef s && pointer ptr
1915 msg_typedef s i1
21; LP.add_typedef_root
s;
1916 TypedefIdent
(s, i1
)
1919 (* xx * yy) AND in paramdecl *)
1920 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TCPar _
::_
, _
)
1921 when not_struct_enum before
&& (LP.current_context
() =*= LP.InParameter
)
1922 && ok_typedef s && pointer ptr
1924 msg_typedef s i1
22; LP.add_typedef_root
s;
1925 TypedefIdent
(s, i1
)
1928 (* xx * yy; *) (* wrong ? *)
1929 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TPtVirg _
::_
,
1930 (TOBrace _
| TPtVirg _
)::_
) when not_struct_enum before
1931 && ok_typedef s & pointer ptr
1933 msg_typedef s i1
23; LP.add_typedef_root
s;
1934 msg_maybe_dangereous_typedef s;
1935 TypedefIdent
(s, i1
)
1938 (* xx * yy, and ';' before xx *) (* wrong ? *)
1939 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::TComma _
::_
,
1940 (TOBrace _
| TPtVirg _
)::_
) when
1941 ok_typedef s && pointer ptr
1943 msg_typedef s i1
24; LP.add_typedef_root
s;
1944 TypedefIdent
(s, i1
)
1948 | (TIdent
(s, i1
)::ptr
::TIdent
(s2
, i2
)::_
, _
)
1949 when s ==~
regexp_typedef && not_struct_enum before
1950 (* struct user_info_t sometimes *)
1951 && ok_typedef s && pointer ptr
1953 msg_typedef s i1
25; LP.add_typedef_root
s;
1954 TypedefIdent
(s, i1
)
1956 (* xx ** yy *) (* wrong ? *)
1957 | (TIdent
(s, i1
)::TMul _
::TMul _
::TIdent
(s2
, i2
)::_
, _
)
1958 when not_struct_enum before
1959 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1962 msg_typedef s i1
26; LP.add_typedef_root
s;
1963 TypedefIdent
(s, i1
)
1966 | (TIdent
(s, i1
)::TMul _
::TMul _
::TMul _
::TIdent
(s2
, i2
)::_
, _
)
1967 when not_struct_enum before
1969 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1971 msg_typedef s i1
27; LP.add_typedef_root
s;
1972 TypedefIdent
(s, i1
)
1975 | (TIdent
(s, i1
)::TMul _
::TMul _
::TCPar _
::_
, _
)
1976 when not_struct_enum before
1977 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1980 msg_typedef s i1
28; LP.add_typedef_root
s;
1981 TypedefIdent
(s, i1
)
1985 (* ----------------------------------- *)
1986 (* old: why not do like for other rules and start with TIdent ?
1987 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
1988 * new: prefer now start with TIdent because otherwise the add_typedef_root
1989 * may have no effect if in second pass or if have disable the add_typedef.
1993 | (TIdent
(s, i1
)::TCPar i2
::(TIdent
(_
,i3
)|TInt
(_
,i3
))::after
::_
,
1995 when not
(TH.is_stuff_taking_parenthized
x) (* &&
1996 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 - why useful?
1999 && not
(ident_or_star after
) (* possible K&R declaration *)
2001 msg_typedef s i1
29; LP.add_typedef_root
s;
2003 TypedefIdent
(s, i1
)
2007 * but false positif: typedef int (xxx_t)(...), so do specialisation below.
2010 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
2011 when not (TH.is_stuff_taking_parenthized x)
2014 msg_typedef s 30; LP.add_typedef_root s;
2016 TypedefIdent
(s, i1
)
2018 (* special case: = (xx) ( yy) *)
2019 | (TIdent
(s, i1
)::TCPar _
::((TOPar _
::_
) as rest
) ,
2020 (TOPar
info)::(TEq _
|TEqEq _
)::_
)
2021 when ok_typedef s && paren_before_comma rest
2023 msg_typedef s i1
31; LP.add_typedef_root
s;
2025 TypedefIdent
(s, i1
)
2029 | (TIdent
(s, i1
)::ptr
::TCPar _
::TIdent
(s2
, i2
)::_
, (TOPar
info)::_
)
2030 when ok_typedef s && pointer ptr
2032 msg_typedef s i1
32; LP.add_typedef_root
s;
2037 (* (xx){ ... } constructor *)
2038 | (TIdent
(s, i1
)::TCPar _
::TOBrace _
::_
, TOPar _
::x::_
)
2039 when (*s ==~ regexp_typedef && *) not
(TH.is_stuff_taking_parenthized
x)
2042 msg_typedef s i1
33; LP.add_typedef_root
s;
2043 TypedefIdent
(s, i1
)
2046 (* can have sizeof on expression
2047 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
2048 msg_typedef s; LP.add_typedef_root s;
2053 (* ----------------------------------- *)
2054 (* x ( *y )(params), function pointer *)
2055 | (TIdent
(s, i1
)::TOPar _
::TMul _
::TIdent _
::TCPar _
::TOPar _
::_
, _
)
2056 when not_struct_enum before
2059 msg_typedef s i1
34; LP.add_typedef_root
s;
2060 TypedefIdent
(s, i1
)
2062 (* x* ( *y )(params), function pointer 2 *)
2063 | (TIdent
(s, i1
)::TMul _
::TOPar _
::TMul _
::TIdent _
::TCPar _
::TOPar _
::_
, _
)
2064 when not_struct_enum before
2067 msg_typedef s i1
35; LP.add_typedef_root
s;
2068 TypedefIdent
(s, i1
)
2071 (*-------------------------------------------------------------*)
2073 (*-------------------------------------------------------------*)
2074 | ((TIfdef
(_
,ii) |TIfdefelse
(_
,ii) |TIfdefelif
(_
,ii) |TEndif
(_
,ii) |
2075 TIfdefBool
(_
,_
,ii)|TIfdefMisc
(_
,_
,ii)|TIfdefVersion
(_
,_
,ii))
2080 if not !Flag_parsing_c.ifdef_to_if
2081 then TCommentCpp (Ast_c.CppDirective, ii)
2084 (* not !LP._lexer_hint.toplevel *)
2085 if !Flag_parsing_c.ifdef_directive_passing
2089 if (LP.current_context
() =*= LP.InInitializer
)
2091 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
2092 incr
Stat.nIfdefInitializer
;
2094 pr2_cpp("IFDEF: or related inside function. I treat it as comment");
2095 incr
Stat.nIfdefPassing
;
2097 TCommentCpp
(Token_c.CppDirective
, ii)
2101 | (TUndef
(ii) as x)::_
, _
2105 pr2_cpp("UNDEF: I treat it as comment");
2106 TCommentCpp
(Token_c.CppDirective
, ii)
2110 | (TCppDirectiveOther
(ii) as x)::_
, _
2114 pr2_cpp ("OTHER directive: I treat it as comment");
2115 TCommentCpp
(Token_c.CppDirective
, ii)
2119 (* If ident contain a for_each, then certainly a macro. But to be
2120 * sure should look if there is a '{' after the ')', but it requires
2121 * to count the '('. Because this can be expensive, we do that only
2122 * when the token contains "for_each".
2124 | (TIdent
(s, i1
)::TOPar _
::rest
, _
)
2125 when not
(LP.current_context
() =*= LP.InTopLevel
)
2126 (* otherwise a function such as static void loopback_enable(int i) {
2127 * will be considered as a loop
2131 if s ==~
regexp_foreach &&
2132 is_really_foreach (Common.take_safe
forLOOKAHEAD rest
)
2136 TMacroIterator
(s, i1
)
2140 (*-------------------------------------------------------------*)
2142 | _
-> raise Impossible
(* lookahead ~pass a b: public entry point of the lookahead heuristic.
 * It evaluates lookahead2 ~pass a b through Common.profile_code, under
 * the label "C parsing.lookahead", and returns its result.
 *)
let lookahead ~pass a b =
  let run_lookahead () = lookahead2 ~pass a b in
  Common.profile_code "C parsing.lookahead" run_lookahead