permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
708f4980 19module TV = Token_views_c
34e49164
C
20module LP = Lexer_parser
21
485bce71 22module Stat = Parsing_stat
34e49164 23
ae4735db 24open Parser_c
34e49164 25
ae4735db 26open TV
708f4980 27
34e49164
C
28(*****************************************************************************)
29(* Some debugging functions *)
30(*****************************************************************************)
31
708f4980 32let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
113803cf 33
ae4735db 34let pr2_cpp s =
34e49164
C
35 if !Flag_parsing_c.debug_cpp
36 then Common.pr2_once ("CPP-" ^ s)
37
38
ae4735db 39let msg_gen cond is_known printer s =
34e49164
C
40 if cond
41 then
42 if not (!Flag_parsing_c.filter_msg)
43 then printer s
44 else
45 if not (is_known s)
46 then printer s
ae4735db 47
485bce71
C
48(* In the following, there are some harcoded names of types or macros
49 * but they are not used by our heuristics! They are just here to
50 * enable to detect false positive by printing only the typedef/macros
51 * that we don't know yet. If we print everything, then we can easily
52 * get lost with too much verbose tracing information. So those
53 * functions "filter" some messages. So our heuristics are still good,
54 * there is no more (or not that much) hardcoded linux stuff.
34e49164 55 *)
485bce71 56
ae4735db
C
57let is_known_typdef =
58 (fun s ->
34e49164
C
59 (match s with
60 | "u_char" | "u_short" | "u_int" | "u_long"
ae4735db
C
61 | "u8" | "u16" | "u32" | "u64"
62 | "s8" | "s16" | "s32" | "s64"
63 | "__u8" | "__u16" | "__u32" | "__u64"
abad11c5 64 -> true
ae4735db
C
65
66 | "acpi_handle"
67 | "acpi_status"
34e49164
C
68 -> true
69
ae4735db
C
70 | "FILE"
71 | "DIR"
34e49164 72 -> true
ae4735db 73
34e49164 74 | s when s =~ ".*_t$" -> true
ae4735db 75 | _ -> false
34e49164
C
76 )
77 )
485bce71 78
ae4735db
C
79(* note: cant use partial application with let msg_typedef =
80 * because it would compute msg_typedef at compile time when
485bce71
C
81 * the flag debug_typedef is always false
82 *)
97111a47 83let msg_typedef s ii n =
485bce71
C
84 incr Stat.nTypedefInfer;
85 msg_gen (!Flag_parsing_c.debug_typedef)
86 is_known_typdef
ae4735db 87 (fun s ->
97111a47
C
88 pr2_cpp
89 (Printf.sprintf "TYPEDEF: promoting:(%d) %s on line %d" n s
90 (Ast_c.line_of_info ii))
91 (*(Printf.sprintf "TYPEDEF: promoting: %s on line %d" s
92 (Ast_c.line_of_info ii))*)
34e49164
C
93 )
94 s
95
485bce71
C
96let msg_maybe_dangereous_typedef s =
97 if not (is_known_typdef s)
ae4735db 98 then
113803cf
C
99 pr2
100 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
34e49164
C
101
102
103
ae4735db 104let msg_declare_macro s =
485bce71 105 incr Stat.nMacroDecl;
34e49164 106 msg_gen (!Flag_parsing_c.debug_cpp)
ae4735db
C
107 (fun s ->
108 (match s with
34e49164 109 | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
ae4735db 110 | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
34e49164
C
111 | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
112 | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
113 | "SENSOR_DEVICE_ATTR"
114 | "LIST_HEAD"
115 | "DECLARE_WORK" | "DECLARE_TASKLET"
116 | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
117 | "DECLARE_BITMAP"
118
119 -> true
120 (*
121 | s when s =~ "^DECLARE_.*" -> true
122 | s when s =~ ".*_ATTR$" -> true
123 | s when s =~ "^DEFINE_.*" -> true
124 *)
125
126 | _ -> false
127 )
128 )
129 (fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s))
130 s
34e49164 131
ae4735db
C
132
133let msg_foreach s =
485bce71 134 incr Stat.nIteratorHeuristic;
34e49164
C
135 pr2_cpp ("MACRO: found foreach: " ^ s)
136
137
ae4735db
C
138(* ??
139let msg_debug_macro s =
34e49164 140 pr2_cpp ("MACRO: found debug-macro: " ^ s)
485bce71 141*)
34e49164
C
142
143
ae4735db 144let msg_macro_noptvirg s =
485bce71 145 incr Stat.nMacroStmt;
34e49164
C
146 pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)
147
ae4735db 148let msg_macro_toplevel_noptvirg s =
485bce71 149 incr Stat.nMacroStmt;
34e49164
C
150 pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)
151
ae4735db 152let msg_macro_noptvirg_single s =
485bce71 153 incr Stat.nMacroStmt;
34e49164
C
154 pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
155
156
485bce71
C
157
158
ae4735db 159let msg_macro_higher_order s =
485bce71 160 incr Stat.nMacroHigherOrder;
34e49164 161 msg_gen (!Flag_parsing_c.debug_cpp)
ae4735db
C
162 (fun s ->
163 (match s with
34e49164
C
164 | "DBGINFO"
165 | "DBGPX"
166 | "DFLOW"
167 -> true
168 | _ -> false
169 )
170 )
171 (fun s -> pr2_cpp ("MACRO: found higher ordre macro : " ^ s))
172 s
173
174
ae4735db 175let msg_stringification s =
485bce71 176 incr Stat.nMacroString;
34e49164 177 msg_gen (!Flag_parsing_c.debug_cpp)
ae4735db
C
178 (fun s ->
179 (match s with
34e49164
C
180 | "REVISION"
181 | "UTS_RELEASE"
182 | "SIZE_STR"
183 | "DMA_STR"
abad11c5 184 -> true
ae4735db 185 (* s when s =~ ".*STR.*" -> true *)
34e49164
C
186 | _ -> false
187 )
188 )
189 (fun s -> pr2_cpp ("MACRO: found string-macro " ^ s))
190 s
191
485bce71
C
192let msg_stringification_params s =
193 incr Stat.nMacroString;
194 pr2_cpp ("MACRO: string-macro with params : " ^ s)
195
196
197
ae4735db 198let msg_apply_known_macro s =
485bce71
C
199 incr Stat.nMacroExpand;
200 pr2_cpp ("MACRO: found known macro = " ^ s)
201
ae4735db 202let msg_apply_known_macro_hint s =
485bce71
C
203 incr Stat.nMacroHint;
204 pr2_cpp ("MACRO: found known macro hint = " ^ s)
205
206
34e49164 207
ae4735db
C
208
209let msg_ifdef_bool_passing is_ifdef_positif =
485bce71
C
210 incr Stat.nIfdefZero; (* of Version ? *)
211 if is_ifdef_positif
212 then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION"
213 else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
214
215
216let msg_ifdef_mid_something () =
217 incr Stat.nIfdefExprPassing;
218 pr2_cpp "found ifdef-mid-something"
219
220let msg_ifdef_funheaders () =
221 incr Stat.nIfdefFunheader;
222 ()
223
ae4735db 224let msg_ifdef_cparen_else () =
708f4980
C
225 incr Stat.nIfdefPassing;
226 pr2_cpp("found ifdef-cparen-else")
227
485bce71 228
ae4735db 229let msg_attribute s =
485bce71
C
230 incr Stat.nMacroAttribute;
231 pr2_cpp("ATTR:" ^ s)
ae4735db 232
485bce71
C
233
234
34e49164 235(*****************************************************************************)
485bce71 236(* The regexp and basic view definitions *)
34e49164
C
237(*****************************************************************************)
238
239(* opti: better to built then once and for all, especially regexp_foreach *)
240
abad11c5 241let regexp_macro = Str.regexp
34e49164
C
242 "^[A-Z_][A-Z_0-9]*$"
243
244(* linuxext: *)
abad11c5 245let regexp_annot = Str.regexp
34e49164
C
246 "^__.*$"
247
248(* linuxext: *)
abad11c5 249let regexp_declare = Str.regexp
34e49164
C
250 ".*DECLARE.*"
251
252(* linuxext: *)
ae4735db 253let regexp_foreach = Str.regexp_case_fold
34e49164
C
254 ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
255
256let regexp_typedef = Str.regexp
257 ".*_t$"
258
34e49164
C
259let false_typedef = [
260 "printk";
261 ]
262
34e49164 263
485bce71
C
264let ok_typedef s = not (List.mem s false_typedef)
265
ae4735db 266let not_annot s =
485bce71 267 not (s ==~ regexp_annot)
abad11c5
C
268let is_macro s =
269 s ==~ regexp_macro
270let not_macro s =
271 not (is_macro s)
485bce71
C
272
273
34e49164 274
34e49164 275
485bce71
C
276(*****************************************************************************)
277(* Helpers *)
278(*****************************************************************************)
279
485bce71 280(* ------------------------------------------------------------------------- *)
ae4735db
C
281(* the pair is the status of '()' and '{}', ex: (-1,0)
282 * if too much ')' and good '{}'
283 * could do for [] too ?
485bce71
C
284 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
285 * then if have ifdef, then certainly can lead to a problem.
286 *)
708f4980 287let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
ae4735db 288 fun xs ->
485bce71 289 let cnt_paren, cnt_brace = ref 0, ref 0 in
ae4735db 290 xs +> TV.iter_token_ifdef (fun x ->
485bce71
C
291 (match x.tok with
292 | x when TH.is_opar x -> incr cnt_paren
293 | TOBrace _ -> incr cnt_brace
294 | x when TH.is_cpar x -> decr cnt_paren
295 | TCBrace _ -> decr cnt_brace
296 | _ -> ()
297 )
298 );
299 !cnt_paren, !cnt_brace
300
301
302(* ------------------------------------------------------------------------- *)
303let forLOOKAHEAD = 30
304
ae4735db 305
485bce71 306(* look if there is a '{' just after the closing ')', and handling the
ae4735db
C
307 * possibility to have nested expressions inside nested parenthesis
308 *
485bce71
C
309 * todo: use indentation instead of premier(statement) ?
310 *)
ae4735db 311let rec is_really_foreach xs =
485bce71
C
312 let rec is_foreach_aux = function
313 | [] -> false, []
314 | TCPar _::TOBrace _::xs -> true, xs
315 (* the following attempts to handle the cases where there is a
316 single statement in the body of the loop. undoubtedly more
ae4735db 317 cases are needed.
485bce71 318 todo: premier(statement) - suivant(funcall)
abad11c5 319 *)
485bce71
C
320 | TCPar _::TIdent _::xs -> true, xs
321 | TCPar _::Tif _::xs -> true, xs
322 | TCPar _::Twhile _::xs -> true, xs
323 | TCPar _::Tfor _::xs -> true, xs
324 | TCPar _::Tswitch _::xs -> true, xs
325 | TCPar _::Treturn _::xs -> true, xs
326
327
328 | TCPar _::xs -> false, xs
ae4735db 329 | TOPar _::xs ->
485bce71
C
330 let (_, xs') = is_foreach_aux xs in
331 is_foreach_aux xs'
332 | x::xs -> is_foreach_aux xs
333 in
334 is_foreach_aux xs +> fst
335
336
337(* ------------------------------------------------------------------------- *)
ae4735db 338let set_ifdef_token_parenthize_info cnt x =
485bce71
C
339 match x with
340 | TIfdef (tag, _)
341 | TIfdefelse (tag, _)
342 | TIfdefelif (tag, _)
343 | TEndif (tag, _)
344
345 | TIfdefBool (_, tag, _)
ae4735db 346 | TIfdefMisc (_, tag, _)
abad11c5
C
347 | TIfdefVersion (_, tag, _) ->
348 tag := Some cnt;
485bce71 349
abad11c5 350 | _ -> raise (Impossible 89)
485bce71
C
351
352
485bce71 353
ae4735db 354let ifdef_paren_cnt = ref 0
485bce71 355
ae4735db
C
356
357let rec set_ifdef_parenthize_info xs =
485bce71
C
358 xs +> List.iter (function
359 | NotIfdefLine xs -> ()
ae4735db
C
360 | Ifdefbool (_, xxs, info_ifdef)
361 | Ifdef (xxs, info_ifdef) ->
362
485bce71
C
363 incr ifdef_paren_cnt;
364 let total_directives = List.length info_ifdef in
365
ae4735db 366 info_ifdef +> List.iter (fun x ->
485bce71
C
367 set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives)
368 x.tok);
369 xxs +> List.iter set_ifdef_parenthize_info
370 )
371
372
978fd7e5
C
373(*****************************************************************************)
374(* The parsing hack for #define *)
375(*****************************************************************************)
376
ae4735db 377(* To parse macro definitions I need to do some tricks
978fd7e5
C
378 * as some information can be get only at the lexing level. For instance
379 * the space after the name of the macro in '#define foo (x)' is meaningful
380 * but the grammar can not get this information. So define_ident below
381 * look at such space and generate a special TOpardefine. In a similar
382 * way macro definitions can contain some antislash and newlines
ae4735db
C
383 * and the grammar need to know where the macro ends (which is
384 * a line-level and so low token-level information). Hence the
978fd7e5 385 * function 'define_line' below and the TDefEol.
ae4735db
C
386 *
387 * update: TDefEol is handled in a special way at different places,
978fd7e5
C
388 * a little bit like EOF, especially for error recovery, so this
389 * is an important token that should not be retagged!
ae4735db
C
390 *
391 *
392 * ugly hack, a better solution perhaps would be to erase TDefEOL
393 * from the Ast and list of tokens in parse_c.
394 *
978fd7e5 395 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
ae4735db 396 *
978fd7e5
C
397 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
398 * is where is the \n TCommentSpace. Normally there is always a last token
399 * to synchronize on, either EOF or the token of the next toplevel.
ae4735db 400 * In the case of the #define we got in list of token
978fd7e5
C
401 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
402 * not synchronize on it and so we will not print the "\n".
403 * A solution would be to put the TDefEOL before the "\n".
c491d8ee 404 * (jll: tried to do this, see the comment "Put end of line..." below)
ae4735db
C
405 *
406 * todo?: could put a ExpandedTok for that ?
978fd7e5 407 *)
ae4735db
C
408let mark_end_define ii =
409 let ii' =
410 { Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
411 Common.str = "";
978fd7e5
C
412 Common.charpos = Ast_c.pos_of_info ii + 1
413 };
414 cocci_tag = ref Ast_c.emptyAnnot;
abad11c5 415 annots_tag = Token_annot.empty;
978fd7e5 416 comments_tag = ref Ast_c.emptyComments;
ae4735db 417 }
978fd7e5
C
418 in
419 TDefEOL (ii')
420
421(* put the TDefEOL at the good place *)
ae4735db 422let rec define_line_1 acc xs =
978fd7e5
C
423 match xs with
424 | [] -> List.rev acc
425 | TDefine ii::xs ->
426 let line = Ast_c.line_of_info ii in
427 let acc = (TDefine ii) :: acc in
00bac93c 428 define_line_2 acc ((=|=) line) ii xs
3a314143
C
429 | TUndef ii::xs ->
430 let line = Ast_c.line_of_info ii in
431 let acc = (TUndef ii) :: acc in
00bac93c 432 define_line_2 acc ((=|=) line) ii xs
978fd7e5
C
433 | TCppEscapedNewline ii::xs ->
434 pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
435 pr2 (Ast_c.strloc_of_info ii);
436 let acc = (TCommentSpace ii) :: acc in
437 define_line_1 acc xs
438 | x::xs -> define_line_1 (x::acc) xs
439
00bac93c 440and define_line_2 acc lineOk lastinfo xs =
ae4735db
C
441 match xs with
442 | [] ->
978fd7e5 443 (* should not happened, should meet EOF before *)
ae4735db 444 pr2 "PB: WEIRD";
978fd7e5 445 List.rev (mark_end_define lastinfo::acc)
ae4735db 446 | x::xs ->
00bac93c 447 let line = TH.line_of_tok x in
978fd7e5
C
448 let info = TH.info_of_tok x in
449
450 (match x with
ae4735db 451 | EOF ii ->
978fd7e5
C
452 let acc = (mark_end_define lastinfo) :: acc in
453 let acc = (EOF ii) :: acc in
454 define_line_1 acc xs
ae4735db 455 | TCppEscapedNewline ii ->
00bac93c 456 if not (lineOk line) then pr2 "PB: WEIRD: not same line number";
978fd7e5 457 let acc = (TCommentSpace ii) :: acc in
00bac93c
BT
458 define_line_2 acc ((=|=) (line + 1)) info xs
459 | TComment _ when lineOk line ->
460 define_line_2 (x::acc) (function x -> true) info xs
461 | TString _ when lineOk line ->
462 define_line_2 (x::acc) (function x -> true) info xs
ae4735db 463 | x ->
00bac93c
BT
464 if lineOk line
465 then define_line_2 (x::acc) ((=|=) line) info xs
c491d8ee
C
466 else
467 (* Put end of line token before the newline. A newline at least
468 must be there because the line changed and because we saw a
469 #define previously to get to this function at all *)
470 define_line_1
471 ((List.hd acc)::(mark_end_define lastinfo::(List.tl acc)))
472 (x::xs)
978fd7e5
C
473 )
474
ae4735db 475let rec define_ident acc xs =
978fd7e5
C
476 match xs with
477 | [] -> List.rev acc
3a314143
C
478 | TUndef ii::xs ->
479 let acc = TUndef ii :: acc in
480 (match xs with
481 TCommentSpace i1::TIdent (s,i2)::xs ->
482 let acc = (TCommentSpace i1) :: acc in
483 let acc = (TIdentDefine (s,i2)) :: acc in
484 define_ident acc xs
485 | _ ->
486 pr2 "WEIRD: weird #define body";
487 define_ident acc xs
488 )
ae4735db 489 | TDefine ii::xs ->
978fd7e5
C
490 let acc = TDefine ii :: acc in
491 (match xs with
ae4735db 492 | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
978fd7e5
C
493 (* Change also the kind of TIdent to avoid bad interaction
494 * with other parsing_hack tricks. For instant if keep TIdent then
495 * the stringication algo can believe the TIdent is a string-macro.
496 * So simpler to change the kind of the ident too.
497 *)
ae4735db 498 (* if TOParDefine sticked to the ident, then
978fd7e5
C
499 * it's a macro-function. Change token to avoid ambiguity
500 * between #define foo(x) and #define foo (x)
501 *)
502 let acc = (TCommentSpace i1) :: acc in
503 let acc = (TIdentDefine (s,i2)) :: acc in
504 let acc = (TOParDefine i3) :: acc in
505 define_ident acc xs
506
ae4735db 507 | TCommentSpace i1::TIdent (s,i2)::xs ->
978fd7e5
C
508 let acc = (TCommentSpace i1) :: acc in
509 let acc = (TIdentDefine (s,i2)) :: acc in
510 define_ident acc xs
511
512 (* bugfix: ident of macro (as well as params, cf below) can be tricky
513 * note, do we need to subst in the body of the define ? no cos
514 * here the issue is the name of the macro, as in #define inline,
ae4735db 515 * so obviously the name of this macro will not be used in its
978fd7e5
C
516 * body (it would be a recursive macro, which is forbidden).
517 *)
ae4735db 518
abad11c5
C
519 | TCommentSpace i1::t::xs
520 when TH.str_of_tok t ==~ Common.regexp_alpha
521 ->
978fd7e5
C
522 let s = TH.str_of_tok t in
523 let ii = TH.info_of_tok t in
abad11c5
C
524 pr2 (spf "remapping: %s to an ident in macro name" s);
525 let acc = (TCommentSpace i1) :: acc in
526 let acc = (TIdentDefine (s,ii)) :: acc in
527 define_ident acc xs
528
529 | TCommentSpace _::_::xs
530 | xs ->
ae4735db 531 pr2 "WEIRD: weird #define body";
978fd7e5
C
532 define_ident acc xs
533 )
534 | x::xs ->
535 let acc = x :: acc in
536 define_ident acc xs
978fd7e5
C
537
538
ae4735db
C
539
540let fix_tokens_define2 xs =
978fd7e5
C
541 define_ident [] (define_line_1 [] xs)
542
ae4735db 543let fix_tokens_define a =
978fd7e5 544 Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a)
ae4735db 545
978fd7e5
C
546
547
548
549
550(* ------------------------------------------------------------------------- *)
551(* Other parsing hacks related to cpp, Include/Define hacks *)
552(* ------------------------------------------------------------------------- *)
553
554(* Sometimes I prefer to generate a single token for a list of things in the
555 * lexer so that if I have to passed them, like for passing TInclude then
ae4735db
C
556 * it's easy. Also if I don't do a single token, then I need to
557 * parse the rest which may not need special stuff, like detecting
978fd7e5
C
558 * end of line which the parser is not really ready for. So for instance
559 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
ae4735db 560 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
978fd7e5
C
561 * but this kind of token is valid only after a #include and the
562 * lexing and parsing rules are different for such tokens so not that
563 * easy to parse such things in parser_c.mly. Hence the following hacks.
ae4735db 564 *
978fd7e5
C
565 * less?: maybe could get rid of this like I get rid of some of fix_define.
566 *)
567
568(* helpers *)
569
570(* used to generate new token from existing one *)
571let new_info posadd str ii =
ae4735db
C
572 { Ast_c.pinfo =
573 Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
978fd7e5
C
574 charpos = Ast_c.pos_of_info ii + posadd;
575 str = str;
576 column = Ast_c.col_of_info ii + posadd;
577 };
578 (* must generate a new ref each time, otherwise share *)
579 cocci_tag = ref Ast_c.emptyAnnot;
abad11c5 580 annots_tag = Token_annot.empty;
978fd7e5
C
581 comments_tag = ref Ast_c.emptyComments;
582 }
583
584
ae4735db 585let rec comment_until_defeol xs =
978fd7e5 586 match xs with
ae4735db 587 | [] ->
978fd7e5
C
588 (* job not done in Cpp_token_c.define_parse ? *)
589 failwith "cant find end of define token TDefEOL"
ae4735db 590 | x::xs ->
978fd7e5 591 (match x with
ae4735db 592 | Parser_c.TDefEOL i ->
978fd7e5
C
593 Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok x)
594 ::xs
ae4735db
C
595 | _ ->
596 let x' =
978fd7e5
C
597 (* bugfix: otherwise may lose a TComment token *)
598 if TH.is_real_comment x
599 then x
600 else Parser_c.TCommentCpp (Token_c.CppPassingNormal (*good?*), TH.info_of_tok x)
601 in
602 x'::comment_until_defeol xs
603 )
604
ae4735db
C
605let drop_until_defeol xs =
606 List.tl
978fd7e5
C
607 (Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs)
608
609
610
611(* ------------------------------------------------------------------------- *)
612(* returns a pair (replaced token, list of next tokens) *)
613(* ------------------------------------------------------------------------- *)
614
ae4735db
C
615let tokens_include (info, includes, filename, inifdef) =
616 Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef),
617 [Parser_c.TIncludeFilename
978fd7e5
C
618 (filename, (new_info (String.length includes) filename info))
619 ]
620
621
622
623
485bce71
C
624(*****************************************************************************)
625(* CPP handling: macros, ifdefs, macros defs *)
626(*****************************************************************************)
627
0708f913
C
628(* ------------------------------------------------------------------------- *)
629(* special skip_start skip_end handling *)
630(* ------------------------------------------------------------------------- *)
631
632(* note: after this normally the token list should not contain any more the
633 * TCommentSkipTagStart and End tokens.
634 *)
635let rec commentize_skip_start_to_end xs =
636 match xs with
637 | [] -> ()
ae4735db 638 | x::xs ->
0708f913 639 (match x with
ae4735db
C
640 | {tok = TCommentSkipTagStart info} ->
641 (try
642 let (before, x2, after) =
0708f913
C
643 xs +> Common.split_when (function
644 | {tok = TCommentSkipTagEnd _ } -> true
ae4735db 645 | _ -> false
0708f913
C
646 )
647 in
648 let topass = x::x2::before in
ae4735db 649 topass +> List.iter (fun tok ->
feec80c3 650 TV.set_as_comment Token_c.CppPassingExplicit tok
0708f913
C
651 );
652 commentize_skip_start_to_end after
ae4735db 653 with Not_found ->
0708f913
C
654 failwith "could not find end of skip_start special comment"
655 )
ae4735db 656 | {tok = TCommentSkipTagEnd info} ->
0708f913 657 failwith "found skip_end comment but no skip_start"
ae4735db 658 | _ ->
0708f913
C
659 commentize_skip_start_to_end xs
660 )
ae4735db
C
661
662
0708f913
C
663
664
34e49164
C
665(* ------------------------------------------------------------------------- *)
666(* ifdef keeping/passing *)
667(* ------------------------------------------------------------------------- *)
668
669(* #if 0, #if 1, #if LINUX_VERSION handling *)
ae4735db
C
670let rec find_ifdef_bool xs =
671 xs +> List.iter (function
34e49164 672 | NotIfdefLine _ -> ()
ae4735db 673 | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) ->
485bce71
C
674
675 msg_ifdef_bool_passing is_ifdef_positif;
34e49164
C
676
677 (match xxs with
abad11c5 678 | [] -> raise (Impossible 90)
ae4735db 679 | firstclause::xxs ->
feec80c3
C
680 info_ifdef_stmt +>
681 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
ae4735db 682
34e49164 683 if is_ifdef_positif
ae4735db 684 then xxs +> List.iter
feec80c3 685 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal))
34e49164 686 else begin
feec80c3
C
687 firstclause +>
688 iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal);
34e49164
C
689 (match List.rev xxs with
690 (* keep only last *)
ae4735db
C
691 | last::startxs ->
692 startxs +> List.iter
feec80c3 693 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal))
34e49164
C
694 | [] -> (* not #else *) ()
695 );
696 end
697 );
ae4735db 698
34e49164
C
699 | Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool
700 )
701
702
703
34e49164
C
704let thresholdIfdefSizeMid = 6
705
706(* infer ifdef involving not-closed expressions/statements *)
ae4735db
C
707let rec find_ifdef_mid xs =
708 xs +> List.iter (function
34e49164 709 | NotIfdefLine _ -> ()
ae4735db
C
710 | Ifdef (xxs, info_ifdef_stmt) ->
711 (match xxs with
abad11c5 712 | [] -> raise (Impossible 91)
34e49164 713 | [first] -> ()
ae4735db 714 | first::second::rest ->
34e49164 715 (* don't analyse big ifdef *)
ae4735db
C
716 if xxs +> List.for_all
717 (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
34e49164 718 (* don't want nested ifdef *)
ae4735db
C
719 xxs +> List.for_all (fun xs ->
720 xs +> List.for_all
34e49164
C
721 (function NotIfdefLine _ -> true | _ -> false)
722 )
ae4735db
C
723
724 then
34e49164 725 let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
ae4735db
C
726 let cnt1, cnt2 = List.hd counts in
727 if cnt1 <> 0 || cnt2 <> 0 &&
b1b2de81 728 counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
34e49164 729 (*
ae4735db
C
730 if counts +> List.exists (fun (cnt1, cnt2) ->
731 cnt1 <> 0 || cnt2 <> 0
732 )
34e49164
C
733 *)
734 then begin
485bce71
C
735 msg_ifdef_mid_something();
736
34e49164 737 (* keep only first, treat the rest as comment *)
feec80c3
C
738 info_ifdef_stmt +>
739 List.iter
740 (TV.save_as_comment (function x -> Token_c.CppIfDirective x));
ae4735db 741 (second::rest) +> List.iter
feec80c3 742 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError));
34e49164 743 end
ae4735db 744
34e49164
C
745 );
746 List.iter find_ifdef_mid xxs
ae4735db 747
34e49164 748 (* no need complex analysis for ifdefbool *)
ae4735db 749 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
34e49164 750 List.iter find_ifdef_mid xxs
ae4735db
C
751
752
34e49164
C
753 )
754
755
756let thresholdFunheaderLimit = 4
757
758(* ifdef defining alternate function header, type *)
759let rec find_ifdef_funheaders = function
760 | [] -> ()
ae4735db 761 | NotIfdefLine _::xs -> find_ifdef_funheaders xs
34e49164
C
762
763 (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
ae4735db 764 | Ifdef
34e49164
C
765 ([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1;
766 (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
ae4735db 767 ], info_ifdef_stmt
34e49164
C
768 )
769 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line3)
ae4735db 770 ::xs
34e49164
C
771 when List.length ifdefblock1 <= thresholdFunheaderLimit &&
772 List.length ifdefblock2 <= thresholdFunheaderLimit
ae4735db 773 ->
34e49164 774 find_ifdef_funheaders xs;
485bce71
C
775
776 msg_ifdef_funheaders ();
feec80c3
C
777 info_ifdef_stmt +>
778 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
34e49164 779 let all_toks = [xline2] @ line2 in
feec80c3
C
780 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError) ;
781 ifdefblock2 +> iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError);
34e49164
C
782
783 (* ifdef with nested ifdef *)
ae4735db 784 | Ifdef
34e49164 785 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
ae4735db 786 [Ifdef
34e49164
C
787 ([[NotIfdefLine (({col = 0} as xline2)::line2)];
788 [NotIfdefLine (({col = 0} as xline3)::line3)];
789 ], info_ifdef_stmt2
790 )
791 ]
ae4735db 792 ], info_ifdef_stmt
34e49164
C
793 )
794 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
ae4735db
C
795 ::xs
796 ->
34e49164 797 find_ifdef_funheaders xs;
485bce71
C
798
799 msg_ifdef_funheaders ();
feec80c3
C
800 info_ifdef_stmt +>
801 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
802 info_ifdef_stmt2 +>
803 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
34e49164 804 let all_toks = [xline2;xline3] @ line2 @ line3 in
feec80c3 805 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError);
34e49164
C
806
807 (* ifdef with elseif *)
ae4735db 808 | Ifdef
34e49164
C
809 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
810 [NotIfdefLine (({col = 0} as xline2)::line2)];
811 [NotIfdefLine (({col = 0} as xline3)::line3)];
ae4735db 812 ], info_ifdef_stmt
34e49164
C
813 )
814 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
ae4735db
C
815 ::xs
816 ->
34e49164 817 find_ifdef_funheaders xs;
485bce71
C
818
819 msg_ifdef_funheaders ();
feec80c3
C
820 info_ifdef_stmt +>
821 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
34e49164 822 let all_toks = [xline2;xline3] @ line2 @ line3 in
feec80c3 823 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError)
ae4735db 824
485bce71 825 (* recurse *)
ae4735db
C
826 | Ifdef (xxs,info_ifdef_stmt)::xs
827 | Ifdefbool (_, xxs,info_ifdef_stmt)::xs ->
828 List.iter find_ifdef_funheaders xxs;
34e49164 829 find_ifdef_funheaders xs
ae4735db 830
34e49164
C
831
832
485bce71 833(* ?? *)
ae4735db
C
834let rec adjust_inifdef_include xs =
835 xs +> List.iter (function
34e49164 836 | NotIfdefLine _ -> ()
ae4735db
C
837 | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) ->
838 xxs +> List.iter (iter_token_ifdef (fun tokext ->
34e49164 839 match tokext.tok with
ae4735db 840 | Parser_c.TInclude (s1, s2, inifdef_ref, ii) ->
34e49164
C
841 inifdef_ref := true;
842 | _ -> ()
843 ));
844 )
845
846
847
34e49164 848
485bce71 849
34e49164
C
850
851
ae4735db
C
852let rec find_ifdef_cparen_else xs =
853 let rec aux xs =
854 xs +> List.iter (function
708f4980 855 | NotIfdefLine _ -> ()
ae4735db
C
856 | Ifdef (xxs, info_ifdef_stmt) ->
857 (match xxs with
abad11c5 858 | [] -> raise (Impossible 92)
708f4980 859 | [first] -> ()
ae4735db 860 | first::second::rest ->
34e49164 861
708f4980 862 (* found a closing ')' just after the #else *)
34e49164 863
708f4980
C
864 (* Too bad ocaml does not support better list pattern matching
865 * a la Prolog-III where can match the end of lists.
866 *)
ae4735db
C
867 let condition =
868 if List.length first = 0 then false
869 else
708f4980
C
870 let last_line = Common.last first in
871 match last_line with
ae4735db
C
872 | NotIfdefLine xs ->
873 if List.length xs = 0 then false
874 else
708f4980
C
875 let last_tok = Common.last xs in
876 TH.is_cpar last_tok.tok
ae4735db 877 | Ifdef _ | Ifdefbool _ -> false
708f4980
C
878 in
879 if condition then begin
880 msg_ifdef_cparen_else();
34e49164 881
708f4980 882 (* keep only first, treat the rest as comment *)
feec80c3
C
883 info_ifdef_stmt +>
884 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
ae4735db 885 (second::rest) +> List.iter
feec80c3 886 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError));
708f4980 887 end
ae4735db 888
708f4980
C
889 );
890 List.iter aux xxs
ae4735db 891
708f4980 892 (* no need complex analysis for ifdefbool *)
ae4735db 893 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
708f4980
C
894 List.iter aux xxs
895 )
896 in aux xs
34e49164
C
897
898
708f4980
C
899(* ------------------------------------------------------------------------- *)
900(* cpp-builtin part2, macro, using standard.h or other defs *)
901(* ------------------------------------------------------------------------- *)
34e49164 902
ae4735db 903(* now in cpp_token_c.ml *)
34e49164
C
904
905(* ------------------------------------------------------------------------- *)
906(* stringification *)
907(* ------------------------------------------------------------------------- *)
908
ae4735db 909let rec find_string_macro_paren xs =
34e49164
C
910 match xs with
911 | [] -> ()
ae4735db
C
912 | Parenthised(xxs, info_parens)::xs ->
913 xxs +> List.iter (fun xs ->
914 if xs +> List.exists
485bce71 915 (function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) &&
ae4735db
C
916 xs +> List.for_all
917 (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) ->
34e49164
C
918 true | _ -> false)
919 then
ae4735db 920 xs +> List.iter (fun tok ->
34e49164 921 match tok with
ae4735db 922 | PToken({tok = TIdent (s,_)} as id) ->
1b9ae606 923
34e49164 924 msg_stringification s;
485bce71 925 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
34e49164
C
926 | _ -> ()
927 )
ae4735db 928 else
34e49164
C
929 find_string_macro_paren xs
930 );
931 find_string_macro_paren xs
ae4735db 932 | PToken(tok)::xs ->
34e49164 933 find_string_macro_paren xs
ae4735db 934
34e49164
C
935
936(* ------------------------------------------------------------------------- *)
937(* macro2 *)
938(* ------------------------------------------------------------------------- *)
939
940(* don't forget to recurse in each case *)
ae4735db 941let rec find_macro_paren xs =
34e49164
C
942 match xs with
943 | [] -> ()
ae4735db 944
34e49164
C
945 (* attribute *)
946 | PToken ({tok = Tattribute _} as id)
947 ::Parenthised (xxs,info_parens)
948 ::xs
ae4735db 949 ->
34e49164 950 pr2_cpp ("MACRO: __attribute detected ");
ae4735db 951 [Parenthised (xxs, info_parens)] +>
feec80c3
C
952 iter_token_paren (TV.set_as_comment Token_c.CppAttr);
953 TV.set_as_comment Token_c.CppAttr id;
34e49164
C
954 find_macro_paren xs
955
978fd7e5
C
956 | PToken ({tok = TattributeNoarg _} as id)
957 ::xs
ae4735db 958 ->
978fd7e5 959 pr2_cpp ("MACRO: __attributenoarg detected ");
feec80c3 960 TV.set_as_comment Token_c.CppAttr id;
978fd7e5
C
961 find_macro_paren xs
962
485bce71 963(*
708f4980 964 (* attribute cpp, __xxx id *)
485bce71 965 | PToken ({tok = TIdent (s,i1)} as id)
708f4980 966 ::PToken ({tok = TIdent (s2, i2)} as id2)
485bce71 967 ::xs when s ==~ regexp_annot
ae4735db 968 ->
485bce71
C
969 msg_attribute s;
970 id.tok <- TMacroAttr (s, i1);
708f4980 971 find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)
485bce71 972
708f4980
C
973 (* attribute cpp, id __xxx *)
974 | PToken ({tok = TIdent (s,i1)} as _id)
975 ::PToken ({tok = TIdent (s2, i2)} as id2)
976 ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
ae4735db 977 ->
485bce71 978 msg_attribute s2;
708f4980
C
979 id2.tok <- TMacroAttr (s2, i2);
980 find_macro_paren xs
981
982 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
983 ::PToken ({tok = TIdent (s,i1)} as attr)
984 ::xs when s ==~ regexp_annot
ae4735db 985 ->
708f4980
C
986 pr2_cpp ("storage attribute: " ^ s);
987 attr.tok <- TMacroAttrStorage (s,i1);
988 (* recurse, may have other storage attributes *)
989 find_macro_paren (PToken (tok1)::xs)
ae4735db 990
485bce71
C
991*)
992
993 (* storage attribute *)
994 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
ae4735db
C
995 ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs
996 ->
485bce71
C
997 pr2_cpp ("storage attribute: " ^ s);
998 attr.tok <- TMacroAttrStorage (s,i1);
999 (* recurse, may have other storage attributes *)
1000 find_macro_paren (PToken (tok1)::xs)
708f4980 1001
485bce71 1002
34e49164 1003 (* stringification
ae4735db 1004 *
34e49164 1005 * the order of the matching clause is important
ae4735db 1006 *
34e49164
C
1007 *)
1008
1009 (* string macro with params, before case *)
485bce71 1010 | PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
34e49164 1011 ::Parenthised (xxs, info_parens)
ae4735db 1012 ::xs ->
485bce71
C
1013
1014 msg_stringification_params s;
1015 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
ae4735db 1016 [Parenthised (xxs, info_parens)] +>
feec80c3 1017 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
34e49164
C
1018 find_macro_paren xs
1019
1020 (* after case *)
1021 | PToken ({tok = TIdent (s,_)} as id)
1022 ::Parenthised (xxs, info_parens)
485bce71 1023 ::PToken ({tok = (TString _ | TMacroString _)})
ae4735db 1024 ::xs ->
485bce71
C
1025
1026 msg_stringification_params s;
1027 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
ae4735db 1028 [Parenthised (xxs, info_parens)] +>
feec80c3 1029 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
34e49164
C
1030 find_macro_paren xs
1031
1032
1033 (* for the case where the string is not inside a funcall, but
1034 * for instance in an initializer.
1035 *)
ae4735db 1036
34e49164 1037 (* string macro variable, before case *)
485bce71 1038 | PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1b9ae606 1039 ::xs when not !Flag.c_plus_plus ->
485bce71 1040
34e49164 1041 msg_stringification s;
485bce71 1042 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
34e49164
C
1043 find_macro_paren xs
1044
1045 (* after case *)
485bce71
C
1046 | PToken ({tok = TIdent (s,_)} as id)
1047 ::PToken ({tok = (TString _ | TMacroString _)})
ae4735db 1048 ::xs ->
485bce71 1049
34e49164 1050 msg_stringification s;
485bce71 1051 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
34e49164
C
1052 find_macro_paren xs
1053
1054
ae4735db 1055
34e49164
C
1056
1057
1058 (* recurse *)
ae4735db
C
1059 | (PToken x)::xs -> find_macro_paren xs
1060 | (Parenthised (xxs, info_parens))::xs ->
34e49164
C
1061 xxs +> List.iter find_macro_paren;
1062 find_macro_paren xs
1063
1064
1065
1066
1067
1068(* don't forget to recurse in each case *)
ae4735db 1069let rec find_macro_lineparen xs =
34e49164
C
1070 match xs with
1071 | [] -> ()
1072
1073 (* linuxext: ex: static [const] DEVICE_ATTR(); *)
ae4735db 1074 | (Line
34e49164
C
1075 (
1076 [PToken ({tok = Tstatic _});
1077 PToken ({tok = TIdent (s,_)} as macro);
1078 Parenthised (xxs,info_parens);
1079 PToken ({tok = TPtVirg _});
ae4735db 1080 ]
34e49164 1081 ))
ae4735db
C
1082 ::xs
1083 when (s ==~ regexp_macro) ->
485bce71 1084
34e49164
C
1085 msg_declare_macro s;
1086 let info = TH.info_of_tok macro.tok in
1087 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1088
1089 find_macro_lineparen (xs)
1090
1091 (* the static const case *)
ae4735db 1092 | (Line
34e49164
C
1093 (
1094 [PToken ({tok = Tstatic _});
1095 PToken ({tok = Tconst _} as const);
1096 PToken ({tok = TIdent (s,_)} as macro);
1097 Parenthised (xxs,info_parens);
1098 PToken ({tok = TPtVirg _});
ae4735db 1099 ]
34e49164
C
1100 (*as line1*)
1101
1102 ))
ae4735db
C
1103 ::xs
1104 when (s ==~ regexp_macro) ->
485bce71 1105
34e49164
C
1106 msg_declare_macro s;
1107 let info = TH.info_of_tok macro.tok in
1108 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
ae4735db
C
1109
1110 (* need retag this const, otherwise ambiguity in grammar
34e49164
C
1111 21: shift/reduce conflict (shift 121, reduce 137) on Tconst
1112 decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
1113 decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
1114 storage_class_spec : Tstatic . (137)
1115 *)
1116 const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
1117
1118 find_macro_lineparen (xs)
1119
1120
1121 (* same but without trailing ';'
ae4735db 1122 *
34e49164
C
1123 * I do not put the final ';' because it can be on a multiline and
1124 * because of the way mk_line is coded, we will not have access to
1125 * this ';' on the next line, even if next to the ')' *)
ae4735db 1126 | (Line
34e49164
C
1127 ([PToken ({tok = Tstatic _});
1128 PToken ({tok = TIdent (s,_)} as macro);
1129 Parenthised (xxs,info_parens);
ae4735db 1130 ]
34e49164 1131 ))
ae4735db
C
1132 ::xs
1133 when s ==~ regexp_macro ->
34e49164
C
1134
1135 msg_declare_macro s;
1136 let info = TH.info_of_tok macro.tok in
1137 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1138
1139 find_macro_lineparen (xs)
1140
1141
34e49164 1142 (* on multiple lines *)
ae4735db 1143 | (Line
34e49164
C
1144 (
1145 (PToken ({tok = Tstatic _})::[]
1146 )))
ae4735db 1147 ::(Line
34e49164
C
1148 (
1149 [PToken ({tok = TIdent (s,_)} as macro);
1150 Parenthised (xxs,info_parens);
1151 PToken ({tok = TPtVirg _});
1152 ]
ae4735db 1153 )
34e49164 1154 )
ae4735db
C
1155 ::xs
1156 when (s ==~ regexp_macro) ->
485bce71 1157
34e49164
C
1158 msg_declare_macro s;
1159 let info = TH.info_of_tok macro.tok in
1160 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1161
1162 find_macro_lineparen (xs)
1163
1164
d6ce1786
C
1165 | (Line (* initializer case *)
1166 (
1167 PToken ({tok = Tstatic _}) ::
1168 PToken ({tok = TIdent (s,_)} as macro) ::
1169 Parenthised (xxs,info_parens) ::
1170 PToken ({tok = TEq _}) :: rest
1171 ))
1172 ::xs
1173 when (s ==~ regexp_macro) ->
1174
1175 msg_declare_macro s;
1176 let info = TH.info_of_tok macro.tok in
1177 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1178
1179 (* continue with the rest of the line *)
1180 find_macro_lineparen ((Line(rest))::xs)
1181
1182
1183 | (Line (* multi-line initializer case *)
1184 (
1185 (PToken ({tok = Tstatic _})::[]
1186 )))
1187 ::(Line
1188 (
1189 PToken ({tok = Tstatic _}) ::
1190 PToken ({tok = TIdent (s,_)} as macro) ::
1191 Parenthised (xxs,info_parens) ::
1192 PToken ({tok = TEq _}) :: rest
1193 ))
1194 ::xs
1195 when (s ==~ regexp_macro) ->
1196
1197 msg_declare_macro s;
1198 let info = TH.info_of_tok macro.tok in
1199 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1200
1201 (* continue with the rest of the line *)
1202 find_macro_lineparen ((Line(rest))::xs)
1203
1204
ae4735db
C
1205 (* linuxext: ex: DECLARE_BITMAP();
1206 *
34e49164
C
1207 * Here I use regexp_declare and not regexp_macro because
1208 * Sometimes it can be a FunCallMacro such as DEBUG(foo());
1209 * Here we don't have the preceding 'static' so only way to
1210 * not have positive is to restrict to .*DECLARE.* macros.
1211 *
1212 * but there is a grammar rule for that, so don't need this case anymore
0708f913 1213 * unless the parameter of the DECLARE_xxx are weird and can not be mapped
34e49164
C
1214 * on a argument_list
1215 *)
ae4735db
C
1216
1217 | (Line
34e49164
C
1218 ([PToken ({tok = TIdent (s,_)} as macro);
1219 Parenthised (xxs,info_parens);
1220 PToken ({tok = TPtVirg _});
1221 ]
1222 ))
ae4735db
C
1223 ::xs
1224 when (s ==~ regexp_declare) ->
34e49164
C
1225
1226 msg_declare_macro s;
1227 let info = TH.info_of_tok macro.tok in
1228 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1229
1230 find_macro_lineparen (xs)
1231
ae4735db 1232
34e49164
C
1233 (* toplevel macros.
1234 * module_init(xxx)
ae4735db 1235 *
34e49164
C
1236 * Could also transform the TIdent in a TMacroTop but can have false
1237 * positive, so easier to just change the TCPar and so just solve
1238 * the end-of-stream pb of ocamlyacc
1239 *)
ae4735db 1240 | (Line
34e49164
C
1241 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro);
1242 Parenthised (xxs,info_parens);
1243 ] as _line1
1244 ))
b1b2de81 1245 ::xs when col1 =|= 0
ae4735db
C
1246 ->
1247 let condition =
34e49164
C
1248 (* to reduce number of false positive *)
1249 (match xs with
ae4735db 1250 | (Line (PToken ({col = col2 } as other)::restline2))::_ ->
b1b2de81 1251 TH.is_eof other.tok || (col2 =|= 0 &&
34e49164
C
1252 (match other.tok with
1253 | TOBrace _ -> false (* otherwise would match funcdecl *)
1254 | TCBrace _ when ctx <> InFunction -> false
ae4735db 1255 | TPtVirg _
34e49164 1256 | TDotDot _
288a5454 1257 | TWhy _
34e49164
C
1258 -> false
1259 | tok when TH.is_binary_operator tok -> false
ae4735db 1260
34e49164
C
1261 | _ -> true
1262 )
1263 )
1264 | _ -> false
1265 )
1266 in
1267 if condition
1268 then begin
485bce71 1269
34e49164
C
1270 msg_macro_toplevel_noptvirg s;
1271 (* just to avoid the end-of-stream pb of ocamlyacc *)
1272 let tcpar = Common.last info_parens in
1273 tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok);
ae4735db 1274
34e49164 1275 (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
ae4735db 1276
34e49164
C
1277 end;
1278
1279 find_macro_lineparen (xs)
1280
1281
1282
ae4735db 1283 (* macro with parameters
34e49164
C
1284 * ex: DEBUG()
1285 * return x;
1286 *)
ae4735db 1287 | (Line
34e49164
C
1288 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1289 Parenthised (xxs,info_parens);
1290 ] as _line1
1291 ))
ae4735db 1292 ::(Line
34e49164
C
1293 (PToken ({col = col2 } as other)::restline2
1294 ) as line2)
ae4735db 1295 ::xs
34e49164 1296 (* when s ==~ regexp_macro *)
ae4735db 1297 ->
97111a47
C
1298 (* This can give a false positive for K&R functions if the function
1299 name is in the same column as the first parameter declaration. *)
ae4735db
C
1300 let condition =
1301 (col1 =|= col2 &&
34e49164
C
1302 (match other.tok with
1303 | TOBrace _ -> false (* otherwise would match funcdecl *)
1304 | TCBrace _ when ctx <> InFunction -> false
ae4735db 1305 | TPtVirg _
34e49164 1306 | TDotDot _
288a5454 1307 | TWhy _
34e49164
C
1308 -> false
1309 | tok when TH.is_binary_operator tok -> false
1310
1311 | _ -> true
1312 )
ae4735db
C
1313 )
1314 ||
34e49164 1315 (col2 <= col1 &&
113803cf 1316 (match other.tok, restline2 with
b1b2de81 1317 | TCBrace _, _ when ctx =*= InFunction -> true
113803cf
C
1318 | Treturn _, _ -> true
1319 | Tif _, _ -> true
1320 | Telse _, _ -> true
1321
1322 (* case of label, usually put in first line *)
ae4735db 1323 | TIdent _, (PToken ({tok = TDotDot _}))::_ ->
113803cf
C
1324 true
1325
34e49164
C
1326
1327 | _ -> false
1328 )
1329 )
1330
1331 in
ae4735db 1332
34e49164 1333 if condition
ae4735db 1334 then
b1b2de81 1335 if col1 =|= 0 then ()
34e49164
C
1336 else begin
1337 msg_macro_noptvirg s;
485bce71 1338 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
ae4735db 1339 [Parenthised (xxs, info_parens)] +>
feec80c3 1340 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
34e49164
C
1341 end;
1342
1343 find_macro_lineparen (line2::xs)
ae4735db
C
1344
1345 (* linuxext:? single macro
34e49164
C
1346 * ex: LOCK
1347 * foo();
1348 * UNLOCK
ae4735db 1349 *
113803cf 1350 * todo: factorize code with previous rule ?
34e49164 1351 *)
ae4735db 1352 | (Line
34e49164
C
1353 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1354 ] as _line1
1355 ))
ae4735db 1356 ::(Line
34e49164
C
1357 (PToken ({col = col2 } as other)::restline2
1358 ) as line2)
ae4735db 1359 ::xs ->
34e49164 1360 (* when s ==~ regexp_macro *)
ae4735db
C
1361
1362 let condition =
1363 (col1 =|= col2 &&
34e49164
C
1364 col1 <> 0 && (* otherwise can match typedef of fundecl*)
1365 (match other.tok with
ae4735db
C
1366 | TPtVirg _ -> false
1367 | TOr _ -> false
34e49164 1368 | TCBrace _ when ctx <> InFunction -> false
288a5454 1369 | TWhy _ -> false
34e49164
C
1370 | tok when TH.is_binary_operator tok -> false
1371
1372 | _ -> true
1373 )) ||
1374 (col2 <= col1 &&
1375 (match other.tok with
b1b2de81 1376 | TCBrace _ when ctx =*= InFunction -> true
34e49164
C
1377 | Treturn _ -> true
1378 | Tif _ -> true
1379 | Telse _ -> true
1380 | _ -> false
1381 ))
1382 in
ae4735db 1383
34e49164
C
1384 if condition
1385 then begin
1386 msg_macro_noptvirg_single s;
485bce71 1387 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
34e49164
C
1388 end;
1389 find_macro_lineparen (line2::xs)
ae4735db
C
1390
1391 | x::xs ->
34e49164
C
1392 find_macro_lineparen xs
1393
1394
485bce71
C
1395
1396(* ------------------------------------------------------------------------- *)
1397(* define tobrace init *)
1398(* ------------------------------------------------------------------------- *)
1399
ae4735db
C
1400let rec find_define_init_brace_paren xs =
1401 let rec aux xs =
485bce71
C
1402 match xs with
1403 | [] -> ()
1404
1405 (* mainly for firefox *)
1406 | (PToken {tok = TDefine _})
1407 ::(PToken {tok = TIdentDefine (s,_)})
1408 ::(PToken ({tok = TOBrace i1} as tokbrace))
1409 ::(PToken tok2)
1410 ::(PToken tok3)
ae4735db 1411 ::xs ->
485bce71
C
1412 let is_init =
1413 match tok2.tok, tok3.tok with
1414 | TInt _, TComma _ -> true
1415 | TString _, TComma _ -> true
1416 | TIdent _, TComma _ -> true
1417 | _ -> false
ae4735db 1418
485bce71
C
1419 in
1420 if is_init
ae4735db 1421 then begin
485bce71
C
1422 pr2_cpp("found define initializer: " ^s);
1423 tokbrace.tok <- TOBraceDefineInit i1;
1424 end;
1425
1426 aux xs
1427
1428 (* mainly for linux, especially in sound/ *)
1429 | (PToken {tok = TDefine _})
1430 ::(PToken {tok = TIdentDefine (s,_)})
1431 ::(Parenthised(xxx, info_parens))
1432 ::(PToken ({tok = TOBrace i1} as tokbrace))
1433 ::(PToken tok2)
1434 ::(PToken tok3)
ae4735db 1435 ::xs ->
485bce71
C
1436 let is_init =
1437 match tok2.tok, tok3.tok with
1438 | TInt _, TComma _ -> true
1439 | TDot _, TIdent _ -> true
1440 | TIdent _, TComma _ -> true
1441 | _ -> false
ae4735db 1442
485bce71
C
1443 in
1444 if is_init
ae4735db 1445 then begin
485bce71
C
1446 pr2_cpp("found define initializer with param: " ^ s);
1447 tokbrace.tok <- TOBraceDefineInit i1;
1448 end;
1449
1450 aux xs
1451
ae4735db 1452
485bce71
C
1453
1454 (* recurse *)
ae4735db
C
1455 | (PToken x)::xs -> aux xs
1456 | (Parenthised (xxs, info_parens))::xs ->
485bce71 1457 (* not need for tobrace init:
ae4735db 1458 * xxs +> List.iter aux;
485bce71
C
1459 *)
1460 aux xs
1461 in
1462 aux xs
1463
1464
34e49164
C
1465(* ------------------------------------------------------------------------- *)
1466(* action *)
1467(* ------------------------------------------------------------------------- *)
1468
708f4980 1469(* obsolete now with macro expansion ? get some regression if comment.
ae4735db 1470 * todo: if do bad decision here, then it can influence other phases
708f4980 1471 * and make it hard to parse. So maybe when have a parse error, should
ae4735db 1472 * undo some of the guess those heuristics have done, and restore
708f4980
C
1473 * the original token value.
1474 *)
1475
34e49164
C
1476let rec find_actions = function
1477 | [] -> ()
1478
1479 | PToken ({tok = TIdent (s,ii)})
1480 ::Parenthised (xxs,info_parens)
ae4735db 1481 ::xs ->
34e49164
C
1482 find_actions xs;
1483 xxs +> List.iter find_actions;
1484 let modified = find_actions_params xxs in
ae4735db 1485 if modified
34e49164 1486 then msg_macro_higher_order s
ae4735db
C
1487
1488 | x::xs ->
34e49164
C
1489 find_actions xs
1490
ae4735db
C
1491and find_actions_params xxs =
1492 xxs +> List.fold_left (fun acc xs ->
34e49164 1493 let toks = tokens_of_paren xs in
ae4735db
C
1494 if toks +> List.exists (fun x -> TH.is_statement x.tok)
1495 (* undo: && List.length toks > 1
708f4980
C
1496 * good for sparse, not good for linux
1497 *)
34e49164 1498 then begin
ae4735db 1499 xs +> iter_token_paren (fun x ->
34e49164 1500 if TH.is_eof x.tok
ae4735db 1501 then
34e49164 1502 (* certainly because paren detection had a pb because of
708f4980 1503 * some ifdef-exp. Do similar additional checking than
feec80c3 1504 * what is done in TV.set_as_comment.
34e49164 1505 *)
708f4980 1506 pr2 "PB: weird, I try to tag an EOF token as an action"
ae4735db 1507 else
708f4980 1508 (* cf tests-bis/no_cpar_macro.c *)
ae4735db
C
1509 if TH.is_eom x.tok
1510 then
708f4980 1511 pr2 "PB: weird, I try to tag an EOM token as an action"
ae4735db 1512 else
708f4980 1513 x.tok <- TAction (TH.info_of_tok x.tok);
34e49164
C
1514 );
1515 true (* modified *)
1516 end
1517 else acc
1518 ) false
1519
1520
1521
1522(* ------------------------------------------------------------------------- *)
1523(* main fix cpp function *)
1524(* ------------------------------------------------------------------------- *)
1525
ae4735db 1526let filter_cpp_stuff xs =
951c7801
C
1527 List.filter
1528 (function x ->
1529 (match x.tok with
1530 | tok when TH.is_comment tok -> false
34e49164
C
1531 (* don't want drop the define, or if drop, have to drop
1532 * also its body otherwise the line heuristics may be lost
1533 * by not finding the TDefine in column 0 but by finding
1534 * a TDefineIdent in a column > 0
1535 *)
951c7801
C
1536 | Parser_c.TDefine _ -> true
1537 | tok when TH.is_cpp_instruction tok -> false
1538 | _ -> true
1539 ))
1540 xs
34e49164
C
1541
1542let insert_virtual_positions l =
1543 let strlen x = String.length (Ast_c.str_of_info x) in
708f4980
C
1544 let rec loop prev offset acc = function
1545 [] -> List.rev acc
34e49164
C
1546 | x::xs ->
1547 let ii = TH.info_of_tok x in
1548 let inject pi =
1549 TH.visitor_info_of_tok (function ii -> Ast_c.rewrap_pinfo pi ii) x in
1550 match Ast_c.pinfo_of_info ii with
1551 Ast_c.OriginTok pi ->
1552 let prev = Ast_c.parse_info_of_info ii in
ae4735db 1553 loop prev (strlen ii) (x::acc) xs
34e49164 1554 | Ast_c.ExpandedTok (pi,_) ->
708f4980 1555 let x' = inject (Ast_c.ExpandedTok (pi,(prev,offset))) in
ae4735db 1556 loop prev (offset + (strlen ii)) (x'::acc) xs
34e49164 1557 | Ast_c.FakeTok (s,_) ->
708f4980 1558 let x' = inject (Ast_c.FakeTok (s,(prev,offset))) in
ae4735db 1559 loop prev (offset + (strlen ii)) (x'::acc) xs
34e49164
C
1560 | Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in
1561 let rec skip_fake = function
708f4980 1562 | [] -> []
34e49164
C
1563 | x::xs ->
1564 let ii = TH.info_of_tok x in
1565 match Ast_c.pinfo_of_info ii with
708f4980 1566 | Ast_c.OriginTok pi ->
34e49164 1567 let prev = Ast_c.parse_info_of_info ii in
708f4980
C
1568 let res = loop prev (strlen ii) [] xs in
1569 x::res
34e49164 1570 | _ -> x::skip_fake xs in
ae4735db 1571 skip_fake l
708f4980 1572
485bce71 1573(* ------------------------------------------------------------------------- *)
f59c9fb7 1574
ae4735db 1575let fix_tokens_cpp2 ~macro_defs tokens =
708f4980 1576 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
ae4735db
C
1577
1578 begin
34e49164
C
1579 (* the order is important, if you put the action heuristic first,
1580 * then because of ifdef, can have not closed paren
ae4735db
C
1581 * and so may believe that higher order macro
1582 * and it will eat too much tokens. So important to do
34e49164 1583 * first the ifdef.
ae4735db 1584 *
34e49164
C
1585 * I recompute multiple times cleaner cos the mutable
1586 * can have be changed and so may have more comments
1587 * in the token original list.
ae4735db 1588 *
34e49164
C
1589 *)
1590
0708f913
C
1591 commentize_skip_start_to_end !tokens2;
1592
34e49164 1593 (* ifdef *)
ae4735db
C
1594 let cleaner = !tokens2 +> List.filter (fun x ->
1595 (* is_comment will also filter the TCommentCpp created in
0708f913 1596 * commentize_skip_start_to_end *)
34e49164
C
1597 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1598 ) in
708f4980 1599 let ifdef_grouped = TV.mk_ifdef cleaner in
485bce71
C
1600 set_ifdef_parenthize_info ifdef_grouped;
1601
34e49164
C
1602 find_ifdef_funheaders ifdef_grouped;
1603 find_ifdef_bool ifdef_grouped;
1604 find_ifdef_mid ifdef_grouped;
ae4735db 1605 (* change order ? maybe cparen_else heuristic make some of the funheaders
708f4980
C
1606 * heuristics irrelevant ?
1607 *)
ae4735db 1608 find_ifdef_cparen_else ifdef_grouped;
34e49164
C
1609 adjust_inifdef_include ifdef_grouped;
1610
1611
1612 (* macro 1 *)
1613 let cleaner = !tokens2 +> filter_cpp_stuff in
1614
708f4980
C
1615 let paren_grouped = TV.mk_parenthised cleaner in
1616 Cpp_token_c.apply_macro_defs
ae4735db
C
1617 ~msg_apply_known_macro
1618 ~msg_apply_known_macro_hint
708f4980 1619 macro_defs paren_grouped;
34e49164 1620 (* because the before field is used by apply_macro_defs *)
ae4735db 1621 tokens2 := TV.rebuild_tokens_extented !tokens2;
34e49164
C
1622
1623 (* tagging contextual info (InFunc, InStruct, etc). Better to do
1624 * that after the "ifdef-simplification" phase.
1625 *)
ae4735db 1626 let cleaner = !tokens2 +> List.filter (fun x ->
34e49164
C
1627 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1628 ) in
1629
708f4980 1630 let brace_grouped = TV.mk_braceised cleaner in
34e49164
C
1631 set_context_tag brace_grouped;
1632
34e49164
C
1633 (* macro *)
1634 let cleaner = !tokens2 +> filter_cpp_stuff in
1635
708f4980
C
1636 let paren_grouped = TV.mk_parenthised cleaner in
1637 let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
485bce71 1638 find_define_init_brace_paren paren_grouped;
34e49164
C
1639 find_string_macro_paren paren_grouped;
1640 find_macro_lineparen line_paren_grouped;
1641 find_macro_paren paren_grouped;
1642
1643
708f4980 1644 (* obsolete: actions ? not yet *)
34e49164 1645 let cleaner = !tokens2 +> filter_cpp_stuff in
708f4980 1646 let paren_grouped = TV.mk_parenthised cleaner in
34e49164 1647 find_actions paren_grouped;
ae4735db 1648
34e49164
C
1649
1650
708f4980 1651 insert_virtual_positions (!tokens2 +> Common.acc_map (fun x -> x.tok))
34e49164
C
1652 end
1653
ae4735db 1654let time_hack1 ~macro_defs a =
708f4980 1655 Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 ~macro_defs a)
34e49164 1656
ae4735db 1657let fix_tokens_cpp ~macro_defs a =
708f4980 1658 Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 ~macro_defs a)
34e49164 1659
34e49164 1660
34e49164 1661
abad11c5
C
1662let can_be_on_top_level tl =
1663 match tl with
1664 | Tstruct _
1665 | Ttypedef _
1666 | TDefine _
1667 | TIfdef _
1668 | TIfdefelse _
1669 | TIfdefelif _
1670 | TIfdefBool _
1671 | TIfdefMisc _
1672 | TIfdefVersion _
1673 | TEndif _ -> true
1674 | _ -> false
1675
34e49164
C
1676
1677(*****************************************************************************)
1678(* Lexing with lookahead *)
1679(*****************************************************************************)
1680
1681(* Why using yet another parsing_hack technique ? The fix_xxx where do
ae4735db 1682 * some pre-processing on the full list of tokens is not enough ?
34e49164
C
1683 * No cos sometimes we need more contextual info, and even if
1684 * set_context() tries to give some contextual info, it's not completely
1685 * accurate so the following code give yet another alternative, yet another
1686 * chance to transform some tokens.
ae4735db 1687 *
34e49164
C
1688 * todo?: maybe could try to get rid of this technique. Maybe a better
1689 * set_context() would make possible to move this code using a fix_xx
1690 * technique.
ae4735db 1691 *
485bce71 1692 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
34e49164
C
1693 * it is more general to do it via my LALR(k) tech. Because here we can
1694 * transform some token give some context information. So sometimes it
1695 * makes sense to transform a token in one context, sometimes not, and
1696 * lex can not provide us this context information. Note that the order
ae4735db
C
1697 * in the pattern matching in lookahead is important. Do not cut/paste.
1698 *
34e49164
C
1699 * Note that in next there is only "clean" tokens, there is no comment
1700 * or space tokens. This is done by the caller.
ae4735db 1701 *
34e49164
C
1702 *)
1703
485bce71
C
1704open Lexer_parser (* for the fields of lexer_hint type *)
1705
1706let not_struct_enum = function
1707 | (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _)::_ -> false
1708 | _ -> true
34e49164 1709
1b9ae606
C
1710let pointer ?(followed_by=fun _ -> true)
1711 ?(followed_by_more=fun _ -> true) ts =
1712 let rec loop ts =
1713 match ts with
1714 | TMul _ :: rest -> loop rest
1715 | TAnd _ :: rest when !Flag.c_plus_plus -> loop rest
1716 | t :: ts' -> followed_by t && followed_by_more ts'
1717 | [] -> failwith "unexpected end of token stream" in
1718 match ts with
1719 | TMul _ :: rest -> loop rest
1720 | TAnd _ :: rest when !Flag.c_plus_plus -> loop rest
f59c9fb7 1721 | _ -> false
485bce71 1722
feec80c3 1723let ident = function
17ba0788 1724 TIdent _ -> true
feec80c3 1725 | _ -> false
17ba0788 1726
abad11c5
C
1727let is_type = function
1728 | TypedefIdent _
1729 | Tvoid _
1730 | Tchar _
1731 | Tfloat _
1732 | Tdouble _
1b9ae606 1733
abad11c5
C
1734 | Tsize_t _
1735 | Tssize_t _
1736 | Tptrdiff_t _
1737
1738 | Tint _
1739 | Tlong _
1740 | Tshort _ -> true
1741 | _ -> false
1742
1743let is_cparen = function (TCPar _) -> true | _ -> false
1744let is_oparen = function (TOPar _) -> true | _ -> false
1745
1746let rec not_has_type_before f xs =
1747 match xs with
1748 | [] -> raise (Impossible 666)
1749 | x :: xs ->
1750 if f x then
1751 true
1752 else if is_type x then
1753 false
1754 else
1755 not_has_type_before f xs
1756
97111a47
C
1757(* This function is inefficient, because it will look over a K&R header,
1758or function prototype multiple times. At least when we see a , and are in a
1759parameter list, we know we will eventually see a close paren, and it
1760should come fairly soon. *)
1761let k_and_r l =
abad11c5 1762 let l1 = drop_until is_cparen l in
97111a47
C
1763 match l1 with
1764 (TCPar _) :: (TOCro _) :: _ -> false
1765 | (TCPar _) :: _ -> true
1766 | _ -> false
34e49164 1767
97111a47
C
1768(* (a)(b) is ambiguous, because (a) could be a function name or a cast.
1769At this point, we just see an ident for a; we don't know if it is eg a local
1770variable. This function sees at least if b is the only argument, ie there
1771are no commas at top level *)
1772let paren_before_comma l =
1773 let rec loop level = function
1774 [] -> false
1775 | (TComma _)::_ when level = 1 -> false
1776 | (TCPar _)::_ when level = 1 -> true
1777 | (TCPar _)::rest -> loop (level-1) rest
1778 | (TOPar _)::rest -> loop (level+1) rest
1779 | x::rest -> loop level rest in
1780 loop 0 l
1781
1782let lookahead2 ~pass next before =
34e49164 1783 match (next, before) with
1b9ae606 1784
4dfbc1c2
C
1785 (* c++ hacks *)
1786 (* yy xx( and in function *)
1787 | TOPar i1::_, TIdent(s,i2)::TypedefIdent _::_
1788 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1789 pr2_cpp("constructed_object: " ^s);
1790 TOParCplusplusInit i1
1b9ae606
C
1791 | TOPar i1::_, TIdent(s,i2)::ptr
1792 when !Flag.c_plus_plus
1793 && pointer ~followed_by:(function TypedefIdent _ -> true | _ -> false) ptr
1794 && (LP.current_context () = (LP.InFunction)) ->
1795 pr2_cpp("constructed_object: " ^s);
1796 TOParCplusplusInit i1
4dfbc1c2
C
1797 | TypedefIdent(s,i)::TOPar i1::_,_
1798 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1799 TIdent(s,i)
1800
34e49164
C
1801 (*-------------------------------------------------------------*)
1802 (* typedef inference, parse_typedef_fix3 *)
1803 (*-------------------------------------------------------------*)
1804 (* xx xx *)
b1b2de81 1805 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
34e49164
C
1806 && ok_typedef s
1807 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
ae4735db 1808 ->
34e49164
C
1809 (* parse_typedef_fix3:
1810 * acpi_object acpi_object;
ae4735db 1811 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
34e49164
C
1812 * Le parser en interne a deja appelé le prochain token pour pouvoir
1813 * decider des choses.
1814 * => special case in lexer_heuristic, again
1815 *)
ae4735db
C
1816 if !Flag_parsing_c.debug_typedef
1817 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
34e49164 1818
34e49164 1819
abad11c5 1820 LP.disable_typedef();
97111a47 1821 msg_typedef s i1 1; LP.add_typedef_root s;
34e49164
C
1822 TypedefIdent (s, i1)
1823
abad11c5 1824 (* christia *)
abad11c5 1825
1b9ae606
C
1826 (* delete[] *)
1827 | (TOCro i1 :: _, Tdelete _ :: _)
1828 when !Flag.c_plus_plus ->
1829 TCommentCpp (Token_c.CppDirective, i1)
1830 (* delete[] *)
1831 | (TCCro i1 :: _, Tdelete _ :: _)
1832 when !Flag.c_plus_plus ->
1833 TCommentCpp (Token_c.CppDirective, i1)
1834
1835 (* extern "_" tt *)
1836 | ((TString ((s, _), i1) | TMacroString (s, i1)) :: _ , Textern _ :: _)
1837 when !Flag.c_plus_plus ->
1838 TCommentCpp (Token_c.CppDirective, i1)
1839
1840 (* ) const { *)
1841 | (Tconst i1 :: TOBrace _ :: _ , TCPar _ :: _)
1842 when !Flag.c_plus_plus ->
1843 TCommentCpp (Token_c.CppDirective, i1)
1844
1845 (* xx const tt *)
1846 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::type_::_ , _)
1847 when not_struct_enum before
1848 && is_type type_ ->
abad11c5
C
1849 TCommentCpp (Token_c.CppDirective, i1)
1850
1851 (* xx struct *)
1b9ae606
C
1852 | (TIdent (s, i1)::Tstruct _::_ , _)
1853 when not_struct_enum before ->
abad11c5
C
1854 TCommentCpp (Token_c.CppDirective, i1)
1855
1856 (* xx tt *)
1b9ae606
C
1857 | (TIdent (s, i1)::type_::_ , _)
1858 when not_struct_enum before
1859 && is_type type_ ->
abad11c5
C
1860 TCommentCpp (Token_c.CppDirective, i1)
1861
1862 (* tt xx yy *)
1b9ae606
C
1863 | (TIdent (s, i1)::TIdent (s2, i2)::_ , seen::_)
1864 when not_struct_enum before
1865 && is_type seen ->
abad11c5
C
1866 if is_macro s2 then
1867 TIdent (s, i1)
1868 else
1869 TCommentCpp (Token_c.CppDirective, i1)
1870
7d539fd0
BT
1871 (* exception to next rule *)
1872 | (TIdent (s2, i2)::TOPar _::_ , TIdent (s1, i1)::seen::_)
1873 when not_struct_enum before
1874 && is_macro s2 && is_type seen ->
1875 TIdent (s2, i2)
1876
1b9ae606
C
1877 | (TIdent (s2, i2)::_ , TIdent (s, i1)::seen::_)
1878 when not_struct_enum before
1879 && is_macro s2 && is_type seen ->
abad11c5
C
1880 TCommentCpp (Token_c.CppDirective, i2)
1881
1882 (* tt xx * *)
1b9ae606
C
1883 | (TIdent (s, i1)::ptr , seen::_)
1884 when not_struct_enum before
1885 && pointer ptr && is_type seen ->
abad11c5
C
1886 TCommentCpp (Token_c.CppDirective, i1)
1887
1888 (* tt * xx yy *)
1b9ae606
C
1889 | (TIdent (s, i1)::TIdent(s2, i2)::_ , ptr)
1890 when not_struct_enum before
1891 && pointer ptr ->
abad11c5
C
1892 if is_macro s2 then
1893 TIdent (s, i1)
1894 else
1895 TCommentCpp (Token_c.CppDirective, i1)
1896
7d539fd0
BT
1897 (* exception to next rule *)
1898 | (TIdent (s2, i2)::TOPar _::_ , TIdent (s1, i1)::ptr)
1899 when not_struct_enum before
1900 && is_macro s2 && pointer ptr ->
1901 TIdent (s2, i2)
abad11c5 1902 (* tt * xx yy *)
1b9ae606
C
1903 | (TIdent(s2, i2)::_ , TIdent (s, i1)::ptr)
1904 when not_struct_enum before
1905 && is_macro s2 && pointer ptr ->
abad11c5
C
1906 TCommentCpp (Token_c.CppDirective, i2)
1907
1908 (* exception to next rule *)
1b9ae606
C
1909 | (TIdent(s2, i2)::TOPar _ :: _ , TIdent(s, i1)::seen::_)
1910 when not_struct_enum before
1911 && is_macro s2 && is_type seen ->
abad11c5
C
1912 TIdent(s2, i2)
1913 (* tt xx yy *)
1b9ae606
C
1914 | (TIdent(s2, i2)::_ , TIdent(s, i1)::seen::_)
1915 when not_struct_enum before
1916 && is_macro s2 && is_type seen ->
abad11c5
C
1917 TCommentCpp (Token_c.CppDirective, i2)
1918
1919 (* xx * yy AND in paramdecl *)
1b9ae606 1920 | (TIdent (s, i1)::ptr , _)
abad11c5 1921 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1b9ae606
C
1922 && pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
1923 && ok_typedef s ->
abad11c5
C
1924 msg_typedef s i1 14; LP.add_typedef_root s;
1925 TypedefIdent (s, i1)
1926
1b9ae606 1927 (* xx MM ( *)
7d539fd0
BT
1928 (* exception to next rule *)
1929 | (TIdent (s, i1)::TIdent (s2, i2)::TOPar _::_ , _) when not_struct_enum before
1930 && ok_typedef s && is_macro s2
abad11c5
C
1931 ->
1932 TIdent (s, i1)
34e49164 1933 (* xx yy *)
ae4735db 1934 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
34e49164
C
1935 && ok_typedef s
1936 ->
1937 (* && not_annot s2 BUT lead to false positive*)
97111a47 1938 msg_typedef s i1 2; LP.add_typedef_root s;
34e49164
C
1939 TypedefIdent (s, i1)
1940
1941
1942 (* xx inline *)
ae4735db 1943 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
34e49164 1944 && ok_typedef s
ae4735db 1945 ->
abad11c5 1946
97111a47 1947 msg_typedef s i1 3; LP.add_typedef_root s;
34e49164
C
1948 TypedefIdent (s, i1)
1949
1950
1951 (* [,(] xx [,)] AND param decl *)
97111a47 1952 | (TIdent (s, i1)::(((TComma _|TCPar _)::_) as rest) ,
abad11c5 1953 ((TComma _ |TOPar _)::_ as bef))
97111a47 1954 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
abad11c5
C
1955 && k_and_r rest
1956 && not_has_type_before is_cparen rest
1957 && not_has_type_before is_oparen bef
97111a47 1958 ->
abad11c5 1959
97111a47
C
1960 TKRParam(s,i1)
1961
1962 | (TIdent (s, i1)::((TComma _|TCPar _)::_) , (TComma _ |TOPar _)::_ )
b1b2de81 1963 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
34e49164 1964 && ok_typedef s
ae4735db 1965 ->
abad11c5 1966
97111a47
C
1967 msg_typedef s i1 4; LP.add_typedef_root s;
1968 TypedefIdent (s, i1)
34e49164
C
1969
1970 (* xx* [,)] *)
1971 (* specialcase: [,(] xx* [,)] *)
1b9ae606
C
1972 | (TIdent (s, i1)::ptr , (*(TComma _|TOPar _)::*)_ )
1973 when pointer ~followed_by:(function TComma _ |TCPar _ -> true | _ -> false) ptr
1974 && not_struct_enum before
34e49164
C
1975 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1976 && ok_typedef s
ae4735db 1977 ->
abad11c5 1978
97111a47 1979 msg_typedef s i1 5; LP.add_typedef_root s;
34e49164
C
1980 TypedefIdent (s, i1)
1981
1982
1983 (* xx** [,)] *)
1984 (* specialcase: [,(] xx** [,)] *)
1985 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1986 when not_struct_enum before
1987 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1988 && ok_typedef s
ae4735db 1989 ->
abad11c5 1990
97111a47 1991 msg_typedef s i1 6; LP.add_typedef_root s;
34e49164
C
1992 TypedefIdent (s, i1)
1993
1994
1995
1996 (* xx const * USELESS because of next rule ? *)
ae4735db
C
1997 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
1998 when not_struct_enum before
34e49164
C
1999 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2000 && ok_typedef s
2001 ->
2002
97111a47 2003 msg_typedef s i1 7; LP.add_typedef_root s;
34e49164 2004 TypedefIdent (s, i1)
ae4735db 2005
34e49164 2006 (* xx const *)
ae4735db
C
2007 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
2008 when not_struct_enum before
34e49164
C
2009 && ok_typedef s
2010 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2011 ->
2012
97111a47 2013 msg_typedef s i1 8; LP.add_typedef_root s;
34e49164
C
2014 TypedefIdent (s, i1)
2015
2016
2017 (* xx * const *)
1b9ae606
C
2018 | (TIdent (s, i1)::ptr , _ )
2019 when pointer ~followed_by:(function Tconst _ | Tvolatile _ | Trestrict _ -> true | _ -> false) ptr
2020 && not_struct_enum before
2021 && ok_typedef s
34e49164 2022 ->
abad11c5 2023
34e49164
C
2024 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2025
97111a47 2026 msg_typedef s i1 9; LP.add_typedef_root s;
34e49164
C
2027 TypedefIdent (s, i1)
2028
2029
2030 (* ( const xx) *)
485bce71 2031 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
34e49164 2032 ok_typedef s ->
abad11c5 2033
97111a47 2034 msg_typedef s i1 10; LP.add_typedef_root s;
34e49164 2035 TypedefIdent (s, i1)
ae4735db 2036
34e49164
C
2037
2038
2039 (* ( xx ) [sizeof, ~] *)
485bce71 2040 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
34e49164
C
2041 when not_struct_enum before
2042 && ok_typedef s
ae4735db 2043 ->
abad11c5 2044
97111a47 2045 msg_typedef s i1 11; LP.add_typedef_root s;
34e49164
C
2046 TypedefIdent (s, i1)
2047
2048 (* [(,] xx [ AND parameterdeclaration *)
2049 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
b1b2de81 2050 when (LP.current_context() =*= LP.InParameter)
34e49164 2051 && ok_typedef s
ae4735db 2052 ->
abad11c5 2053
97111a47 2054 msg_typedef s i1 12; LP.add_typedef_root s;
34e49164 2055 TypedefIdent (s, i1)
ae4735db 2056
34e49164
C
2057 (*------------------------------------------------------------*)
2058 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
2059 (* but if have a '=', or ',' I think not *)
2060 (*------------------------------------------------------------*)
2061
2062 (* static xx * yy *)
1b9ae606 2063 | (TIdent (s, i1)::ptr ,
485bce71 2064 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
1b9ae606
C
2065 pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
2066 && ok_typedef s
34e49164 2067 ->
abad11c5 2068
97111a47 2069 msg_typedef s i1 13; LP.add_typedef_root s;
34e49164 2070 TypedefIdent (s, i1)
ae4735db 2071
34e49164
C
2072 (* TODO xx * yy ; AND in start of compound element *)
2073
2074
2075 (* xx * yy, AND in paramdecl *)
1b9ae606 2076 | (TIdent (s, i1)::ptr , _)
b1b2de81 2077 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1b9ae606
C
2078 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2079 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
2080 && ok_typedef s
ae4735db 2081 ->
34e49164 2082
97111a47 2083 msg_typedef s i1 14; LP.add_typedef_root s;
34e49164
C
2084 TypedefIdent (s, i1)
2085
2086
2087 (* xx * yy ; AND in Toplevel, except when have = before *)
2088 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
2089 TIdent (s, i1)
1b9ae606
C
2090 | (TIdent (s, i1)::ptr , _)
2091 when not_struct_enum before
2092 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2093 ~followed_by_more:(function TPtVirg _ :: _ -> true | _ -> false) ptr
2094 && (LP.is_top_or_struct (LP.current_context ()))
ae4735db 2095 ->
abad11c5 2096
97111a47 2097 msg_typedef s i1 15; LP.add_typedef_root s;
34e49164
C
2098 TypedefIdent (s, i1)
2099
2100 (* xx * yy , AND in Toplevel *)
1b9ae606 2101 | (TIdent (s, i1)::ptr , _)
b1b2de81 2102 when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
1b9ae606
C
2103 && ok_typedef s
2104 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2105 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
ae4735db 2106 ->
34e49164 2107
97111a47 2108 msg_typedef s i1 16; LP.add_typedef_root s;
34e49164
C
2109 TypedefIdent (s, i1)
2110
2111 (* xx * yy ( AND in Toplevel *)
1b9ae606 2112 | (TIdent (s, i1)::ptr , _)
ae4735db 2113 when not_struct_enum before
1b9ae606
C
2114 && (LP.is_top_or_struct (LP.current_context ()))
2115 && ok_typedef s
2116 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2117 ~followed_by_more:(function TOPar _ :: _ -> true | _ -> false) ptr
34e49164 2118 ->
abad11c5 2119
97111a47 2120 msg_typedef s i1 17; LP.add_typedef_root s;
34e49164 2121 TypedefIdent (s, i1)
ae4735db 2122
34e49164
C
2123 (* xx * yy [ *)
2124 (* todo? enough ? cos in struct def we can have some expression ! *)
1b9ae606
C
2125 | (TIdent (s, i1)::ptr , _)
2126 when not_struct_enum before
2127 && (LP.is_top_or_struct (LP.current_context ()))
2128 && ok_typedef s
2129 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2130 ~followed_by_more:(function TOCro _ :: _ -> true | _ -> false) ptr
ae4735db 2131 ->
abad11c5 2132
97111a47 2133 msg_typedef s i1 18; LP.add_typedef_root s;
34e49164
C
2134 TypedefIdent (s, i1)
2135
2136 (* u16: 10; in struct *)
2137 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
485bce71 2138 when (LP.is_top_or_struct (LP.current_context ()))
ae4735db
C
2139 && ok_typedef s
2140 ->
abad11c5 2141
97111a47 2142 msg_typedef s i1 19; LP.add_typedef_root s;
34e49164 2143 TypedefIdent (s, i1)
ae4735db 2144
34e49164
C
2145
2146 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
2147 (* YES cos at toplevel can have some expression !! for instance when *)
2148 (* enter in the dimension of an array *)
2149 (*
2150 | (TIdent s::TMul::TIdent s2::_ , _)
2151 when (take_safe 1 !passed_tok <> [Tstruct] &&
2152 (take_safe 1 !passed_tok <> [Tenum]))
2153 &&
ae4735db 2154 !LP._lexer_hint = Some LP.Toplevel ->
4dfbc1c2 2155 msg_typedef s 20; LP.add_typedef_root s;
34e49164
C
2156 TypedefIdent s
2157 *)
2158
2159 (* xx * yy = *)
1b9ae606 2160 | (TIdent (s, i1)::ptr , _)
ae4735db 2161 when not_struct_enum before
1b9ae606
C
2162 && ok_typedef s
2163 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2164 ~followed_by_more:(function TEq _ :: _ -> true | _ -> false) ptr
34e49164 2165 ->
abad11c5 2166
97111a47 2167 msg_typedef s i1 21; LP.add_typedef_root s;
34e49164
C
2168 TypedefIdent (s, i1)
2169
2170
2171 (* xx * yy) AND in paramdecl *)
1b9ae606
C
2172 | (TIdent (s, i1)::ptr , _)
2173 when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
2174 && ok_typedef s
2175 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2176 ~followed_by_more:(function TCPar _ :: _ -> true | _ -> false) ptr
34e49164 2177 ->
abad11c5 2178
97111a47 2179 msg_typedef s i1 22; LP.add_typedef_root s;
34e49164 2180 TypedefIdent (s, i1)
ae4735db 2181
34e49164
C
2182
2183 (* xx * yy; *) (* wrong ? *)
1b9ae606
C
2184 | (TIdent (s, i1)::ptr ,
2185 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
2186 && ok_typedef s
2187 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2188 ~followed_by_more:(function TPtVirg _ :: _ -> true | _ -> false) ptr
34e49164 2189 ->
abad11c5 2190
97111a47 2191 msg_typedef s i1 23; LP.add_typedef_root s;
485bce71 2192 msg_maybe_dangereous_typedef s;
34e49164
C
2193 TypedefIdent (s, i1)
2194
2195
2196 (* xx * yy, and ';' before xx *) (* wrong ? *)
1b9ae606 2197 | (TIdent (s, i1)::ptr ,
34e49164 2198 (TOBrace _| TPtVirg _)::_) when
1b9ae606
C
2199 ok_typedef s
2200 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2201 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
34e49164 2202 ->
abad11c5 2203
97111a47 2204 msg_typedef s i1 24; LP.add_typedef_root s;
34e49164
C
2205 TypedefIdent (s, i1)
2206
2207
2208 (* xx_t * yy *)
1b9ae606 2209 | (TIdent (s, i1)::ptr , _)
ae4735db
C
2210 when s ==~ regexp_typedef && not_struct_enum before
2211 (* struct user_info_t sometimes *)
1b9ae606
C
2212 && ok_typedef s
2213 && pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
ae4735db 2214 ->
abad11c5 2215
97111a47 2216 msg_typedef s i1 25; LP.add_typedef_root s;
34e49164
C
2217 TypedefIdent (s, i1)
2218
2219 (* xx ** yy *) (* wrong ? *)
2220 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
1b9ae606
C
2221 when not_struct_enum before
2222 && (LP.current_context() =*= LP.InParameter)
2223 && ok_typedef s
2224 ->
2225
2226 msg_typedef s i1 26; LP.add_typedef_root s;
2227 TypedefIdent (s, i1)
2228
2229 (* xx ** yy *) (* wrong ? *)
2230 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , (TOBrace _ | TPtVirg _)::_)
34e49164
C
2231 when not_struct_enum before
2232 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
abad11c5
C
2233 && ok_typedef s
2234 (* christia : this code catches 'a * *b' which is wrong
2235 *)
abad11c5
C
2236 ->
2237
97111a47 2238 msg_typedef s i1 26; LP.add_typedef_root s;
34e49164
C
2239 TypedefIdent (s, i1)
2240
2241 (* xx *** yy *)
2242 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
ae4735db
C
2243 when not_struct_enum before
2244 && ok_typedef s
34e49164
C
2245 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2246 ->
abad11c5 2247
97111a47 2248 msg_typedef s i1 27; LP.add_typedef_root s;
34e49164
C
2249 TypedefIdent (s, i1)
2250
2251 (* xx ** ) *)
2252 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
ae4735db 2253 when not_struct_enum before
34e49164 2254 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
ae4735db 2255 && ok_typedef s
34e49164 2256 ->
abad11c5 2257
97111a47 2258 msg_typedef s i1 28; LP.add_typedef_root s;
34e49164
C
2259 TypedefIdent (s, i1)
2260
2261
2262
2263 (* ----------------------------------- *)
ae4735db 2264 (* old: why not do like for other rules and start with TIdent ?
485bce71
C
2265 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
2266 * new: prefer now start with TIdent because otherwise the add_typedef_root
2267 * may have no effect if in second pass or if have disable the add_typedef.
2268 *)
34e49164
C
2269
2270 (* (xx) yy *)
feec80c3 2271 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
ae4735db 2272 (TOPar info)::x::_)
17ba0788
C
2273 when not (TH.is_stuff_taking_parenthized x) (* &&
2274 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 - why useful?
2275 *)
ae4735db 2276 && ok_typedef s
feec80c3 2277 && not (ident x) (* possible K&R declaration *)
ae4735db 2278 ->
abad11c5 2279
97111a47 2280 msg_typedef s i1 29; LP.add_typedef_root s;
485bce71
C
2281 (*TOPar info*)
2282 TypedefIdent (s, i1)
34e49164
C
2283
2284
ae4735db 2285 (* (xx) ( yy)
91eba41f
C
2286 * but false positif: typedef int (xxx_t)(...), so do specialisation below.
2287 *)
2288 (*
ae4735db
C
2289 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
2290 when not (TH.is_stuff_taking_parenthized x)
2291 && ok_typedef s
34e49164 2292 ->
4dfbc1c2 2293 msg_typedef s 30; LP.add_typedef_root s;
485bce71
C
2294 (* TOPar info *)
2295 TypedefIdent (s, i1)
91eba41f
C
2296 *)
2297 (* special case: = (xx) ( yy) *)
97111a47 2298 | (TIdent (s, i1)::TCPar _::((TOPar _::_) as rest) ,
91eba41f 2299 (TOPar info)::(TEq _ |TEqEq _)::_)
97111a47 2300 when ok_typedef s && paren_before_comma rest
91eba41f 2301 ->
abad11c5 2302
97111a47 2303 msg_typedef s i1 31; LP.add_typedef_root s;
91eba41f
C
2304 (* TOPar info *)
2305 TypedefIdent (s, i1)
2306
34e49164
C
2307
2308 (* (xx * ) yy *)
1b9ae606
C
2309 | (TIdent (s, i1)::ptr, (TOPar info)::_)
2310 when ok_typedef s
2311 && pointer ~followed_by:(function TCPar _ -> true | _ -> false)
2312 ~followed_by_more:(function TIdent _ :: _ -> true | _ -> false) ptr
ae4735db 2313 ->
abad11c5 2314
97111a47 2315 msg_typedef s i1 32; LP.add_typedef_root s;
485bce71
C
2316 (*TOPar info*)
2317 TypedefIdent (s,i1)
2318
34e49164
C
2319
2320 (* (xx){ ... } constructor *)
ae4735db
C
2321 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
2322 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
2323 && ok_typedef s
34e49164 2324 ->
abad11c5 2325
97111a47 2326 msg_typedef s i1 33; LP.add_typedef_root s;
34e49164
C
2327 TypedefIdent (s, i1)
2328
2329
2330 (* can have sizeof on expression
ae4735db 2331 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
708f4980 2332 msg_typedef s; LP.add_typedef_root s;
34e49164
C
2333 Tsizeof
2334 *)
91eba41f
C
2335
2336
2337 (* ----------------------------------- *)
2338 (* x ( *y )(params), function pointer *)
ae4735db 2339 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
34e49164 2340 when not_struct_enum before
ae4735db 2341 && ok_typedef s
34e49164 2342 ->
abad11c5 2343
97111a47 2344 msg_typedef s i1 34; LP.add_typedef_root s;
34e49164
C
2345 TypedefIdent (s, i1)
2346
91eba41f 2347 (* x* ( *y )(params), function pointer 2 *)
ae4735db 2348 | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
91eba41f 2349 when not_struct_enum before
ae4735db 2350 && ok_typedef s
91eba41f 2351 ->
abad11c5 2352
97111a47 2353 msg_typedef s i1 35; LP.add_typedef_root s;
91eba41f
C
2354 TypedefIdent (s, i1)
2355
34e49164
C
2356
2357 (*-------------------------------------------------------------*)
2358 (* CPP *)
2359 (*-------------------------------------------------------------*)
485bce71
C
2360 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
2361 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
34e49164 2362 as x)
ae4735db
C
2363 ::_, _
2364 ->
abad11c5 2365
485bce71 2366 (*
ae4735db 2367 if not !Flag_parsing_c.ifdef_to_if
feec80c3 2368 then TCommentCpp (Ast_c.CppIfDirective, ii)
ae4735db 2369 else
485bce71 2370 *)
0708f913
C
2371 (* not !LP._lexer_hint.toplevel *)
2372 if !Flag_parsing_c.ifdef_directive_passing
708f4980 2373 || (pass >= 2)
0708f913 2374 then begin
ae4735db 2375
b1b2de81 2376 if (LP.current_context () =*= LP.InInitializer)
ae4735db 2377 then begin
0708f913
C
2378 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
2379 incr Stat.nIfdefInitializer;
ae4735db 2380 end else begin
708f4980 2381 pr2_cpp("IFDEF: or related inside function. I treat it as comment");
0708f913
C
2382 incr Stat.nIfdefPassing;
2383 end;
feec80c3
C
2384 let x =
2385 match x with
2386 TIfdef _ | TIfdefMisc _ | TIfdefVersion _ -> Token_c.IfDef
2387 | TIfdefBool _ -> Token_c.IfDef0
2388 | TIfdefelse _ | TIfdefelif _ -> Token_c.Else
2389 | TEndif _ -> Token_c.Endif
2390 | _ -> Token_c.Other in (* not possible here *)
2391 TCommentCpp (Token_c.CppIfDirective x, ii)
0708f913
C
2392 end
2393 else x
ae4735db 2394
3a314143 2395 | (TUndef (ii) as x)::_, _
ae4735db 2396 ->
708f4980 2397 if (pass >= 2)
485bce71 2398 then begin
0708f913
C
2399 pr2_cpp("UNDEF: I treat it as comment");
2400 TCommentCpp (Token_c.CppDirective, ii)
113803cf
C
2401 end
2402 else x
2403
ae4735db
C
2404 | (TCppDirectiveOther (ii) as x)::_, _
2405 ->
708f4980 2406 if (pass >= 2)
113803cf 2407 then begin
0708f913
C
2408 pr2_cpp ("OTHER directive: I treat it as comment");
2409 TCommentCpp (Token_c.CppDirective, ii)
485bce71
C
2410 end
2411 else x
34e49164
C
2412
2413 (* If ident contain a for_each, then certainly a macro. But to be
2414 * sure should look if there is a '{' after the ')', but it requires
2415 * to count the '('. Because this can be expensive, we do that only
ae4735db 2416 * when the token contains "for_each".
34e49164 2417 *)
ae4735db 2418 | (TIdent (s, i1)::TOPar _::rest, _)
b1b2de81 2419 when not (LP.current_context () =*= LP.InTopLevel)
ae4735db
C
2420 (* otherwise a function such as static void loopback_enable(int i) {
2421 * will be considered as a loop
34e49164
C
2422 *)
2423 ->
2424
ae4735db 2425 if s ==~ regexp_foreach &&
34e49164 2426 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
ae4735db 2427
34e49164
C
2428 then begin
2429 msg_foreach s;
2430 TMacroIterator (s, i1)
2431 end
2432 else TIdent (s, i1)
2433
abad11c5
C
2434(* (* christia: here insert support for macros on top level *)
2435 | TIdent (s, ii) :: tl :: _, _ when
2436 can_be_on_top_level tl && LP.current_context () = InTopLevel ->
2437 pr2_cpp ("'" ^ s ^ "' looks like a macro, I treat it as comment");
2438 TCommentCpp (Token_c.CppDirective, ii)
2439*)
2440
2441(*-------------------------------------------------------------*)
34e49164 2442 | v::xs, _ -> v
abad11c5 2443 | _ -> raise (Impossible 93)
34e49164 2444
ae4735db 2445let lookahead ~pass a b =
485bce71 2446 Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b)
34e49164
C
2447
2448