Release coccinelle-0.2.5-rc2
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
708f4980 19module TV = Token_views_c
34e49164
C
20module LP = Lexer_parser
21
485bce71 22module Stat = Parsing_stat
34e49164 23
ae4735db 24open Parser_c
34e49164 25
ae4735db 26open TV
708f4980 27
34e49164
C
28(*****************************************************************************)
29(* Some debugging functions *)
30(*****************************************************************************)
31
(* [pr2] and [pr2_once] are the printers built by Common.mk_pr2_wrappers
 * from the Flag_parsing_c.verbose_parsing flag.
 *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing

(* Print [s], prefixed with "CPP-", through Common.pr2_once; does nothing
 * unless Flag_parsing_c.debug_cpp is set.
 *)
let pr2_cpp s =
  if not !Flag_parsing_c.debug_cpp
  then ()
  else Common.pr2_once ("CPP-" ^ s)
37
38
(* Generic message helper.
 *
 * [printer s] is called when [cond] holds and either message filtering
 * (Flag_parsing_c.filter_msg) is off, or [is_known s] is false.
 * [is_known] is only consulted when [cond] holds and filtering is on.
 *)
let msg_gen cond is_known printer s =
  if cond && (not !Flag_parsing_c.filter_msg || not (is_known s))
  then printer s
ae4735db 47
34e49164 48
485bce71
C
(* In the following, there are some hardcoded names of types or macros,
 * but they are not used by our heuristics! They are only here to help
 * detect false positives, by printing only the typedefs/macros
 * that we don't know yet. If we print everything, then we can easily
 * get lost in too much verbose tracing information. So those
 * functions "filter" some messages. Our heuristics are therefore still
 * good: there is no more (or not that much) hardcoded linux stuff.
 *)
485bce71 57
ae4735db
C
(* [is_known_typdef s] is true when [s] is one of the listed type names
 * or matches the regexp ".*_t$". It is used below to filter messages.
 *)
let is_known_typdef =
  let known_names = [
    "u_char"; "u_short"; "u_int"; "u_long";
    "u8"; "u16"; "u32"; "u64";
    "s8"; "s16"; "s32"; "s64";
    "__u8"; "__u16"; "__u32"; "__u64";

    "acpi_handle";
    "acpi_status";

    "FILE";
    "DIR";
  ]
  in
  fun s ->
    (* the regexp is only tried when the name is not in the list *)
    List.mem s known_names || s =~ ".*_t$"
485bce71 79
ae4735db
C
(* Count one more inferred typedef (Stat.nTypedefInfer) and, when
 * Flag_parsing_c.debug_typedef is set, report the promotion of [s]
 * through msg_gen, with is_known_typdef as the filter.
 *
 * note: this cannot be written as a partial application
 * (let msg_typedef = msg_gen ...), because the debug_typedef flag would
 * then be read once, when this definition is evaluated, at which point
 * it is still false.
 *)
let msg_typedef s =
  incr Stat.nTypedefInfer;
  let report name = pr2_cpp ("TYPEDEF: promoting: " ^ name) in
  msg_gen !Flag_parsing_c.debug_typedef is_known_typdef report s
92
485bce71
C
(* Warn through [pr2] that [s] was inferred to be a typedef although it
 * may not be one. Nothing is printed when is_known_typdef accepts [s].
 *)
let msg_maybe_dangereous_typedef s =
  if is_known_typdef s
  then ()
  else
    pr2
      ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
34e49164
C
98
99
100
(* Count one more declare-macro (Stat.nMacroDecl) and, when
 * Flag_parsing_c.debug_cpp is set, report [s] through msg_gen. The names
 * listed below are the ones treated as already known by the filter.
 *)
let msg_declare_macro s =
  incr Stat.nMacroDecl;
  let is_known_declare_macro = function
    | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
    | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
    | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
    | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
    | "SENSOR_DEVICE_ATTR"
    | "LIST_HEAD"
    | "DECLARE_WORK" | "DECLARE_TASKLET"
    | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
    | "DECLARE_BITMAP"
      -> true
    (* disabled alternatives:
    | s when s =~ "^DECLARE_.*" -> true
    | s when s =~ ".*_ATTR$" -> true
    | s when s =~ "^DEFINE_.*" -> true
    *)
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found declare-macro: " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_declare_macro report s
34e49164 128
ae4735db
C
129
(* Count one more use of the iterator heuristic
 * (Stat.nIteratorHeuristic) and trace the foreach-like macro [s].
 *)
let msg_foreach s =
  let () = incr Stat.nIteratorHeuristic in
  pr2_cpp ("MACRO: found foreach: " ^ s)
133
134
ae4735db
C
135(* ??
136let msg_debug_macro s =
34e49164 137 pr2_cpp ("MACRO: found debug-macro: " ^ s)
485bce71 138*)
34e49164
C
139
140
(* The three functions below each bump Stat.nMacroStmt and trace, through
 * pr2_cpp, the kind of macro statement found for the name [s].
 *)

(* macro with parameters *)
let msg_macro_noptvirg s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)

(* macro found at toplevel *)
let msg_macro_toplevel_noptvirg s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)

(* single macro *)
let msg_macro_noptvirg_single s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
152
153
485bce71
C
154
155
(* Count one more higher-order macro (Stat.nMacroHigherOrder) and, when
 * Flag_parsing_c.debug_cpp is set, report [s] through msg_gen. The names
 * listed below are the ones treated as already known by the filter.
 *)
let msg_macro_higher_order s =
  incr Stat.nMacroHigherOrder;
  let is_known_higher_order = function
    | "DBGINFO"
    | "DBGPX"
    | "DFLOW"
      -> true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found higher ordre macro : " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_higher_order report s
170
171
(* Count one more string-macro (Stat.nMacroString) and, when
 * Flag_parsing_c.debug_cpp is set, report [s] through msg_gen. The names
 * listed below are the ones treated as already known by the filter.
 *)
let msg_stringification s =
  incr Stat.nMacroString;
  let is_known_string_macro = function
    | "REVISION"
    | "UTS_RELEASE"
    | "SIZE_STR"
    | "DMA_STR"
      -> true
    (* disabled alternative: s when s =~ ".*STR.*" -> true *)
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found string-macro " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_string_macro report s
188
485bce71
C
(* Count one more string-macro (Stat.nMacroString) and trace the
 * string-macro with parameters named [s].
 *)
let msg_stringification_params s =
  let () = incr Stat.nMacroString in
  pr2_cpp ("MACRO: string-macro with params : " ^ s)
192
193
194
(* Count one more macro expansion (Stat.nMacroExpand) and trace [s]. *)
let msg_apply_known_macro s =
  let () = incr Stat.nMacroExpand in
  pr2_cpp ("MACRO: found known macro = " ^ s)

(* Count one more macro hint (Stat.nMacroHint) and trace [s]. *)
let msg_apply_known_macro_hint s =
  let () = incr Stat.nMacroHint in
  pr2_cpp ("MACRO: found known macro hint = " ^ s)
202
203
34e49164 204
ae4735db
C
205
(* Bump Stat.nIfdefZero and trace which kind of boolean #if is being
 * commented out: the positive form when [is_ifdef_positif] is true,
 * the negative form otherwise.
 *)
let msg_ifdef_bool_passing is_ifdef_positif =
  incr Stat.nIfdefZero; (* of Version ? *)
  let what =
    if is_ifdef_positif
    then "commenting parts of a #if 1 or #if LINUX_VERSION"
    else "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
  in
  pr2_cpp what
211
212
(* Bump Stat.nIfdefExprPassing and trace the ifdef-mid-something case. *)
let msg_ifdef_mid_something () =
  let () = incr Stat.nIfdefExprPassing in
  pr2_cpp "found ifdef-mid-something"

(* Bump Stat.nIfdefFunheader; this one prints nothing. *)
let msg_ifdef_funheaders () =
  incr Stat.nIfdefFunheader

(* Bump Stat.nIfdefPassing and trace the ifdef-cparen-else case. *)
let msg_ifdef_cparen_else () =
  let () = incr Stat.nIfdefPassing in
  pr2_cpp "found ifdef-cparen-else"
224
485bce71 225
(* Count one more macro attribute (Stat.nMacroAttribute) and trace [s]. *)
let msg_attribute s =
  let () = incr Stat.nMacroAttribute in
  pr2_cpp ("ATTR:" ^ s)
ae4735db 229
485bce71
C
230
231
34e49164 232(*****************************************************************************)
485bce71 233(* The regexp and basic view definitions *)
34e49164
C
234(*****************************************************************************)
235
(* opti: better to build them once and for all, especially regexp_foreach *)
237
(* an identifier made only of upper-case letters, digits and '_' *)
let regexp_macro = Str.regexp "^[A-Z_][A-Z_0-9]*$"

(* linuxext: an identifier starting with two underscores *)
let regexp_annot = Str.regexp "^__.*$"

(* linuxext: an identifier containing DECLARE *)
let regexp_declare = Str.regexp ".*DECLARE.*"

(* linuxext: an identifier containing one of the iterator-like words
 * below; matched case-insensitively.
 *)
let regexp_foreach =
  Str.regexp_case_fold
    ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"

(* an identifier ending in _t *)
let regexp_typedef = Str.regexp ".*_t$"
255
34e49164
C
(* names that must never be taken for a typedef *)
let false_typedef = [ "printk" ]

(* [ok_typedef s] is false exactly when [s] is listed in false_typedef. *)
let ok_typedef s =
  if List.mem s false_typedef then false else true
262
(* [not_annot s] is true when [s] does not match regexp_annot. *)
let not_annot s =
  if s ==~ regexp_annot then false else true
265
266
34e49164 267
34e49164 268
485bce71
C
269(*****************************************************************************)
270(* Helpers *)
271(*****************************************************************************)
272
485bce71 273(* ------------------------------------------------------------------------- *)
ae4735db
C
274(* the pair is the status of '()' and '{}', ex: (-1,0)
275 * if too much ')' and good '{}'
276 * could do for [] too ?
485bce71
C
277 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
278 * then if have ifdef, then certainly can lead to a problem.
279 *)
(* Walk every token of an ifdef clause and return the pair
 * (parenthesis balance, brace balance): each opening token adds one,
 * each closing token subtracts one.
 *)
let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
 fun clause ->
  let parens = ref 0 and braces = ref 0 in
  let account tokext =
    match tokext.tok with
    | t when TH.is_opar t -> incr parens
    | TOBrace _ -> incr braces
    | t when TH.is_cpar t -> decr parens
    | TCBrace _ -> decr braces
    | _ -> ()
  in
  TV.iter_token_ifdef account clause;
  (!parens, !braces)
293
294
295(* ------------------------------------------------------------------------- *)
(* NOTE(review): not referenced in the visible part of this file;
 * presumably a look-ahead bound used by a caller - confirm.
 *)
let forLOOKAHEAD = 30


(* Scan a token list looking for a closing parenthesis that is directly
 * followed by something that can start a loop body: an opening brace, an
 * identifier, or one of if / while / for / switch / return.
 *
 * Nested parentheses are handled by scanning the nested group first; the
 * answer for that group is discarded, and the scan resumes with the
 * tokens left after it. A closing parenthesis followed by anything else
 * ends the current scan with false.
 *
 * The single-statement cases are only an approximation; undoubtedly more
 * cases are needed.
 * todo: premier(statement) - suivant(funcall)
 * todo: use indentation instead of premier(statement) ?
 *)
let is_really_foreach toks =
  let rec scan = function
    | [] -> (false, [])
    | TCPar _
      :: (TOBrace _ | TIdent _ | Tif _ | Twhile _ | Tfor _ | Tswitch _
         | Treturn _)
      :: rest ->
        (true, rest)
    | TCPar _ :: rest -> (false, rest)
    | TOPar _ :: rest ->
        let (_, after_group) = scan rest in
        scan after_group
    | _ :: rest -> scan rest
  in
  fst (scan toks)
328
329
330(* ------------------------------------------------------------------------- *)
(* Store [Some cnt] in the tag reference carried by an ifdef-related
 * token. Raises Impossible when [x] is any other kind of token.
 *)
let set_ifdef_token_parenthize_info cnt x =
  let tag =
    match x with
    | TIfdef (tag, _)
    | TIfdefelse (tag, _)
    | TIfdefelif (tag, _)
    | TEndif (tag, _)
    | TIfdefBool (_, tag, _)
    | TIfdefMisc (_, tag, _)
    | TIfdefVersion (_, tag, _) -> tag
    | _ -> raise Impossible
  in
  tag := Some cnt
485bce71
C
345
346
485bce71 347
(* global counter, incremented once per ifdef group visited below *)
let ifdef_paren_cnt = ref 0


(* For every Ifdef / Ifdefbool group, tag each of its directive tokens
 * with the pair (value of the counter for this group, number of
 * directives in the group), then recurse into the clauses of the group.
 *)
let rec set_ifdef_parenthize_info xs =
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdefbool (_, clauses, directives)
    | Ifdef (clauses, directives) ->
        incr ifdef_paren_cnt;
        (* the counter only moves again in the recursive calls below, so
         * every directive of this group gets the same pair *)
        let stamp = (!ifdef_paren_cnt, List.length directives) in
        List.iter
          (fun d -> set_ifdef_token_parenthize_info stamp d.tok)
          directives;
        List.iter set_ifdef_parenthize_info clauses
  in
  List.iter visit xs
365
366
978fd7e5
C
367(*****************************************************************************)
368(* The parsing hack for #define *)
369(*****************************************************************************)
370
ae4735db 371(* To parse macro definitions I need to do some tricks
978fd7e5
C
372 * as some information can be get only at the lexing level. For instance
373 * the space after the name of the macro in '#define foo (x)' is meaningful
374 * but the grammar can not get this information. So define_ident below
375 * look at such space and generate a special TOpardefine. In a similar
376 * way macro definitions can contain some antislash and newlines
ae4735db
C
377 * and the grammar need to know where the macro ends (which is
378 * a line-level and so low token-level information). Hence the
978fd7e5 379 * function 'define_line' below and the TDefEol.
ae4735db
C
380 *
381 * update: TDefEol is handled in a special way at different places,
978fd7e5
C
382 * a little bit like EOF, especially for error recovery, so this
383 * is an important token that should not be retagged!
ae4735db
C
384 *
385 *
386 * ugly hack, a better solution perhaps would be to erase TDefEOL
387 * from the Ast and list of tokens in parse_c.
388 *
978fd7e5 389 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
ae4735db 390 *
978fd7e5
C
391 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
392 * is where is the \n TCommentSpace. Normally there is always a last token
393 * to synchronize on, either EOF or the token of the next toplevel.
ae4735db 394 * In the case of the #define we got in list of token
978fd7e5
C
395 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
396 * not synchronize on it and so we will not print the "\n".
397 * A solution would be to put the TDefEOL before the "\n".
c491d8ee 398 * (jll: tried to do this, see the comment "Put end of line..." below)
ae4735db
C
399 *
400 * todo?: could put a ExpandedTok for that ?
978fd7e5 401 *)
ae4735db
C
(* Build a TDefEOL token from the info [ii]: an OriginTok whose parse info
 * is the one of [ii] with an empty string and a char position one past
 * the position of [ii], with fresh annotation and comment references.
 *)
let mark_end_define ii =
  let end_pinfo =
    { (Ast_c.parse_info_of_info ii) with
      Common.str = "";
      Common.charpos = Ast_c.pos_of_info ii + 1
    }
  in
  TDefEOL
    { Ast_c.pinfo = Ast_c.OriginTok end_pinfo;
      cocci_tag = ref Ast_c.emptyAnnot;
      comments_tag = ref Ast_c.emptyComments;
    }
413
(* Put the TDefEOL at the right place.
 *
 * define_line_1 copies tokens to [acc] (kept in reverse order) until it
 * meets a TDefine or a TUndef, then hands over to define_line_2. An
 * escaped newline met outside of a directive is reported and turned
 * into a TCommentSpace.
 *)
let rec define_line_1 acc xs =
  match xs with
  | [] -> List.rev acc
  | ((TDefine ii | TUndef ii) as directive) :: rest ->
      define_line_2 (directive :: acc) (Ast_c.line_of_info ii) ii rest
  | TCppEscapedNewline ii :: rest ->
      pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
      pr2 (Ast_c.strloc_of_info ii);
      define_line_1 (TCommentSpace ii :: acc) rest
  | tok :: rest -> define_line_1 (tok :: acc) rest

(* define_line_2 is inside a directive that is currently on line [line];
 * [lastinfo] is the info of the last token seen on it. An escaped
 * newline becomes a TCommentSpace and moves the expected line one
 * further. The directive ends at EOF or at the first token found on
 * another line; a TDefEOL built from [lastinfo] is emitted there.
 *)
and define_line_2 acc line lastinfo xs =
  match xs with
  | [] ->
      (* should not happen, EOF should be met before *)
      pr2 "PB: WEIRD";
      List.rev (mark_end_define lastinfo :: acc)
  | tok :: rest ->
      let tok_line = TH.line_of_tok tok in
      let tok_info = TH.info_of_tok tok in
      (match tok with
      | EOF _ ->
          define_line_1 (tok :: mark_end_define lastinfo :: acc) rest
      | TCppEscapedNewline ii ->
          if tok_line <> line then pr2 "PB: WEIRD: not same line number";
          define_line_2 (TCommentSpace ii :: acc) (line + 1) tok_info rest
      | _ when tok_line =|= line ->
          define_line_2 (tok :: acc) line tok_info rest
      | _ ->
          (* Put the end of line token before the newline, that is just
           * under the head of acc. A newline at least must be there
           * because the line changed and because a directive was pushed
           * on acc before entering this function. The current token is
           * handed back to define_line_1. *)
          define_line_1
            (List.hd acc :: mark_end_define lastinfo :: List.tl acc)
            (tok :: rest)
      )
463
(* Retag the name that follows a #define or a #undef.
 *
 * [acc] holds the tokens already processed, in reverse order; the result
 * is the whole token list in its original order.
 *
 * After a TUndef or a TDefine, the expected shape is a TCommentSpace
 * followed by a TIdent, which becomes a TIdentDefine. For a TDefine, an
 * opening parenthesis directly after the identifier becomes a
 * TOParDefine. Anything else is reported with a WEIRD message.
 *)
let rec define_ident acc xs =
  match xs with
  | [] -> List.rev acc
  | TUndef ii::xs ->
      let acc = TUndef ii :: acc in
      (match xs with
      | TCommentSpace i1::TIdent (s,i2)::xs ->
          define_ident (TIdentDefine (s,i2) :: TCommentSpace i1 :: acc) xs
      | _ ->
          (* fix: this is the #undef case, the message used to say
           * #define *)
          pr2 "WEIRD: weird #undef body";
          define_ident acc xs
      )
  | TDefine ii::xs ->
      let acc = TDefine ii :: acc in
      (match xs with
      | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
          (* Change also the kind of TIdent to avoid bad interaction
           * with other parsing_hack tricks. For instance if we keep
           * TIdent then the stringification algo can believe the TIdent
           * is a string-macro. So simpler to change the kind of the
           * ident too.
           *)
          (* if the parenthesis is stuck to the ident, then
           * it's a macro-function. Change token to avoid ambiguity
           * between #define foo(x) and #define foo (x)
           *)
          define_ident
            (TOParDefine i3 :: TIdentDefine (s,i2) :: TCommentSpace i1 :: acc)
            xs

      | TCommentSpace i1::TIdent (s,i2)::xs ->
          define_ident (TIdentDefine (s,i2) :: TCommentSpace i1 :: acc) xs

      (* bugfix: ident of macro (as well as params, cf below) can be tricky
       * note, do we need to subst in the body of the define ? no cos
       * here the issue is the name of the macro, as in #define inline,
       * so obviously the name of this macro will not be used in its
       * body (it would be a recursive macro, which is forbidden).
       *)
      | TCommentSpace i1::t::xs ->
          let s = TH.str_of_tok t in
          let ii = TH.info_of_tok t in
          if s ==~ Common.regexp_alpha
          then begin
            pr2 (spf "remapping: %s to an ident in macro name" s);
            define_ident (TIdentDefine (s,ii) :: TCommentSpace i1 :: acc) xs
          end
          else begin
            pr2 "WEIRD: weird #define body";
            (* note: here neither the space nor the token [t] is pushed
             * on acc; processing resumes after them *)
            define_ident acc xs
          end

      | _ ->
          pr2 "WEIRD: weird #define body";
          define_ident acc xs
      )
  | x::xs ->
      define_ident (x :: acc) xs
978fd7e5
C
531
532
ae4735db
C
533
(* First place the TDefEOL tokens (define_line_1), then retag the names
 * of the macros (define_ident).
 *)
let fix_tokens_define2 xs =
  let with_eol = define_line_1 [] xs in
  define_ident [] with_eol

(* Same as fix_tokens_define2, run under Common.profile_code. *)
let fix_tokens_define a =
  let run () = fix_tokens_define2 a in
  Common.profile_code "C parsing.fix_define" run
ae4735db 539
978fd7e5
C
540
541
542
543
544(* ------------------------------------------------------------------------- *)
545(* Other parsing hacks related to cpp, Include/Define hacks *)
546(* ------------------------------------------------------------------------- *)
547
548(* Sometimes I prefer to generate a single token for a list of things in the
549 * lexer so that if I have to passed them, like for passing TInclude then
ae4735db
C
550 * it's easy. Also if I don't do a single token, then I need to
551 * parse the rest which may not need special stuff, like detecting
978fd7e5
C
552 * end of line which the parser is not really ready for. So for instance
553 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
ae4735db 554 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
978fd7e5
C
555 * but this kind of token is valid only after a #include and the
556 * lexing and parsing rules are different for such tokens so not that
557 * easy to parse such things in parser_c.mly. Hence the following hacks.
ae4735db 558 *
978fd7e5
C
559 * less?: maybe could get rid of this like I get rid of some of fix_define.
560 *)
561
562(* helpers *)
563
(* Used to generate a new token info from an existing one: the result is
 * an OriginTok whose parse info is the one of [ii] with the string
 * replaced by [str] and both the char position and the column shifted
 * by [posadd].
 *)
let new_info posadd str ii =
  let shifted =
    { (Ast_c.parse_info_of_info ii) with
      Common.charpos = Ast_c.pos_of_info ii + posadd;
      Common.str = str;
      Common.column = Ast_c.col_of_info ii + posadd;
    }
  in
  { Ast_c.pinfo = Ast_c.OriginTok shifted;
    (* must generate a new ref each time, otherwise share *)
    cocci_tag = ref Ast_c.emptyAnnot;
    comments_tag = ref Ast_c.emptyComments;
  }
576
577
(* Turn every token up to and including the next TDefEOL into a
 * TCommentCpp token; the tokens after the TDefEOL are returned unchanged.
 * The TDefEOL itself gets the CppDirective kind, the tokens before it
 * get CppPassingNormal. Fails when no TDefEOL is found.
 *)
let rec comment_until_defeol xs =
  match xs with
  | [] ->
      (* job not done in Cpp_token_c.define_parse ? *)
      failwith "cant find end of define token TDefEOL"
  | (Parser_c.TDefEOL _ as eol) :: rest ->
      Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok eol)
      :: rest
  | tok :: rest ->
      let tok' =
        (* bugfix: real comments are kept as they are, otherwise we may
         * lose a TComment token *)
        if TH.is_real_comment tok
        then tok
        else
          Parser_c.TCommentCpp
            (Token_c.CppPassingNormal (*good?*), TH.info_of_tok tok)
      in
      tok' :: comment_until_defeol rest
597
ae4735db
C
(* Apply Common.drop_until with a predicate recognising TDefEOL, then
 * drop the head of what it returns.
 *)
let drop_until_defeol xs =
  let is_defeol = function
    | Parser_c.TDefEOL _ -> true
    | _ -> false
  in
  List.tl (Common.drop_until is_defeol xs)
601
602
603
604(* ------------------------------------------------------------------------- *)
605(* returns a pair (replaced token, list of next tokens) *)
606(* ------------------------------------------------------------------------- *)
607
ae4735db
C
(* Split an include into a TIncludeStart token, carrying [info] rewrapped
 * with the string [includes], and a one-element list holding the
 * TIncludeFilename token, whose info is [info] shifted by the length of
 * [includes].
 *)
let tokens_include (info, includes, filename, inifdef) =
  let start_tok =
    Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef) in
  let filename_info = new_info (String.length includes) filename info in
  (start_tok, [Parser_c.TIncludeFilename (filename, filename_info)])
613
614
615
616
485bce71
C
617(*****************************************************************************)
618(* CPP handling: macros, ifdefs, macros defs *)
619(*****************************************************************************)
620
0708f913
C
621(* ------------------------------------------------------------------------- *)
622(* special skip_start skip_end handling *)
623(* ------------------------------------------------------------------------- *)
624
625(* note: after this normally the token list should not contain any more the
626 * TCommentSkipTagStart and End tokens.
627 *)
(* Mark as comment (CppPassingExplicit) every token from a
 * TCommentSkipTagStart up to and including the matching
 * TCommentSkipTagEnd, then go on with the tokens after it.
 * Fails when a start tag has no end tag, or when an end tag is met
 * without a start tag.
 *)
let rec commentize_skip_start_to_end xs =
  match xs with
  | [] -> ()
  | ({tok = TCommentSkipTagStart _} as start) :: rest ->
      (try
        let is_end = function
          | {tok = TCommentSkipTagEnd _ } -> true
          | _ -> false
        in
        let (before, stop, after) = Common.split_when is_end rest in
        List.iter
          (fun tok -> set_as_comment Token_c.CppPassingExplicit tok)
          (start :: stop :: before);
        commentize_skip_start_to_end after
      with Not_found ->
        failwith "could not find end of skip_start special comment"
      )
  | {tok = TCommentSkipTagEnd _} :: _ ->
      failwith "found skip_end comment but no skip_start"
  | _ :: rest ->
      commentize_skip_start_to_end rest
ae4735db
C
654
655
0708f913
C
656
657
34e49164
C
658(* ------------------------------------------------------------------------- *)
659(* ifdef keeping/passing *)
660(* ------------------------------------------------------------------------- *)
661
662(* #if 0, #if 1, #if LINUX_VERSION handling *)
ae4735db
C
(* #if 0, #if 1, #if LINUX_VERSION handling.
 *
 * For an Ifdefbool group the directives are marked as CppDirective
 * comments. When the condition is positive, every clause but the first
 * is marked as passed. Otherwise the first clause is passed, together
 * with every following clause except the last one, which is kept.
 * The clauses of an Ifdefbool are not visited any further; the clauses
 * of a plain Ifdef are.
 *)
let rec find_ifdef_bool xs =
  let pass_clause clause =
    iter_token_ifdef (set_as_comment Token_c.CppPassingNormal) clause
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) -> List.iter find_ifdef_bool clauses
    | Ifdefbool (is_ifdef_positif, clauses, directives) ->
        msg_ifdef_bool_passing is_ifdef_positif;
        (match clauses with
        | [] -> raise Impossible
        | first :: others ->
            List.iter (set_as_comment Token_c.CppDirective) directives;
            if is_ifdef_positif
            then List.iter pass_clause others
            else begin
              pass_clause first;
              (match List.rev others with
              | [] -> (* no #else *) ()
              | _last :: earlier ->
                  (* keep only last *)
                  List.iter pass_clause earlier
              )
            end
        )
  in
  List.iter visit xs
692
693
694
34e49164
C
(* maximal number of elements in a clause for it to be analysed below *)
let thresholdIfdefSizeMid = 6

(* Infer ifdefs involving not-closed expressions/statements.
 *
 * An Ifdef group with at least two clauses is considered when every
 * clause is small (at most thresholdIfdefSizeMid elements) and contains
 * no nested ifdef. If the clauses all have the same parenthesis/brace
 * balance and that balance is not (0, 0), the directives are marked as
 * comments and only the first clause is kept; the other clauses are
 * marked as passed.
 *
 * The clauses are then visited recursively in every case.
 *)
let rec find_ifdef_mid xs =
  xs +> List.iter (function
  | NotIfdefLine _ -> ()
  | Ifdef (xxs, info_ifdef_stmt) ->
      (match xxs with
      | [] -> raise Impossible
      | [_] -> ()
      | _first::second::rest ->
          (* don't analyse big ifdef *)
          if xxs +> List.for_all
               (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
             (* don't want nested ifdef *)
             xxs +> List.for_all (fun xs ->
               xs +> List.for_all
                 (function NotIfdefLine _ -> true | _ -> false)
             )
          then begin
            let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
            let cnt1, cnt2 = List.hd counts in
            (* fix: && binds tighter than ||, so without the parentheses
             * the for_all test was skipped whenever cnt1 <> 0 *)
            if (cnt1 <> 0 || cnt2 <> 0) &&
               counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
            then begin
              msg_ifdef_mid_something();

              (* keep only first, treat the rest as comment *)
              info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
              (second::rest) +> List.iter
                (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError));
            end
          end
      );
      List.iter find_ifdef_mid xxs

  (* no need complex analysis for ifdefbool *)
  | Ifdefbool (_, xxs, _) ->
      List.iter find_ifdef_mid xxs
  )
743
744
(* maximal number of extra elements allowed in each alternative header *)
let thresholdFunheaderLimit = 4

(* ifdef defining alternate function header, type.
 *
 * Three shapes are recognised, each being an Ifdef group whose clauses
 * start at column 0 and which is directly followed by a line starting
 * with an opening brace at column 0. In each case the directives are
 * marked as CppDirective comments, the first alternative is kept and the
 * tokens of the other alternatives are marked as passed. The rest of the
 * list is processed before the marking is done.
 *
 * Any other Ifdef / Ifdefbool group is simply visited recursively.
 *)
let rec find_ifdef_funheaders xs =
  let as_directives infos =
    List.iter (set_as_comment Token_c.CppDirective) infos in
  let pass_tokens toks =
    List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) toks in
  match xs with
  | [] -> ()
  | NotIfdefLine _ :: rest -> find_ifdef_funheaders rest

  (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
  | Ifdef
      ([ NotIfdefLine ({col = 0} :: _) :: ifdefblock1;
         NotIfdefLine (({col = 0} as xline2) :: line2) :: ifdefblock2
       ], info_ifdef_stmt
      )
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest
    when List.length ifdefblock1 <= thresholdFunheaderLimit &&
         List.length ifdefblock2 <= thresholdFunheaderLimit
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      as_directives info_ifdef_stmt;
      pass_tokens (xline2 :: line2);
      iter_token_ifdef
        (set_as_comment Token_c.CppPassingCosWouldGetError) ifdefblock2

  (* ifdef with nested ifdef *)
  | Ifdef
      ([ [NotIfdefLine ({col = 0} :: _)];
         [Ifdef
            ([ [NotIfdefLine (({col = 0} as xline2) :: line2)];
               [NotIfdefLine (({col = 0} as xline3) :: line3)];
             ], info_ifdef_stmt2
            )
         ]
       ], info_ifdef_stmt
      )
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      as_directives info_ifdef_stmt;
      as_directives info_ifdef_stmt2;
      pass_tokens ([xline2; xline3] @ line2 @ line3)

  (* ifdef with elseif *)
  | Ifdef
      ([ [NotIfdefLine ({col = 0} :: _)];
         [NotIfdefLine (({col = 0} as xline2) :: line2)];
         [NotIfdefLine (({col = 0} as xline3) :: line3)];
       ], info_ifdef_stmt
      )
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      as_directives info_ifdef_stmt;
      pass_tokens ([xline2; xline3] @ line2 @ line3)

  (* recurse *)
  | Ifdef (clauses, _) :: rest
  | Ifdefbool (_, clauses, _) :: rest ->
      List.iter find_ifdef_funheaders clauses;
      find_ifdef_funheaders rest
ae4735db 815
34e49164
C
816
817
485bce71 818(* ?? *)
ae4735db
C
(* For every toplevel Ifdef / Ifdefbool group of [xs], set to true the
 * boolean reference carried by each TInclude token that
 * iter_token_ifdef visits in the clauses of the group.
 *)
let adjust_inifdef_include xs =
  let flag_include tokext =
    match tokext.tok with
    | Parser_c.TInclude (_, _, inifdef_ref, _) -> inifdef_ref := true
    | _ -> ()
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) | Ifdefbool (_, clauses, _) ->
        List.iter (iter_token_ifdef flag_include) clauses
  in
  List.iter visit xs
830
831
832
34e49164 833
485bce71 834
34e49164
C
835
836
ae4735db
C
(* Detect ifdef groups, with at least two clauses, whose first clause
 * ends with a closing parenthesis just before the next directive.
 * For such a group the directives are marked as CppDirective comments
 * and only the first clause is kept; the other clauses are marked as
 * passed. The clauses are then visited recursively in every case.
 *)
let find_ifdef_cparen_else xs =
  (* Does the last token of the last line of [clause] close a
   * parenthesis ? False for an empty clause, an empty last line, or
   * when the last element is itself an ifdef group.
   *
   * Too bad ocaml does not support better list pattern matching
   * a la Prolog-III where can match the end of lists.
   *)
  let ends_with_cpar clause =
    match clause with
    | [] -> false
    | _ ->
        (match Common.last clause with
        | NotIfdefLine [] -> false
        | NotIfdefLine toks -> TH.is_cpar (Common.last toks).tok
        | Ifdef _ | Ifdefbool _ -> false
        )
  in
  let rec aux xs =
    let visit = function
      | NotIfdefLine _ -> ()
      | Ifdef (clauses, directives) ->
          (match clauses with
          | [] -> raise Impossible
          | [_] -> ()
          | first :: second :: rest ->
              if ends_with_cpar first then begin
                msg_ifdef_cparen_else();

                (* keep only first, treat the rest as comment *)
                List.iter (set_as_comment Token_c.CppDirective) directives;
                List.iter
                  (iter_token_ifdef
                     (set_as_comment Token_c.CppPassingCosWouldGetError))
                  (second :: rest)
              end
          );
          List.iter aux clauses

      (* no need complex analysis for ifdefbool *)
      | Ifdefbool (_, clauses, _) ->
          List.iter aux clauses
    in
    List.iter visit xs
  in
  aux xs
34e49164
C
881
882
708f4980
C
883(* ------------------------------------------------------------------------- *)
884(* cpp-builtin part2, macro, using standard.h or other defs *)
885(* ------------------------------------------------------------------------- *)
34e49164 886
ae4735db 887(* now in cpp_token_c.ml *)
34e49164
C
888
889(* ------------------------------------------------------------------------- *)
890(* stringification *)
891(* ------------------------------------------------------------------------- *)
892
(* Look into every parenthesised group. When one of its arguments is made
 * only of strings, string-macros and identifiers, and contains at least
 * one string or string-macro, each identifier in it is retagged as a
 * TMacroString. Any other argument is searched recursively.
 *)
let rec find_string_macro_paren xs =
  let is_string_like = function
    | PToken {tok = (TString _ | TMacroString _)} -> true
    | _ -> false
  in
  let is_string_or_ident = function
    | PToken {tok = (TString _ | TMacroString _ | TIdent _)} -> true
    | _ -> false
  in
  let promote = function
    | PToken ({tok = TIdent (s, _)} as id) ->
        msg_stringification s;
        id.tok <- TMacroString (s, TH.info_of_tok id.tok)
    | _ -> ()
  in
  match xs with
  | [] -> ()
  | PToken _ :: rest ->
      find_string_macro_paren rest
  | Parenthised (args, _) :: rest ->
      List.iter
        (fun arg ->
          if List.exists is_string_like arg
             && List.for_all is_string_or_ident arg
          then List.iter promote arg
          else find_string_macro_paren arg)
        args;
      find_string_macro_paren rest
ae4735db 917
34e49164
C
918
919(* ------------------------------------------------------------------------- *)
920(* macro2 *)
921(* ------------------------------------------------------------------------- *)
922
923(* don't forget to recurse in each case *)
(* Macro heuristics on the parenthesised view of the token stream.
 *
 * Walks the list and mutates tokens in place:
 *  - attribute tokens (and their parenthesised argument, if any) are
 *    turned into comments of kind Token_c.CppAttr;
 *  - a TMacroAttr following 'static' or 'extern' becomes a
 *    TMacroAttrStorage;
 *  - an identifier adjacent to a string token becomes a TMacroString, and
 *    its parenthesised argument (if any) is turned into a comment of kind
 *    Token_c.CppMacro.
 *
 * The order of the clauses matters: the more specific shapes must be
 * tried before the generic "recurse" clauses at the end.
 *)
let rec find_macro_paren xs =
  (* comment out every token of one parenthesised group *)
  let comment_out_parens kind xxs info_parens =
    [Parenthised (xxs, info_parens)] +>
      iter_token_paren (set_as_comment kind)
  in
  (* retag identifier token [id], whose name is [s], as a string macro *)
  let to_macro_string id s =
    id.tok <- TMacroString (s, TH.info_of_tok id.tok)
  in
  match xs with
  | [] -> ()

  (* attribute *)
  | PToken ({tok = Tattribute _} as attr)
    :: Parenthised (xxs, info_parens)
    :: rest ->
      pr2_cpp ("MACRO: __attribute detected ");
      comment_out_parens Token_c.CppAttr xxs info_parens;
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

  | PToken ({tok = TattributeNoarg _} as attr) :: rest ->
      pr2_cpp ("MACRO: __attributenoarg detected ");
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

(* disabled clauses, kept verbatim from the original:

    (* attribute cpp, __xxx id *)
  | PToken ({tok = TIdent (s,i1)} as id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s ==~ regexp_annot
     ->
      msg_attribute s;
      id.tok <- TMacroAttr (s, i1);
      find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)

    (* attribute cpp, id __xxx *)
  | PToken ({tok = TIdent (s,i1)} as _id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
     ->
      msg_attribute s2;
      id2.tok <- TMacroAttr (s2, i2);
      find_macro_paren xs

  | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
    ::PToken ({tok = TIdent (s,i1)} as attr)
    ::xs when s ==~ regexp_annot
     ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s,i1);
      (* recurse, may have other storage attributes *)
      find_macro_paren (PToken (tok1)::xs)

*)

  (* storage attribute *)
  | PToken ({tok = (Tstatic _ | Textern _)} as storage)
    :: PToken ({tok = TMacroAttr (s, i1)} as attr)
    :: rest ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s, i1);
      (* the storage keyword is pushed back: other storage attributes
       * may follow it *)
      find_macro_paren (PToken storage :: rest)

  (* stringification; the order of the following clauses is important *)

  (* string macro with parameters, string before the macro *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: Parenthised (xxs, info_parens)
    :: rest ->
      msg_stringification_params s;
      to_macro_string id s;
      comment_out_parens Token_c.CppMacro xxs info_parens;
      find_macro_paren rest

  (* string macro with parameters, string after the macro *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: Parenthised (xxs, info_parens)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification_params s;
      to_macro_string id s;
      comment_out_parens Token_c.CppMacro xxs info_parens;
      find_macro_paren rest

  (* The two clauses below cover strings that are not inside a function
   * call, for instance in an initializer. *)

  (* string macro variable, string before the macro *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: rest ->
      msg_stringification s;
      to_macro_string id s;
      find_macro_paren rest

  (* string macro variable, string after the macro *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification s;
      to_macro_string id s;
      find_macro_paren rest

  (* recurse *)
  | PToken _ :: rest ->
      find_macro_paren rest
  | Parenthised (xxs, _) :: rest ->
      xxs +> List.iter find_macro_paren;
      find_macro_paren rest
1047
1048
1049
1050
1051
1052(* don't forget to recurse in each case *)
(* Macro heuristics on the line + parenthesised view of the token stream.
 *
 * Each clause recognises a particular layout of one or two consecutive
 * lines and mutates the tokens involved (TMacroDecl, TMacroDeclConst,
 * TCParEOL, TMacroStmt, or comments of kind Token_c.CppMacro). Every
 * clause continues the traversal on the remaining lines; the order of the
 * clauses is significant.
 *)
let rec find_macro_lineparen xs =
  (* report [s] and retag the identifier token [macro] as a TMacroDecl *)
  let retag_as_macro_decl s macro =
    msg_declare_macro s;
    let info = TH.info_of_tok macro.tok in
    macro.tok <- TMacroDecl (Ast_c.str_of_info info, info)
  in
  (* Test applied to the first token of the line that follows a macro
   * candidate; [ctx] is the 'where' field of the macro token. *)
  let may_follow_macro ctx = function
    | TOBrace _ -> false (* otherwise would match funcdecl *)
    | TCBrace _ when ctx <> InFunction -> false
    | TPtVirg _ | TDotDot _ -> false
    | tok when TH.is_binary_operator tok -> false
    | _ -> true
  in
  match xs with
  | [] -> ()

  (* linuxext: ex: static [const] DEVICE_ATTR(); *)
  | Line [ PToken {tok = Tstatic _};
           PToken ({tok = TIdent (s, _)} as macro);
           Parenthised (_, _);
           PToken {tok = TPtVirg _};
         ]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_macro_decl s macro;
      find_macro_lineparen rest

  (* the static const case *)
  | Line [ PToken {tok = Tstatic _};
           PToken ({tok = Tconst _} as const);
           PToken ({tok = TIdent (s, _)} as macro);
           Parenthised (_, _);
           PToken {tok = TPtVirg _};
         ]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_macro_decl s macro;
      (* need to retag this const, otherwise ambiguity in grammar
         21: shift/reduce conflict (shift 121, reduce 137) on Tconst
         decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
         decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
         storage_class_spec : Tstatic . (137)
      *)
      const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
      find_macro_lineparen rest

  (* Same but without the trailing ';'.
   *
   * The final ';' is not part of the pattern because it can be on another
   * line and, because of the way mk_line is coded, we would not have
   * access to this ';' on the next line, even if it is next to the ')'. *)
  | Line [ PToken {tok = Tstatic _};
           PToken ({tok = TIdent (s, _)} as macro);
           Parenthised (_, _);
         ]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_macro_decl s macro;
      find_macro_lineparen rest

  (* on multiple lines: 'static' alone on the first one *)
  | Line [ PToken {tok = Tstatic _} ]
    :: Line [ PToken ({tok = TIdent (s, _)} as macro);
              Parenthised (_, _);
              PToken {tok = TPtVirg _};
            ]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_macro_decl s macro;
      find_macro_lineparen rest

  (* linuxext: ex: DECLARE_BITMAP();
   *
   * regexp_declare is used here rather than regexp_macro because sometimes
   * it can be a FunCallMacro such as DEBUG(foo()); without the preceding
   * 'static' the only way to avoid false positives is to restrict the
   * clause to .*DECLARE.* macros.
   *
   * There is a grammar rule for that, so this case is no longer needed
   * unless the parameters of the DECLARE_xxx are weird and can not be
   * mapped on an argument_list.
   *)
  | Line [ PToken ({tok = TIdent (s, _)} as macro);
           Parenthised (_, _);
           PToken {tok = TPtVirg _};
         ]
    :: rest
    when s ==~ regexp_declare ->
      retag_as_macro_decl s macro;
      find_macro_lineparen rest

  (* toplevel macros.
   * module_init(xxx)
   *
   * Could also transform the TIdent in a TMacroTop but that can give false
   * positives, so it is easier to just change the TCPar and so just solve
   * the end-of-stream pb of ocamlyacc.
   *)
  | Line [ PToken {tok = TIdent (s, _); col = col1; where = ctx};
           Parenthised (_, info_parens);
         ]
    :: rest
    when col1 =|= 0 ->
      let condition =
        (* to reduce the number of false positives *)
        match rest with
        | Line (PToken ({col = col2} as other) :: _) :: _ ->
            TH.is_eof other.tok
            || (col2 =|= 0 && may_follow_macro ctx other.tok)
        | _ -> false
      in
      if condition
      then begin
        msg_macro_toplevel_noptvirg s;
        (* just to avoid the end-of-stream pb of ocamlyacc *)
        let tcpar = Common.last info_parens in
        tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok)
        (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
      end;
      find_macro_lineparen rest

  (* macro with parameters
   * ex: DEBUG()
   *     return x;
   *)
  | Line [ PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro);
           Parenthised (xxs, info_parens);
         ]
    :: (Line (PToken ({col = col2} as other) :: restline2) as line2)
    :: rest
    (* when s ==~ regexp_macro *)
    ->
      let condition =
        (col1 =|= col2 && may_follow_macro ctx other.tok)
        ||
        (col2 <= col1 &&
           (match other.tok, restline2 with
            | TCBrace _, _ when ctx =*= InFunction -> true
            | Treturn _, _ -> true
            | Tif _, _ -> true
            | Telse _, _ -> true
            (* case of label, usually put in first line *)
            | TIdent _, PToken {tok = TDotDot _} :: _ -> true
            | _ -> false))
      in
      (* nothing is retagged when the macro starts in column 0 *)
      if condition && col1 <> 0
      then begin
        msg_macro_noptvirg s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
        [Parenthised (xxs, info_parens)] +>
          iter_token_paren (set_as_comment Token_c.CppMacro)
      end;
      find_macro_lineparen (line2 :: rest)

  (* linuxext:? single macro
   * ex: LOCK
   *     foo();
   *     UNLOCK
   *
   * todo: factorize code with previous rule ?
   *)
  | Line [ PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro) ]
    :: (Line (PToken ({col = col2} as other) :: _) as line2)
    :: rest
    (* when s ==~ regexp_macro *)
    ->
      let condition =
        (col1 =|= col2 &&
         col1 <> 0 && (* otherwise can match typedef of fundecl *)
           (match other.tok with
            | TPtVirg _ -> false
            | TOr _ -> false
            | TCBrace _ when ctx <> InFunction -> false
            | tok when TH.is_binary_operator tok -> false
            | _ -> true))
        ||
        (col2 <= col1 &&
           (match other.tok with
            | TCBrace _ when ctx =*= InFunction -> true
            | Treturn _ | Tif _ | Telse _ -> true
            | _ -> false))
      in
      if condition
      then begin
        msg_macro_noptvirg_single s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok)
      end;
      find_macro_lineparen (line2 :: rest)

  | _ :: rest ->
      find_macro_lineparen rest
1334
1335
485bce71
C
1336
1337(* ------------------------------------------------------------------------- *)
1338(* define tobrace init *)
1339(* ------------------------------------------------------------------------- *)
1340
ae4735db
C
(* Detects '#define NAME {' and '#define NAME(...) {' whose brace opens an
 * initializer, judged from the two tokens that follow the brace, and
 * retags that brace as a TOBraceDefineInit. Only the top level of the
 * parenthesised view is inspected.
 *)
let find_define_init_brace_paren xs =
  (* log [msg] and retag the opening brace *)
  let tag_init_brace msg tokbrace i1 =
    pr2_cpp msg;
    tokbrace.tok <- TOBraceDefineInit i1
  in
  let rec walk = function
    | [] -> ()

    (* mainly for firefox *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (s, _)}
      :: PToken ({tok = TOBrace i1} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        (match tok2.tok, tok3.tok with
         | (TInt _ | TString _ | TIdent _), TComma _ ->
             tag_init_brace ("found define initializer: " ^s) tokbrace i1
         | _ -> ());
        walk rest

    (* mainly for linux, especially in sound/ *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (s, _)}
      :: Parenthised (_, _)
      :: PToken ({tok = TOBrace i1} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        (match tok2.tok, tok3.tok with
         | (TInt _, TComma _)
         | (TDot _, TIdent _)
         | (TIdent _, TComma _) ->
             tag_init_brace
               ("found define initializer with param: " ^ s) tokbrace i1
         | _ -> ());
        walk rest

    (* recurse *)
    | PToken _ :: rest -> walk rest
    | Parenthised (_, _) :: rest ->
        (* no need to descend into the group for tobrace init *)
        walk rest
  in
  walk xs
1404
1405
34e49164
C
1406(* ------------------------------------------------------------------------- *)
1407(* action *)
1408(* ------------------------------------------------------------------------- *)
1409
(* Obsolete now with macro expansion? We get some regressions if this is
 * commented out.
 * todo: if a bad decision is made here, it can influence the other phases
 * and make the file hard to parse. So maybe when we have a parse error, we
 * should undo some of the guesses these heuristics have made, and restore
 * the original token value.
 *)
1416
34e49164
C
(* find_actions looks for an identifier followed by a parenthesised
 * argument list. The rest of the list and the arguments themselves are
 * searched first; then find_actions_params is applied to the arguments and,
 * if it changed something, the identifier is reported through
 * msg_macro_higher_order.
 *)
let rec find_actions = function
  | [] -> ()

  | PToken {tok = TIdent (s, _)} :: Parenthised (xxs, _) :: rest ->
      find_actions rest;
      xxs +> List.iter find_actions;
      if find_actions_params xxs
      then msg_macro_higher_order s

  | _ :: rest ->
      find_actions rest

(* For each argument holding at least one statement token
 * (TH.is_statement), retags every token of that argument as a TAction.
 * Returns true when at least one argument was retagged.
 *)
and find_actions_params xxs =
  let tag_as_action x =
    if TH.is_eof x.tok
    then
      (* certainly because paren detection had a pb because of some
       * ifdef-exp. Similar additional checking to what is done in
       * set_as_comment.
       *)
      pr2 "PB: weird, I try to tag an EOF token as an action"
    else if TH.is_eom x.tok
    then
      (* cf tests-bis/no_cpar_macro.c *)
      pr2 "PB: weird, I try to tag an EOM token as an action"
    else
      x.tok <- TAction (TH.info_of_tok x.tok)
  in
  List.fold_left (fun modified arg ->
    let has_statement =
      tokens_of_paren arg +> List.exists (fun x -> TH.is_statement x.tok)
      (* undo: && List.length toks > 1
       * good for sparse, not good for linux
       *)
    in
    if has_statement
    then begin
      arg +> iter_token_paren tag_as_action;
      true (* modified *)
    end
    else modified
  ) false xxs
1460
1461
1462
1463(* ------------------------------------------------------------------------- *)
1464(* main fix cpp function *)
1465(* ------------------------------------------------------------------------- *)
1466
(* Keeps only the tokens the macro heuristics should see: comments and cpp
 * instructions are dropped, except TDefine which is always kept.
 *)
let filter_cpp_stuff xs =
  let keep {tok = t} =
    if TH.is_comment t
    then false
    else
      match t with
      (* We don't want to drop the define or, if it is dropped, its body
       * has to be dropped too; otherwise the line heuristics may be lost
       * by not finding the TDefine in column 0 but by finding a
       * TDefineIdent in a column > 0.
       *)
      | Parser_c.TDefine _ -> true
      | _ -> not (TH.is_cpp_instruction t)
  in
  List.filter keep xs
34e49164
C
1482
(* Rewrites the position info of ExpandedTok and FakeTok tokens so that
 * each one carries the pair (anchor, offset), where anchor is the parse
 * info of the latest preceding OriginTok and offset is the running sum of
 * string lengths since that anchor (starting with the anchor's own
 * length). Tokens that come before the first OriginTok are returned
 * untouched.
 * Fails with "abstract not expected" on an AbstractLineTok met after the
 * first OriginTok.
 *)
let insert_virtual_positions l =
  let width info = String.length (Ast_c.str_of_info info) in
  (* copy of [tok] whose info carries the position info [pinfo] *)
  let with_pinfo pinfo tok =
    TH.visitor_info_of_tok (fun info -> Ast_c.rewrap_pinfo pinfo info) tok
  in
  let rec annotate anchor offset acc = function
    | [] -> List.rev acc
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             (* a real token becomes the new anchor *)
             annotate
               (Ast_c.parse_info_of_info info) (width info) (tok :: acc) rest
         | Ast_c.ExpandedTok (pi, _) ->
             let tok' =
               with_pinfo (Ast_c.ExpandedTok (pi, (anchor, offset))) tok in
             annotate anchor (offset + width info) (tok' :: acc) rest
         | Ast_c.FakeTok (s, _) ->
             let tok' =
               with_pinfo (Ast_c.FakeTok (s, (anchor, offset))) tok in
             annotate anchor (offset + width info) (tok' :: acc) rest
         | Ast_c.AbstractLineTok _ -> failwith "abstract not expected")
  in
  (* nothing can be anchored before the first OriginTok *)
  let rec skip_leading = function
    | [] -> []
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             tok ::
               annotate (Ast_c.parse_info_of_info info) (width info) [] rest
         | _ -> tok :: skip_leading rest)
  in
  skip_leading l
708f4980 1513
34e49164 1514
485bce71 1515(* ------------------------------------------------------------------------- *)
(* Main entry of the cpp-fixing heuristics.
 *
 * Wraps the tokens into extended tokens, runs the heuristics (which mutate
 * those extended tokens) in a fixed order, and returns the resulting plain
 * tokens after insert_virtual_positions.
 *
 * The order is important: if the action heuristic came first then, because
 * of ifdefs, some parentheses may not be closed, a higher-order macro may
 * be wrongly assumed, and too many tokens would be eaten. So the ifdef
 * phase has to come first.
 *
 * The filtered views are recomputed before each phase because the mutable
 * tokens may have been changed by the previous one, so more of them may
 * now be comments.
 *)
let fix_tokens_cpp2 ~macro_defs tokens =
  let tokens2 = ref (Common.acc_map TV.mk_token_extended tokens) in

  (* current tokens minus comments; is_comment also filters the
   * TCommentCpp created in commentize_skip_start_to_end
   * (could filter also #define/#include) *)
  let without_comments () =
    List.filter (fun x -> not (TH.is_comment x.tok)) !tokens2 in
  (* current tokens minus comments and cpp instructions *)
  let without_cpp_stuff () = filter_cpp_stuff !tokens2 in

  commentize_skip_start_to_end !tokens2;

  (* ifdef *)
  let ifdef_grouped = TV.mk_ifdef (without_comments ()) in
  set_ifdef_parenthize_info ifdef_grouped;

  find_ifdef_funheaders ifdef_grouped;
  find_ifdef_bool ifdef_grouped;
  find_ifdef_mid ifdef_grouped;
  (* change order ? maybe the cparen_else heuristic makes some of the
   * funheaders heuristics irrelevant ?
   *)
  find_ifdef_cparen_else ifdef_grouped;
  adjust_inifdef_include ifdef_grouped;

  (* macro 1 *)
  let paren_grouped = TV.mk_parenthised (without_cpp_stuff ()) in
  Cpp_token_c.apply_macro_defs
    ~msg_apply_known_macro
    ~msg_apply_known_macro_hint
    macro_defs paren_grouped;
  (* because the before field is used by apply_macro_defs *)
  tokens2 := TV.rebuild_tokens_extented !tokens2;

  (* tagging contextual info (InFunc, InStruct, etc). Better to do
   * that after the "ifdef-simplification" phase.
   *)
  let brace_grouped = TV.mk_braceised (without_comments ()) in
  set_context_tag brace_grouped;

  (* macro *)
  let paren_grouped = TV.mk_parenthised (without_cpp_stuff ()) in
  let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
  find_define_init_brace_paren paren_grouped;
  find_string_macro_paren paren_grouped;
  find_macro_lineparen line_paren_grouped;
  find_macro_paren paren_grouped;

  (* obsolete: actions ? not yet *)
  let paren_grouped = TV.mk_parenthised (without_cpp_stuff ()) in
  find_actions paren_grouped;

  insert_virtual_positions (Common.acc_map (fun x -> x.tok) !tokens2)
1594
(* Runs fix_tokens_cpp2 under the exclusive profiling label "HACK". *)
let time_hack1 ~macro_defs a =
  let run () = fix_tokens_cpp2 ~macro_defs a in
  Common.profile_code_exclusif "HACK" run
34e49164 1597
(* Profiled entry point: runs time_hack1 under the profiling label
 * "C parsing.fix_cpp". *)
let fix_tokens_cpp ~macro_defs a =
  let run () = time_hack1 ~macro_defs a in
  Common.profile_code "C parsing.fix_cpp" run
34e49164 1600
34e49164 1601
34e49164 1602
34e49164
C
1603
1604(*****************************************************************************)
1605(* Lexing with lookahead *)
1606(*****************************************************************************)
1607
(* Why use yet another parsing_hack technique? Are the fix_xxx functions,
 * which do some pre-processing on the full list of tokens, not enough?
 * No, because sometimes we need more contextual info, and even if
 * set_context() tries to give some contextual info, it is not completely
 * accurate, so the following code gives yet another alternative, yet
 * another chance to transform some tokens.
 *
 * todo?: maybe we could try to get rid of this technique. Maybe a better
 * set_context() would make it possible to move this code to a fix_xx
 * technique.
 *
 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
 * it is more general to do it via my LALR(k) tech, because here we can
 * transform some tokens given some context information. So sometimes it
 * makes sense to transform a token in one context, sometimes not, and
 * lex can not provide us with this context information. Note that the
 * order of the clauses in the pattern matching in lookahead is important.
 * Do not cut/paste.
 *
 * Note that in next there are only "clean" tokens; there are no comment
 * or space tokens. This is done by the caller.
 *
 *)
1630
485bce71
C
1631open Lexer_parser (* for the fields of lexer_hint type *)
1632
(* False when the first token of the list is 'struct', 'union' or 'enum';
 * true otherwise, including for the empty list. *)
let not_struct_enum toks =
  match toks with
  | Parser_c.Tstruct _ :: _
  | Parser_c.Tunion _ :: _
  | Parser_c.Tenum _ :: _ -> false
  | _ -> true
34e49164 1636
485bce71 1637
ae4735db 1638let lookahead2 ~pass next before =
34e49164
C
1639
1640 match (next, before) with
1641
1642 (*-------------------------------------------------------------*)
1643 (* typedef inference, parse_typedef_fix3 *)
1644 (*-------------------------------------------------------------*)
1645 (* xx xx *)
b1b2de81 1646 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
34e49164
C
1647 && ok_typedef s
1648 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
ae4735db 1649 ->
34e49164
C
1650 (* parse_typedef_fix3:
1651 * acpi_object acpi_object;
ae4735db 1652 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
34e49164
C
1653 * Le parser en interne a deja appelé le prochain token pour pouvoir
1654 * decider des choses.
1655 * => special case in lexer_heuristic, again
1656 *)
ae4735db
C
1657 if !Flag_parsing_c.debug_typedef
1658 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
34e49164
C
1659
1660 LP.disable_typedef();
1661
1662 msg_typedef s; LP.add_typedef_root s;
1663 TypedefIdent (s, i1)
1664
1665 (* xx yy *)
ae4735db 1666 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
34e49164
C
1667 && ok_typedef s
1668 ->
1669 (* && not_annot s2 BUT lead to false positive*)
1670
1671 msg_typedef s; LP.add_typedef_root s;
1672 TypedefIdent (s, i1)
1673
1674
1675 (* xx inline *)
ae4735db 1676 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
34e49164 1677 && ok_typedef s
ae4735db 1678 ->
34e49164
C
1679 msg_typedef s; LP.add_typedef_root s;
1680 TypedefIdent (s, i1)
1681
1682
1683 (* [,(] xx [,)] AND param decl *)
1684 | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ )
b1b2de81 1685 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
34e49164 1686 && ok_typedef s
ae4735db 1687 ->
34e49164
C
1688 msg_typedef s; LP.add_typedef_root s;
1689 TypedefIdent (s, i1)
1690
1691 (* xx* [,)] *)
1692 (* specialcase: [,(] xx* [,)] *)
1693 | (TIdent (s, i1)::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1694 when not_struct_enum before
1695 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1696 && ok_typedef s
ae4735db 1697 ->
34e49164
C
1698 msg_typedef s; LP.add_typedef_root s;
1699 TypedefIdent (s, i1)
1700
1701
1702 (* xx** [,)] *)
1703 (* specialcase: [,(] xx** [,)] *)
1704 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1705 when not_struct_enum before
1706 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1707 && ok_typedef s
ae4735db 1708 ->
34e49164
C
1709 msg_typedef s; LP.add_typedef_root s;
1710 TypedefIdent (s, i1)
1711
1712
1713
1714 (* xx const * USELESS because of next rule ? *)
ae4735db
C
1715 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
1716 when not_struct_enum before
34e49164
C
1717 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1718 && ok_typedef s
1719 ->
1720
1721 msg_typedef s; LP.add_typedef_root s;
1722 TypedefIdent (s, i1)
ae4735db 1723
34e49164 1724 (* xx const *)
ae4735db
C
1725 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
1726 when not_struct_enum before
34e49164
C
1727 && ok_typedef s
1728 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1729 ->
1730
1731 msg_typedef s; LP.add_typedef_root s;
1732 TypedefIdent (s, i1)
1733
1734
1735 (* xx * const *)
ae4735db
C
1736 | (TIdent (s, i1)::TMul _::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
1737 when not_struct_enum before
34e49164
C
1738 && ok_typedef s
1739 ->
1740 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1741
1742 msg_typedef s; LP.add_typedef_root s;
1743 TypedefIdent (s, i1)
1744
1745
1746 (* ( const xx) *)
485bce71 1747 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
34e49164
C
1748 ok_typedef s ->
1749 msg_typedef s; LP.add_typedef_root s;
1750 TypedefIdent (s, i1)
ae4735db 1751
34e49164
C
1752
1753
1754 (* ( xx ) [sizeof, ~] *)
485bce71 1755 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
34e49164
C
1756 when not_struct_enum before
1757 && ok_typedef s
ae4735db 1758 ->
34e49164
C
1759 msg_typedef s; LP.add_typedef_root s;
1760 TypedefIdent (s, i1)
1761
1762 (* [(,] xx [ AND parameterdeclaration *)
1763 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
b1b2de81 1764 when (LP.current_context() =*= LP.InParameter)
34e49164 1765 && ok_typedef s
ae4735db 1766 ->
34e49164
C
1767 msg_typedef s; LP.add_typedef_root s;
1768 TypedefIdent (s, i1)
ae4735db 1769
34e49164
C
1770 (*------------------------------------------------------------*)
1771 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
1772 (* but if have a '=', or ',' I think not *)
1773 (*------------------------------------------------------------*)
1774
1775 (* static xx * yy *)
ae4735db 1776 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ ,
485bce71 1777 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
ae4735db 1778 ok_typedef s
34e49164
C
1779 ->
1780 msg_typedef s; LP.add_typedef_root s;
1781 TypedefIdent (s, i1)
ae4735db 1782
34e49164
C
1783 (* TODO xx * yy ; AND in start of compound element *)
1784
1785
1786 (* xx * yy, AND in paramdecl *)
1787 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _)
b1b2de81 1788 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
ae4735db
C
1789 && ok_typedef s
1790 ->
34e49164
C
1791
1792 msg_typedef s; LP.add_typedef_root s;
1793 TypedefIdent (s, i1)
1794
1795
1796 (* xx * yy ; AND in Toplevel, except when have = before *)
1797 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
1798 TIdent (s, i1)
1799 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , _)
485bce71 1800 when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ()))
ae4735db 1801 ->
34e49164
C
1802 msg_typedef s; LP.add_typedef_root s;
1803 TypedefIdent (s, i1)
1804
1805 (* xx * yy , AND in Toplevel *)
1806 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _)
b1b2de81 1807 when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
ae4735db
C
1808 && ok_typedef s
1809 ->
34e49164
C
1810
1811 msg_typedef s; LP.add_typedef_root s;
1812 TypedefIdent (s, i1)
1813
1814 (* xx * yy ( AND in Toplevel *)
1815 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOPar _::_ , _)
ae4735db 1816 when not_struct_enum before
485bce71 1817 && (LP.is_top_or_struct (LP.current_context ()))
ae4735db 1818 && ok_typedef s
34e49164
C
1819 ->
1820 msg_typedef s; LP.add_typedef_root s;
1821 TypedefIdent (s, i1)
ae4735db 1822
34e49164
C
1823 (* xx * yy [ *)
1824 (* todo? enough ? cos in struct def we can have some expression ! *)
1825 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOCro _::_ , _)
ae4735db 1826 when not_struct_enum before &&
485bce71 1827 (LP.is_top_or_struct (LP.current_context ()))
ae4735db
C
1828 && ok_typedef s
1829 ->
34e49164
C
1830 msg_typedef s; LP.add_typedef_root s;
1831 TypedefIdent (s, i1)
1832
1833 (* u16: 10; in struct *)
1834 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
485bce71 1835 when (LP.is_top_or_struct (LP.current_context ()))
ae4735db
C
1836 && ok_typedef s
1837 ->
34e49164
C
1838 msg_typedef s; LP.add_typedef_root s;
1839 TypedefIdent (s, i1)
ae4735db 1840
34e49164
C
1841
1842 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
1843 (* YES cos at toplevel can have some expression !! for instance when *)
1844 (* enter in the dimension of an array *)
1845 (*
1846 | (TIdent s::TMul::TIdent s2::_ , _)
1847 when (take_safe 1 !passed_tok <> [Tstruct] &&
1848 (take_safe 1 !passed_tok <> [Tenum]))
1849 &&
ae4735db 1850 !LP._lexer_hint = Some LP.Toplevel ->
708f4980 1851 msg_typedef s; LP.add_typedef_root s;
34e49164
C
1852 TypedefIdent s
1853 *)
1854
1855 (* xx * yy = *)
1856 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TEq _::_ , _)
ae4735db
C
1857 when not_struct_enum before
1858 && ok_typedef s
34e49164
C
1859 ->
1860 msg_typedef s; LP.add_typedef_root s;
1861 TypedefIdent (s, i1)
1862
1863
1864 (* xx * yy) AND in paramdecl *)
1865 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TCPar _::_ , _)
b1b2de81 1866 when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
ae4735db 1867 && ok_typedef s
34e49164
C
1868 ->
1869 msg_typedef s; LP.add_typedef_root s;
1870 TypedefIdent (s, i1)
ae4735db 1871
34e49164
C
1872
1873 (* xx * yy; *) (* wrong ? *)
ae4735db
C
1874 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ ,
1875 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
1876 && ok_typedef s
34e49164
C
1877 ->
1878 msg_typedef s; LP.add_typedef_root s;
485bce71 1879 msg_maybe_dangereous_typedef s;
34e49164
C
1880 TypedefIdent (s, i1)
1881
1882
1883 (* xx * yy, and ';' before xx *) (* wrong ? *)
ae4735db 1884 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ ,
34e49164 1885 (TOBrace _| TPtVirg _)::_) when
ae4735db 1886 ok_typedef s
34e49164
C
1887 ->
1888 msg_typedef s; LP.add_typedef_root s;
1889 TypedefIdent (s, i1)
1890
1891
1892 (* xx_t * yy *)
ae4735db
C
1893 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , _)
1894 when s ==~ regexp_typedef && not_struct_enum before
1895 (* struct user_info_t sometimes *)
1896 && ok_typedef s
1897 ->
34e49164
C
1898 msg_typedef s; LP.add_typedef_root s;
1899 TypedefIdent (s, i1)
1900
1901 (* xx ** yy *) (* wrong ? *)
1902 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
1903 when not_struct_enum before
1904 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
ae4735db 1905 && ok_typedef s
34e49164
C
1906 ->
1907 msg_typedef s; LP.add_typedef_root s;
1908 TypedefIdent (s, i1)
1909
1910 (* xx *** yy *)
1911 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
ae4735db
C
1912 when not_struct_enum before
1913 && ok_typedef s
34e49164
C
1914 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1915 ->
1916 msg_typedef s; LP.add_typedef_root s;
1917 TypedefIdent (s, i1)
1918
1919 (* xx ** ) *)
1920 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
ae4735db 1921 when not_struct_enum before
34e49164 1922 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
ae4735db 1923 && ok_typedef s
34e49164
C
1924 ->
1925 msg_typedef s; LP.add_typedef_root s;
1926 TypedefIdent (s, i1)
1927
1928
1929
1930 (* ----------------------------------- *)
ae4735db 1931 (* old: why not do like for other rules and start with TIdent ?
485bce71
C
1932 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
1933 * new: we now prefer to start with TIdent because otherwise add_typedef_root
1934 * may have no effect in the second pass or when add_typedef is disabled.
1935 *)
34e49164
C
1936
1937 (* (xx) yy *)
ae4735db
C
1938 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
1939 (TOPar info)::x::_)
34e49164 1940 when not (TH.is_stuff_taking_parenthized x) &&
b1b2de81 1941 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3
ae4735db
C
1942 && ok_typedef s
1943 ->
34e49164
C
1944
1945 msg_typedef s; LP.add_typedef_root s;
485bce71
C
1946 (*TOPar info*)
1947 TypedefIdent (s, i1)
34e49164
C
1948
1949
ae4735db 1950 (* (xx) ( yy)
91eba41f
C
1951 * but false positive: typedef int (xxx_t)(...), so do specialisation below.
1952 *)
1953 (*
ae4735db
C
1954 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
1955 when not (TH.is_stuff_taking_parenthized x)
1956 && ok_typedef s
34e49164
C
1957 ->
1958 msg_typedef s; LP.add_typedef_root s;
485bce71
C
1959 (* TOPar info *)
1960 TypedefIdent (s, i1)
91eba41f
C
1961 *)
1962 (* special case: = (xx) ( yy) *)
ae4735db 1963 | (TIdent (s, i1)::TCPar _::TOPar _::_ ,
91eba41f 1964 (TOPar info)::(TEq _ |TEqEq _)::_)
ae4735db 1965 when ok_typedef s
91eba41f
C
1966 ->
1967 msg_typedef s; LP.add_typedef_root s;
1968 (* TOPar info *)
1969 TypedefIdent (s, i1)
1970
34e49164
C
1971
1972 (* (xx * ) yy *)
ae4735db
C
1973 | (TIdent (s, i1)::TMul _::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_) when
1974 ok_typedef s
1975 ->
34e49164 1976 msg_typedef s; LP.add_typedef_root s;
485bce71
C
1977 (*TOPar info*)
1978 TypedefIdent (s,i1)
1979
34e49164
C
1980
1981 (* (xx){ ... } constructor *)
ae4735db
C
1982 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
1983 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
1984 && ok_typedef s
34e49164
C
1985 ->
1986 msg_typedef s; LP.add_typedef_root s;
1987 TypedefIdent (s, i1)
1988
1989
1990 (* can have sizeof on expression
ae4735db 1991 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
708f4980 1992 msg_typedef s; LP.add_typedef_root s;
34e49164
C
1993 Tsizeof
1994 *)
91eba41f
C
1995
1996
1997 (* ----------------------------------- *)
1998 (* x ( *y )(params), function pointer *)
ae4735db 1999 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
34e49164 2000 when not_struct_enum before
ae4735db 2001 && ok_typedef s
34e49164
C
2002 ->
2003 msg_typedef s; LP.add_typedef_root s;
2004 TypedefIdent (s, i1)
2005
91eba41f 2006 (* x* ( *y )(params), function pointer 2 *)
ae4735db 2007 | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
91eba41f 2008 when not_struct_enum before
ae4735db 2009 && ok_typedef s
91eba41f
C
2010 ->
2011 msg_typedef s; LP.add_typedef_root s;
2012 TypedefIdent (s, i1)
2013
34e49164
C
2014
2015 (*-------------------------------------------------------------*)
2016 (* CPP *)
2017 (*-------------------------------------------------------------*)
485bce71
C
2018 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
2019 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
34e49164 2020 as x)
ae4735db
C
2021 ::_, _
2022 ->
485bce71 2023 (*
ae4735db 2024 if not !Flag_parsing_c.ifdef_to_if
34e49164 2025 then TCommentCpp (Ast_c.CppDirective, ii)
ae4735db 2026 else
485bce71 2027 *)
0708f913
C
2028 (* not !LP._lexer_hint.toplevel *)
2029 if !Flag_parsing_c.ifdef_directive_passing
708f4980 2030 || (pass >= 2)
0708f913 2031 then begin
ae4735db 2032
b1b2de81 2033 if (LP.current_context () =*= LP.InInitializer)
ae4735db 2034 then begin
0708f913
C
2035 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
2036 incr Stat.nIfdefInitializer;
ae4735db 2037 end else begin
708f4980 2038 pr2_cpp("IFDEF: or related inside function. I treat it as comment");
0708f913
C
2039 incr Stat.nIfdefPassing;
2040 end;
2041 TCommentCpp (Token_c.CppDirective, ii)
2042 end
2043 else x
ae4735db 2044
3a314143 2045 | (TUndef (ii) as x)::_, _
ae4735db 2046 ->
708f4980 2047 if (pass >= 2)
485bce71 2048 then begin
0708f913
C
2049 pr2_cpp("UNDEF: I treat it as comment");
2050 TCommentCpp (Token_c.CppDirective, ii)
113803cf
C
2051 end
2052 else x
2053
ae4735db
C
2054 | (TCppDirectiveOther (ii) as x)::_, _
2055 ->
708f4980 2056 if (pass >= 2)
113803cf 2057 then begin
0708f913
C
2058 pr2_cpp ("OTHER directive: I treat it as comment");
2059 TCommentCpp (Token_c.CppDirective, ii)
485bce71
C
2060 end
2061 else x
34e49164
C
2062
2063 (* If ident contain a for_each, then certainly a macro. But to be
2064 * sure should look if there is a '{' after the ')', but it requires
2065 * to count the '('. Because this can be expensive, we do that only
ae4735db 2066 * when the token contains "for_each".
34e49164 2067 *)
ae4735db 2068 | (TIdent (s, i1)::TOPar _::rest, _)
b1b2de81 2069 when not (LP.current_context () =*= LP.InTopLevel)
ae4735db
C
2070 (* otherwise a function such as static void loopback_enable(int i) {
2071 * will be considered as a loop
34e49164
C
2072 *)
2073 ->
2074
ae4735db 2075 if s ==~ regexp_foreach &&
34e49164 2076 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
ae4735db 2077
34e49164
C
2078 then begin
2079 msg_foreach s;
2080 TMacroIterator (s, i1)
2081 end
2082 else TIdent (s, i1)
2083
2084
ae4735db 2085
34e49164
C
2086 (*-------------------------------------------------------------*)
2087 | v::xs, _ -> v
2088 | _ -> raise Impossible
2089
(* Profiled entry point for the lookahead heuristics: evaluates
 * [lookahead2 ~pass a b] inside [Common.profile_code], under the
 * "C parsing.lookahead" label, and hands back whatever that call yields.
 *)
let lookahead ~pass a b =
  let run_heuristics () = lookahead2 ~pass a b in
  Common.profile_code "C parsing.lookahead" run_heuristics
34e49164
C
2092
2093