Coccinelle release 0.2.5-rc7.
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
708f4980 19module TV = Token_views_c
34e49164
C
20module LP = Lexer_parser
21
485bce71 22module Stat = Parsing_stat
34e49164 23
ae4735db 24open Parser_c
34e49164 25
ae4735db 26open TV
708f4980 27
34e49164
C
28(*****************************************************************************)
29(* Some debugging functions *)
30(*****************************************************************************)
31
(* [pr2] and [pr2_once] are the printers used for parsing diagnostics in
 * this file; both are produced by [Common.mk_pr2_wrappers] from the
 * [Flag_parsing_c.verbose_parsing] flag (presumably so that they stay
 * silent when the flag is off -- confirm in Common). *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
113803cf 33
(* Print a cpp-related debug message, prefixed with "CPP-", through
 * [Common.pr2_once]; nothing is printed unless
 * [Flag_parsing_c.debug_cpp] is set. *)
let pr2_cpp s =
  match !Flag_parsing_c.debug_cpp with
  | true -> Common.pr2_once ("CPP-" ^ s)
  | false -> ()
37
38
(* Generic message filter.
 *
 * [printer s] is called only when [cond] holds and, in addition, either
 * message filtering is off ([Flag_parsing_c.filter_msg] is false) or [s]
 * is not recognised by [is_known]. [is_known] is consulted only when
 * filtering is on. *)
let msg_gen cond is_known printer s =
  let passes_filter () =
    not !Flag_parsing_c.filter_msg || not (is_known s)
  in
  if cond && passes_filter () then printer s
ae4735db 47
34e49164 48
485bce71
C
(* In the following, there are some hardcoded names of types or macros,
 * but they are not used by our heuristics! They are only here to help
 * detect false positives, by printing only the typedefs/macros that we
 * do not know yet. If we printed everything, we could easily get lost in
 * too much verbose tracing information. So these functions "filter" some
 * messages. Our heuristics are therefore still generic: there is no more
 * (or not that much) hardcoded Linux stuff.
 *)
485bce71 57
ae4735db
C
(* Tells whether [s] is a typedef name we already know about: a fixed
 * list of integer/ACPI/stdio names, or any name matching ".*_t$".
 * Used only to filter messages, see msg_gen. *)
let is_known_typdef = function
  | "u_char" | "u_short" | "u_int" | "u_long"
  | "u8" | "u16" | "u32" | "u64"
  | "s8" | "s16" | "s32" | "s64"
  | "__u8" | "__u16" | "__u32" | "__u64"
  | "acpi_handle" | "acpi_status"
  | "FILE" | "DIR" ->
      true
  | other -> other =~ ".*_t$"
485bce71 79
ae4735db
C
(* note: we can't use partial application here (let msg_typedef = ...),
 * because the flag would then be read once, when msg_typedef is defined,
 * at which point debug_typedef is always false.
 *)
(* Record that an identifier was promoted to a typedef: bumps
 * [Stat.nTypedefInfer] and, when [Flag_parsing_c.debug_typedef] is set,
 * reports [s] through msg_gen (known typedef names are filtered out when
 * message filtering is on).
 * [n] is currently not used in the message (the variant printing it is
 * disabled). *)
let msg_typedef s n =
  incr Stat.nTypedefInfer;
  let report name =
    pr2_cpp (Printf.sprintf "TYPEDEF: promoting: %s" name)
  in
  msg_gen !Flag_parsing_c.debug_typedef is_known_typdef report s
93
485bce71
C
(* Warn, through [pr2], that [s] was inferred to be a typedef although it
 * is not one of the names accepted by is_known_typdef. *)
let msg_maybe_dangereous_typedef s =
  if is_known_typdef s then ()
  else
    pr2 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
34e49164
C
99
100
101
(* Record that a declaration-like macro was found: bumps
 * [Stat.nMacroDecl] and, when [Flag_parsing_c.debug_cpp] is set, reports
 * [s] through msg_gen. The names listed below are considered already
 * known and are filtered out when message filtering is on.
 * Regexp-based entries (^DECLARE_.*, .*_ATTR$, ^DEFINE_.*) are
 * deliberately not part of the known set. *)
let msg_declare_macro s =
  incr Stat.nMacroDecl;
  let already_known = function
    | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
    | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
    | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
    | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
    | "SENSOR_DEVICE_ATTR"
    | "LIST_HEAD"
    | "DECLARE_WORK" | "DECLARE_TASKLET"
    | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
    | "DECLARE_BITMAP" ->
        true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found declare-macro: " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp already_known report s
34e49164 129
ae4735db
C
130
(* Count one use of the iterator heuristic and log the macro name [s]. *)
let msg_foreach s =
  let () = incr Stat.nIteratorHeuristic in
  let text = "MACRO: found foreach: " ^ s in
  pr2_cpp text
134
135
ae4735db
C
136(* ??
137let msg_debug_macro s =
34e49164 138 pr2_cpp ("MACRO: found debug-macro: " ^ s)
485bce71 139*)
34e49164
C
140
141
(* Count one macro-statement and log that [s] is a macro with parameters
 * and no ';' ("noptvirg"). *)
let msg_macro_noptvirg s =
  let () = incr Stat.nMacroStmt in
  let text = "MACRO: found macro with param noptvirg: " ^ s in
  pr2_cpp text
145
(* Count one macro-statement and log that [s] is a toplevel macro with no
 * ';' ("noptvirg"). *)
let msg_macro_toplevel_noptvirg s =
  let () = incr Stat.nMacroStmt in
  let text = "MACRO: found toplevel macro noptvirg: " ^ s in
  pr2_cpp text
149
(* Count one macro-statement and log that [s] is a single (parameterless)
 * macro with no ';' ("noptvirg"). *)
let msg_macro_noptvirg_single s =
  let () = incr Stat.nMacroStmt in
  let text = "MACRO: found single-macro noptvirg: " ^ s in
  pr2_cpp text
153
154
485bce71
C
155
156
(* Record that a higher-order macro was found: bumps
 * [Stat.nMacroHigherOrder] and, when [Flag_parsing_c.debug_cpp] is set,
 * reports [s] through msg_gen. DBGINFO, DBGPX and DFLOW are considered
 * already known. *)
let msg_macro_higher_order s =
  incr Stat.nMacroHigherOrder;
  let already_known = function
    | "DBGINFO" | "DBGPX" | "DFLOW" -> true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found higher ordre macro : " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp already_known report s
171
172
(* Record that an identifier was treated as a string macro: bumps
 * [Stat.nMacroString] and, when [Flag_parsing_c.debug_cpp] is set,
 * reports [s] through msg_gen. The four names below are considered
 * already known (a broader ".*STR.*" rule is intentionally not used). *)
let msg_stringification s =
  incr Stat.nMacroString;
  let already_known = function
    | "REVISION" | "UTS_RELEASE" | "SIZE_STR" | "DMA_STR" -> true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found string-macro " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp already_known report s
189
485bce71
C
(* Count one string macro and log that [s] is a string macro taking
 * parameters. *)
let msg_stringification_params s =
  let () = incr Stat.nMacroString in
  let text = "MACRO: string-macro with params : " ^ s in
  pr2_cpp text
193
194
195
(* Count one expansion of a known macro and log its name [s]. *)
let msg_apply_known_macro s =
  let () = incr Stat.nMacroExpand in
  let text = "MACRO: found known macro = " ^ s in
  pr2_cpp text
199
(* Count one use of a known macro hint and log the macro name [s]. *)
let msg_apply_known_macro_hint s =
  let () = incr Stat.nMacroHint in
  let text = "MACRO: found known macro hint = " ^ s in
  pr2_cpp text
203
204
34e49164 205
ae4735db
C
206
(* Count one boolean/version #if that is being passed ([Stat.nIfdefZero],
 * also used for the version case) and log which flavour it was:
 * [is_ifdef_positif] selects between the "#if 1" and the "#if 0"
 * message. *)
let msg_ifdef_bool_passing is_ifdef_positif =
  incr Stat.nIfdefZero;
  pr2_cpp
    (if is_ifdef_positif
     then "commenting parts of a #if 1 or #if LINUX_VERSION"
     else "commenting a #if 0 or #if LINUX_VERSION or __cplusplus")
212
213
(* Count and log one "ifdef-mid-something" detection (see
 * find_ifdef_mid). *)
let msg_ifdef_mid_something () =
  let () = incr Stat.nIfdefExprPassing in
  pr2_cpp "found ifdef-mid-something"
217
(* Count one ifdef-funheader detection; nothing is printed. *)
let msg_ifdef_funheaders () =
  incr Stat.nIfdefFunheader
221
(* Count and log one "ifdef-cparen-else" detection (see
 * find_ifdef_cparen_else). *)
let msg_ifdef_cparen_else () =
  let () = incr Stat.nIfdefPassing in
  pr2_cpp "found ifdef-cparen-else"
225
485bce71 226
(* Count one macro attribute and log its name [s]. *)
let msg_attribute s =
  let () = incr Stat.nMacroAttribute in
  let text = "ATTR:" ^ s in
  pr2_cpp text
ae4735db 230
485bce71
C
231
232
34e49164 233(*****************************************************************************)
485bce71 234(* The regexp and basic view definitions *)
34e49164
C
235(*****************************************************************************)
236
(* opti: better to build them once and for all, especially regexp_foreach *)
238
(* Names made only of upper-case letters, digits and '_', not starting
 * with a digit. *)
let regexp_macro = Str.regexp "^[A-Z_][A-Z_0-9]*$"
241
242(* linuxext: *)
(* Names starting with a double underscore. *)
let regexp_annot = Str.regexp "^__.*$"
245
246(* linuxext: *)
(* Names containing "DECLARE" anywhere. *)
let regexp_declare = Str.regexp ".*DECLARE.*"
249
250(* linuxext: *)
(* Case-insensitive: names containing one of the iterator-like words
 * for_each/foreach, for_all/forall, iterate, loop, walk, scan, each or
 * for. *)
let regexp_foreach =
  let iterator_words =
    ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
  in
  Str.regexp_case_fold iterator_words
253
(* Names ending in "_t". *)
let regexp_typedef = Str.regexp ".*_t$"
256
34e49164
C
(* Identifiers that must never be taken for a typedef (see ok_typedef). *)
let false_typedef = ["printk"]
260
34e49164 261
485bce71
C
(* [ok_typedef s] is false exactly when [s] is listed in false_typedef. *)
let ok_typedef s =
  if List.mem s false_typedef then false else true
263
(* [not_annot s] is true when [s] does not match regexp_annot. *)
let not_annot s =
  if s ==~ regexp_annot then false else true
266
267
34e49164 268
34e49164 269
485bce71
C
270(*****************************************************************************)
271(* Helpers *)
272(*****************************************************************************)
273
485bce71 274(* ------------------------------------------------------------------------- *)
ae4735db
C
275(* the pair is the status of '()' and '{}', ex: (-1,0)
276 * if too much ')' and good '{}'
277 * could do for [] too ?
485bce71
C
278 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
279 * then if have ifdef, then certainly can lead to a problem.
280 *)
(* Walk every token of an ifdef clause and return the pair
 * (parenthesis balance, brace balance): each opening token adds one to
 * its counter and each closing token subtracts one. *)
let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
 fun xs ->
  let parens = ref 0 and braces = ref 0 in
  let tally tokext =
    match tokext.tok with
    | t when TH.is_opar t -> incr parens
    | TOBrace _ -> incr braces
    | t when TH.is_cpar t -> decr parens
    | TCBrace _ -> decr braces
    | _ -> ()
  in
  TV.iter_token_ifdef tally xs;
  (!parens, !braces)
294
295
296(* ------------------------------------------------------------------------- *)
(* Look-ahead bound for the foreach heuristics (its users are not in this
 * part of the file). *)
let forLOOKAHEAD = 30
298
ae4735db 299
485bce71 300(* look if there is a '{' just after the closing ')', and handling the
ae4735db
C
301 * possibility to have nested expressions inside nested parenthesis
302 *
485bce71
C
303 * todo: use indentation instead of premier(statement) ?
304 *)
(* Decide whether a token sequence looks like the argument list of a
 * foreach-like macro followed by a loop body.
 *
 * The scan skips tokens until it meets a ')'. It answers true when that
 * ')' is directly followed by '{', or by a token that can start a
 * single-statement body (identifier, if, while, for, switch, return),
 * and false for any other ')' or when the tokens run out. When a '(' is
 * met, the scan first runs on the nested group (discarding its answer)
 * and then resumes on what that nested scan left over. *)
let is_really_foreach xs =
  let rec scan = function
    | [] -> false, []
    | TCPar _
      :: (TOBrace _ | TIdent _ | Tif _ | Twhile _ | Tfor _ | Tswitch _
         | Treturn _)
      :: rest ->
        true, rest
    | TCPar _ :: rest -> false, rest
    | TOPar _ :: rest ->
        let (_, after_group) = scan rest in
        scan after_group
    | _ :: rest -> scan rest
  in
  fst (scan xs)
329
330
331(* ------------------------------------------------------------------------- *)
(* Store [Some cnt] in the tag reference carried by an ifdef-related
 * token (#if/#else/#elif/#endif and their bool/misc/version variants).
 * Raises [Impossible] on any other token. *)
let set_ifdef_token_parenthize_info cnt x =
  let tag =
    match x with
    | TIfdef (tag, _)
    | TIfdefelse (tag, _)
    | TIfdefelif (tag, _)
    | TEndif (tag, _)
    | TIfdefBool (_, tag, _)
    | TIfdefMisc (_, tag, _)
    | TIfdefVersion (_, tag, _) ->
        tag
    | _ -> raise Impossible
  in
  tag := Some cnt
485bce71
C
346
347
485bce71 348
(* Global counter used to number ifdef groups; incremented by
 * set_ifdef_parenthize_info. *)
let ifdef_paren_cnt = ref 0
485bce71 350
ae4735db
C
351
(* Tag the directives of every ifdef group, recursively.
 *
 * For each Ifdef/Ifdefbool group the global counter ifdef_paren_cnt is
 * incremented, and every directive token of the group receives the pair
 * (group number, number of directives in the group). The clauses of the
 * group are then processed the same way. *)
let rec set_ifdef_parenthize_info xs =
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdefbool (_, clauses, directives)
    | Ifdef (clauses, directives) ->
        incr ifdef_paren_cnt;
        let total_directives = List.length directives in
        List.iter
          (fun directive ->
            set_ifdef_token_parenthize_info
              (!ifdef_paren_cnt, total_directives)
              directive.tok)
          directives;
        List.iter set_ifdef_parenthize_info clauses
  in
  List.iter visit xs
366
367
978fd7e5
C
368(*****************************************************************************)
369(* The parsing hack for #define *)
370(*****************************************************************************)
371
ae4735db 372(* To parse macro definitions I need to do some tricks
978fd7e5
C
373 * as some information can be get only at the lexing level. For instance
374 * the space after the name of the macro in '#define foo (x)' is meaningful
375 * but the grammar can not get this information. So define_ident below
376 * look at such space and generate a special TOpardefine. In a similar
377 * way macro definitions can contain some antislash and newlines
ae4735db
C
378 * and the grammar need to know where the macro ends (which is
379 * a line-level and so low token-level information). Hence the
978fd7e5 380 * function 'define_line' below and the TDefEol.
ae4735db
C
381 *
382 * update: TDefEol is handled in a special way at different places,
978fd7e5
C
383 * a little bit like EOF, especially for error recovery, so this
384 * is an important token that should not be retagged!
ae4735db
C
385 *
386 *
387 * ugly hack, a better solution perhaps would be to erase TDefEOL
388 * from the Ast and list of tokens in parse_c.
389 *
978fd7e5 390 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
ae4735db 391 *
978fd7e5
C
392 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
393 * is where is the \n TCommentSpace. Normally there is always a last token
394 * to synchronize on, either EOF or the token of the next toplevel.
ae4735db 395 * In the case of the #define we got in list of token
978fd7e5
C
396 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
397 * not synchronize on it and so we will not print the "\n".
398 * A solution would be to put the TDefEOL before the "\n".
c491d8ee 399 * (jll: tried to do this, see the comment "Put end of line..." below)
ae4735db
C
400 *
401 * todo?: could put a ExpandedTok for that ?
978fd7e5 402 *)
ae4735db
C
(* Build the TDefEOL token that marks the end of a #define.
 * Its position information is copied from [ii], with an empty string
 * and a character position one past that of [ii]; the annotation and
 * comment references are fresh. *)
let mark_end_define ii =
  let eol_pos =
    { (Ast_c.parse_info_of_info ii) with
      Common.str = "";
      Common.charpos = Ast_c.pos_of_info ii + 1
    }
  in
  TDefEOL
    { Ast_c.pinfo = Ast_c.OriginTok eol_pos;
      cocci_tag = ref Ast_c.emptyAnnot;
      comments_tag = ref Ast_c.emptyComments;
    }
414
415(* put the TDefEOL at the good place *)
(* Insert a TDefEOL token at the end of every #define / #undef line.
 *
 * define_line_1 scans tokens outside of a directive; [acc] is the output
 * built so far, in reverse order. On TDefine/TUndef it hands over to
 * define_line_2. A TCppEscapedNewline met here is reported as suspicious
 * and turned into a TCommentSpace. *)
let rec define_line_1 acc xs =
  match xs with
  | [] -> List.rev acc
  | ((TDefine ii | TUndef ii) as directive) :: rest ->
      define_line_2 (directive :: acc) (Ast_c.line_of_info ii) ii rest
  | TCppEscapedNewline ii :: rest ->
      pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
      pr2 (Ast_c.strloc_of_info ii);
      define_line_1 (TCommentSpace ii :: acc) rest
  | tok :: rest -> define_line_1 (tok :: acc) rest

(* define_line_2 scans the tokens of a directive whose current line is
 * [line]; [lastinfo] is the info of the last token seen on it.
 *  - EOF: emit TDefEOL, then EOF, and go back to define_line_1.
 *  - escaped newline: becomes a TCommentSpace and the directive
 *    continues on line+1.
 *  - token on the same line: kept, and becomes the new [lastinfo].
 *  - token on another line: the directive is over; TDefEOL is inserted
 *    just below the head of [acc] (i.e. before the last emitted token,
 *    expected to be the newline) and the token is re-examined by
 *    define_line_1. *)
and define_line_2 acc line lastinfo xs =
  match xs with
  | [] ->
      (* should not happen, EOF should be met before *)
      pr2 "PB: WEIRD";
      List.rev (mark_end_define lastinfo :: acc)
  | tok :: rest ->
      let tok_line = TH.line_of_tok tok in
      let tok_info = TH.info_of_tok tok in
      (match tok with
       | EOF ii ->
           define_line_1 (EOF ii :: mark_end_define lastinfo :: acc) rest
       | TCppEscapedNewline ii ->
           if tok_line <> line then pr2 "PB: WEIRD: not same line number";
           define_line_2 (TCommentSpace ii :: acc) (line + 1) tok_info rest
       | _ when tok_line =|= line ->
           define_line_2 (tok :: acc) line tok_info rest
       | _ ->
           define_line_1
             ((List.hd acc) :: (mark_end_define lastinfo :: (List.tl acc)))
             (tok :: rest)
      )
464
(* Retag the name of the macro following each #define / #undef.
 *
 * [acc] is the output built so far, in reverse order.
 *  - "#undef <space> ident": the ident becomes a TIdentDefine.
 *  - "#define <space> ident(": the ident becomes a TIdentDefine and the
 *    '(' a TOParDefine, which distinguishes '#define foo(x)' from
 *    '#define foo (x)'.
 *  - "#define <space> ident": the ident becomes a TIdentDefine.
 *  - "#define <space> tok" where the text of tok matches
 *    Common.regexp_alpha (e.g. '#define inline'): tok is remapped to a
 *    TIdentDefine.
 *  - anything else is reported as weird.
 * Retagging the ident avoids bad interactions with other heuristics of
 * this file (e.g. the stringification one).
 *
 * NOTE(review): the #undef branch reuses the "#define body" warning, and
 * in the non-alphabetic case below the space and the token are not
 * re-emitted; both behaviours are kept as they are. *)
let rec define_ident acc xs =
  match xs with
  | [] -> List.rev acc
  | TUndef ii :: rest ->
      let acc = TUndef ii :: acc in
      (match rest with
       | TCommentSpace i1 :: TIdent (s, i2) :: tail ->
           define_ident (TIdentDefine (s, i2) :: TCommentSpace i1 :: acc) tail
       | _ ->
           pr2 "WEIRD: weird #define body";
           define_ident acc rest
      )
  | TDefine ii :: rest ->
      let acc = TDefine ii :: acc in
      (match rest with
       | TCommentSpace i1 :: TIdent (s, i2) :: TOPar i3 :: tail ->
           define_ident
             (TOParDefine i3 :: TIdentDefine (s, i2) :: TCommentSpace i1
              :: acc)
             tail
       | TCommentSpace i1 :: TIdent (s, i2) :: tail ->
           define_ident (TIdentDefine (s, i2) :: TCommentSpace i1 :: acc) tail
       | TCommentSpace i1 :: t :: tail ->
           let s = TH.str_of_tok t in
           let ii = TH.info_of_tok t in
           if s ==~ Common.regexp_alpha
           then begin
             pr2 (spf "remapping: %s to an ident in macro name" s);
             define_ident
               (TIdentDefine (s, ii) :: TCommentSpace i1 :: acc)
               tail
           end
           else begin
             pr2 "WEIRD: weird #define body";
             (* continues after the two matched tokens *)
             define_ident acc tail
           end
       | _ ->
           pr2 "WEIRD: weird #define body";
           define_ident acc rest
      )
  | tok :: rest -> define_ident (tok :: acc) rest
978fd7e5
C
532
533
ae4735db
C
534
(* Run the two #define passes in sequence: first insert the TDefEOL
 * markers (define_line_1), then retag the macro names (define_ident). *)
let fix_tokens_define2 xs =
  let with_eol = define_line_1 [] xs in
  define_ident [] with_eol
537
(* Same as fix_tokens_define2, run under Common.profile_code with the
 * label "C parsing.fix_define". *)
let fix_tokens_define a =
  let run () = fix_tokens_define2 a in
  Common.profile_code "C parsing.fix_define" run
ae4735db 540
978fd7e5
C
541
542
543
544
545(* ------------------------------------------------------------------------- *)
546(* Other parsing hacks related to cpp, Include/Define hacks *)
547(* ------------------------------------------------------------------------- *)
548
549(* Sometimes I prefer to generate a single token for a list of things in the
550 * lexer so that if I have to passed them, like for passing TInclude then
ae4735db
C
551 * it's easy. Also if I don't do a single token, then I need to
552 * parse the rest which may not need special stuff, like detecting
978fd7e5
C
553 * end of line which the parser is not really ready for. So for instance
554 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
ae4735db 555 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
978fd7e5
C
556 * but this kind of token is valid only after a #include and the
557 * lexing and parsing rules are different for such tokens so not that
558 * easy to parse such things in parser_c.mly. Hence the following hacks.
ae4735db 559 *
978fd7e5
C
560 * less?: maybe could get rid of this like I get rid of some of fix_define.
561 *)
562
563(* helpers *)
564
565(* used to generate new token from existing one *)
(* Derive a new token info from [ii]: same origin, but with text [str]
 * and with the character position and the column both shifted by
 * [posadd]. The annotation and comment references are created afresh on
 * every call so that they are never shared between tokens. *)
let new_info posadd str ii =
  let shifted =
    { (Ast_c.parse_info_of_info ii) with
      charpos = Ast_c.pos_of_info ii + posadd;
      str = str;
      column = Ast_c.col_of_info ii + posadd;
    }
  in
  { Ast_c.pinfo = Ast_c.OriginTok shifted;
    cocci_tag = ref Ast_c.emptyAnnot;
    comments_tag = ref Ast_c.emptyComments;
  }
577
578
(* Turn every token up to and including the next TDefEOL into a cpp
 * comment token, and return the resulting list (tokens after the TDefEOL
 * are left untouched).
 * The TDefEOL itself becomes a CppDirective comment; earlier tokens
 * become CppPassingNormal comments, except real comments, which are kept
 * as they are so that they are not lost.
 * Fails if no TDefEOL is found. *)
let rec comment_until_defeol xs =
  match xs with
  | [] ->
      (* job not done in Cpp_token_c.define_parse ? *)
      failwith "cant find end of define token TDefEOL"
  | (Parser_c.TDefEOL _ as eol) :: rest ->
      Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok eol)
      :: rest
  | tok :: rest ->
      let tok' =
        if TH.is_real_comment tok
        then tok
        else
          Parser_c.TCommentCpp
            (Token_c.CppPassingNormal (*good?*), TH.info_of_tok tok)
      in
      tok' :: comment_until_defeol rest
598
ae4735db
C
(* Skip tokens up to the next TDefEOL (using Common.drop_until) and
 * return what follows it; List.tl removes the TDefEOL itself. *)
let drop_until_defeol xs =
  let is_defeol = function
    | Parser_c.TDefEOL _ -> true
    | _ -> false
  in
  List.tl (Common.drop_until is_defeol xs)
602
603
604
605(* ------------------------------------------------------------------------- *)
606(* returns a pair (replaced token, list of next tokens) *)
607(* ------------------------------------------------------------------------- *)
608
ae4735db
C
(* Split an #include into two tokens. Returns the pair
 * (TIncludeStart token, [TIncludeFilename token]): the first carries
 * [info] rewrapped with the text [includes], the second carries
 * [filename] with an info shifted by the length of [includes]. *)
let tokens_include (info, includes, filename, inifdef) =
  let start_tok =
    Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef)
  in
  let filename_info = new_info (String.length includes) filename info in
  start_tok, [Parser_c.TIncludeFilename (filename, filename_info)]
614
615
616
617
485bce71
C
618(*****************************************************************************)
619(* CPP handling: macros, ifdefs, macros defs *)
620(*****************************************************************************)
621
0708f913
C
622(* ------------------------------------------------------------------------- *)
623(* special skip_start skip_end handling *)
624(* ------------------------------------------------------------------------- *)
625
626(* note: after this normally the token list should not contain any more the
627 * TCommentSkipTagStart and End tokens.
628 *)
(* Mark as comments (CppPassingExplicit) every region delimited by a
 * skip_start / skip_end pair of special comments, delimiters included.
 * Fails when a skip_start has no matching skip_end, or when a skip_end
 * is met without a preceding skip_start. *)
let rec commentize_skip_start_to_end xs =
  match xs with
  | [] -> ()
  | ({tok = TCommentSkipTagStart _} as start) :: rest ->
      (try
         let is_skip_end = function
           | {tok = TCommentSkipTagEnd _} -> true
           | _ -> false
         in
         let (skipped, stop, remaining) =
           Common.split_when is_skip_end rest
         in
         List.iter
           (fun tok -> set_as_comment Token_c.CppPassingExplicit tok)
           (start :: stop :: skipped);
         commentize_skip_start_to_end remaining
       with Not_found ->
         failwith "could not find end of skip_start special comment"
      )
  | {tok = TCommentSkipTagEnd _} :: _ ->
      failwith "found skip_end comment but no skip_start"
  | _ :: rest -> commentize_skip_start_to_end rest
ae4735db
C
655
656
0708f913
C
657
658
34e49164
C
659(* ------------------------------------------------------------------------- *)
660(* ifdef keeping/passing *)
661(* ------------------------------------------------------------------------- *)
662
663(* #if 0, #if 1, #if LINUX_VERSION handling *)
ae4735db
C
(* Handle boolean ifdefs (#if 0, #if 1, #if LINUX_VERSION).
 *
 * For every Ifdefbool group the directives are marked as CppDirective
 * comments, then:
 *  - positive condition: every clause but the first is commented out;
 *  - negative condition: the first clause is commented out, and of the
 *    remaining clauses only the last one is kept.
 * Plain Ifdef groups are only traversed, looking for nested groups. *)
let rec find_ifdef_bool xs =
  let pass_clause clause =
    iter_token_ifdef (set_as_comment Token_c.CppPassingNormal) clause
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) -> List.iter find_ifdef_bool clauses
    | Ifdefbool (is_ifdef_positif, clauses, directives) ->
        msg_ifdef_bool_passing is_ifdef_positif;
        (match clauses with
         | [] -> raise Impossible
         | firstclause :: others ->
             List.iter (set_as_comment Token_c.CppDirective) directives;
             if is_ifdef_positif
             then List.iter pass_clause others
             else begin
               pass_clause firstclause;
               (match List.rev others with
                (* keep only last *)
                | _last :: earlier -> List.iter pass_clause earlier
                | [] -> (* no #else *) ()
               )
             end
        )
  in
  List.iter visit xs
693
694
695
34e49164
C
(* Maximum number of lines/groups a clause may contain for find_ifdef_mid
 * to analyse the ifdef. *)
let thresholdIfdefSizeMid = 6
697
698(* infer ifdef involving not-closed expressions/statements *)
ae4735db
C
(* Infer ifdefs that cut through not-closed expressions/statements.
 *
 * An Ifdef group with at least two clauses is a candidate when
 *  - no clause is bigger than thresholdIfdefSizeMid, and
 *  - no clause contains a nested ifdef.
 * For a candidate, the (parenthesis, brace) balance of every clause is
 * computed. If the first clause is unbalanced AND all clauses have
 * exactly the same balance, the directives are marked as CppDirective
 * comments and every clause but the first one is commented out
 * (CppPassingCosWouldGetError), so that only one consistent alternative
 * remains.
 *
 * bugfix: the test used to read
 *   cnt1 <> 0 || cnt2 <> 0 && (all counts equal)
 * which, '&&' binding tighter than '||', skipped the "all counts equal"
 * requirement whenever the parenthesis balance was non-zero. The
 * disjunction is now parenthesised.
 *
 * All groups (Ifdef and Ifdefbool) are then traversed recursively; no
 * analysis is done on the Ifdefbool groups themselves.
 * Raises [Impossible] on an Ifdef group without any clause. *)
let rec find_ifdef_mid xs =
  xs +> List.iter (function
  | NotIfdefLine _ -> ()
  | Ifdef (xxs, info_ifdef_stmt) ->
      (match xxs with
      | [] -> raise Impossible
      | [_] -> ()
      | _first::second::rest ->
          (* don't analyse big ifdef *)
          if (xxs +> List.for_all
                (fun xs -> List.length xs <= thresholdIfdefSizeMid)) &&
             (* don't want nested ifdef *)
             (xxs +> List.for_all (fun xs ->
                xs +> List.for_all
                  (function NotIfdefLine _ -> true | _ -> false)
              ))
          then begin
            let counts =
              xxs +> List.map count_open_close_stuff_ifdef_clause in
            let cnt1, cnt2 = List.hd counts in
            if (cnt1 <> 0 || cnt2 <> 0) &&
               counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
            then begin
              msg_ifdef_mid_something();

              (* keep only first, treat the rest as comment *)
              info_ifdef_stmt +> List.iter
                (set_as_comment Token_c.CppDirective);
              (second::rest) +> List.iter
                (iter_token_ifdef
                   (set_as_comment Token_c.CppPassingCosWouldGetError));
            end
          end
      );
      List.iter find_ifdef_mid xxs

  (* no need complex analysis for ifdefbool *)
  | Ifdefbool (_, xxs, _) ->
      List.iter find_ifdef_mid xxs
  )
744
745
(* Maximum number of extra lines/groups allowed in each clause of an
 * ifdef for it to be considered by find_ifdef_funheaders. *)
let thresholdFunheaderLimit = 4
747
748(* ifdef defining alternate function header, type *)
(* Detect ifdefs that provide alternate function headers (or types): an
 * ifdef whose clauses start in column 0 and which is directly followed
 * by a line starting with '{' in column 0.
 *
 * Three shapes are recognised:
 *  1. two clauses, each with at most thresholdFunheaderLimit extra
 *     lines/groups;
 *  2. one single-line clause followed by a clause made of a nested
 *     two-clause ifdef;
 *  3. three single-line clauses (#if / #elif / #else).
 * In each case the rest of the list is processed first, then the
 * directives are marked as CppDirective comments and the alternatives
 * other than the first one are commented out
 * (CppPassingCosWouldGetError).
 * Any other Ifdef/Ifdefbool group is simply traversed recursively. *)
let rec find_ifdef_funheaders xs =
  let as_directive tok = set_as_comment Token_c.CppDirective tok in
  let as_passed tok =
    set_as_comment Token_c.CppPassingCosWouldGetError tok
  in
  match xs with
  | [] -> ()
  | NotIfdefLine _ :: rest -> find_ifdef_funheaders rest

  (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
  | Ifdef
      ([ (NotIfdefLine ({col = 0} :: _)) :: block1;
         (NotIfdefLine (({col = 0} as head2) :: line2)) :: block2
       ], directives)
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest
    when List.length block1 <= thresholdFunheaderLimit &&
         List.length block2 <= thresholdFunheaderLimit
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      List.iter as_directive directives;
      List.iter as_passed (head2 :: line2);
      iter_token_ifdef as_passed block2

  (* ifdef with nested ifdef *)
  | Ifdef
      ([ [NotIfdefLine ({col = 0} :: _)];
         [Ifdef
            ([ [NotIfdefLine (({col = 0} as head2) :: line2)];
               [NotIfdefLine (({col = 0} as head3) :: line3)];
             ], inner_directives)
         ]
       ], directives)
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      List.iter as_directive directives;
      List.iter as_directive inner_directives;
      List.iter as_passed (head2 :: head3 :: (line2 @ line3))

  (* ifdef with elseif *)
  | Ifdef
      ([ [NotIfdefLine ({col = 0} :: _)];
         [NotIfdefLine (({col = 0} as head2) :: line2)];
         [NotIfdefLine (({col = 0} as head3) :: line3)];
       ], directives)
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      List.iter as_directive directives;
      List.iter as_passed (head2 :: head3 :: (line2 @ line3))

  (* recurse *)
  | Ifdef (clauses, _) :: rest
  | Ifdefbool (_, clauses, _) :: rest ->
      List.iter find_ifdef_funheaders clauses;
      find_ifdef_funheaders rest
ae4735db 816
34e49164
C
817
818
485bce71 819(* ?? *)
ae4735db
C
(* Flag every #include located inside an ifdef: for each Ifdef/Ifdefbool
 * group, the boolean reference carried by each TInclude token found in
 * its clauses is set to true. *)
let rec adjust_inifdef_include xs =
  let flag_include tokext =
    match tokext.tok with
    | Parser_c.TInclude (_, _, inifdef_ref, _) -> inifdef_ref := true
    | _ -> ()
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) | Ifdefbool (_, clauses, _) ->
        List.iter (iter_token_ifdef flag_include) clauses
  in
  List.iter visit xs
831
832
833
34e49164 834
485bce71 835
34e49164
C
836
837
ae4735db
C
(* Detect ifdefs whose first clause ends with a closing ')' just before
 * the #else.
 *
 * For an Ifdef group with at least two clauses, if the last element of
 * the first clause is a plain line whose last token is a closing
 * parenthesis, the directives are marked as CppDirective comments and
 * every clause but the first one is commented out
 * (CppPassingCosWouldGetError). All groups are traversed recursively;
 * Ifdefbool groups get no analysis of their own.
 * Raises [Impossible] on an Ifdef group without any clause. *)
let rec find_ifdef_cparen_else xs =
  (* does the clause end with a line whose last token is a ')' ? *)
  let ends_with_cpar clause =
    match clause with
    | [] -> false
    | _ ->
        (match Common.last clause with
         | NotIfdefLine [] -> false
         | NotIfdefLine toks -> TH.is_cpar (Common.last toks).tok
         | Ifdef _ | Ifdefbool _ -> false
        )
  in
  let rec aux groups = List.iter visit groups
  and visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, directives) ->
        (match clauses with
         | [] -> raise Impossible
         | [_] -> ()
         | first :: others ->
             if ends_with_cpar first then begin
               msg_ifdef_cparen_else ();

               (* keep only first, treat the rest as comment *)
               List.iter (set_as_comment Token_c.CppDirective) directives;
               List.iter
                 (iter_token_ifdef
                    (set_as_comment Token_c.CppPassingCosWouldGetError))
                 others
             end
        );
        List.iter aux clauses

    (* no need complex analysis for ifdefbool *)
    | Ifdefbool (_, clauses, _) -> List.iter aux clauses
  in
  aux xs
34e49164
C
882
883
708f4980
C
884(* ------------------------------------------------------------------------- *)
885(* cpp-builtin part2, macro, using standard.h or other defs *)
886(* ------------------------------------------------------------------------- *)
34e49164 887
ae4735db 888(* now in cpp_token_c.ml *)
34e49164
C
889
890(* ------------------------------------------------------------------------- *)
891(* stringification *)
892(* ------------------------------------------------------------------------- *)
893
(* Stringification heuristic.
 *
 * Inside a parenthesised group, an argument made only of string tokens
 * and identifiers, and containing at least one string token, is taken
 * to be a string concatenation: each identifier in it is reported and
 * retagged in place as a TMacroString. Arguments that do not qualify are
 * searched recursively for nested parenthesised groups. *)
let rec find_string_macro_paren xs =
  let is_string = function
    | PToken {tok = (TString _ | TMacroString _)} -> true
    | _ -> false
  in
  let is_string_or_ident = function
    | PToken {tok = (TString _ | TMacroString _ | TIdent _)} -> true
    | _ -> false
  in
  let promote_ident = function
    | PToken ({tok = TIdent (s, _)} as id) ->
        msg_stringification s;
        id.tok <- TMacroString (s, TH.info_of_tok id.tok)
    | _ -> ()
  in
  match xs with
  | [] -> ()
  | Parenthised (args, _) :: rest ->
      List.iter
        (fun arg ->
          if List.exists is_string arg && List.for_all is_string_or_ident arg
          then List.iter promote_ident arg
          else find_string_macro_paren arg)
        args;
      find_string_macro_paren rest
  | PToken _ :: rest -> find_string_macro_paren rest
ae4735db 918
34e49164
C
919
920(* ------------------------------------------------------------------------- *)
921(* macro2 *)
922(* ------------------------------------------------------------------------- *)
923
924(* don't forget to recurse in each case *)
(* Macro heuristics working on the parenthesised view of the tokens.
 *
 * The clauses are tried in order and the order matters (in particular the
 * stringification clauses with parameters must come before the ones
 * without). Every clause recurses on the remaining tokens.
 *)
let rec find_macro_paren xs =
  (* turn a whole parenthesised group into comments of the given kind *)
  let comment_out_parens kind xxs info_parens =
    iter_token_paren (set_as_comment kind) [Parenthised (xxs, info_parens)]
  in
  (* retag an identifier token as a string macro *)
  let to_macro_string id s =
    id.tok <- TMacroString (s, TH.info_of_tok id.tok)
  in
  match xs with
  | [] -> ()

  (* attribute with arguments: the keyword and its parenthesised
   * arguments are both passed as comments *)
  | PToken ({tok = Tattribute _} as attr)
    :: Parenthised (xxs, info_parens)
    :: rest ->
      pr2_cpp "MACRO: __attribute detected ";
      comment_out_parens Token_c.CppAttr xxs info_parens;
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

  (* attribute without arguments *)
  | PToken ({tok = TattributeNoarg _} as attr) :: rest ->
      pr2_cpp "MACRO: __attributenoarg detected ";
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

(* disabled heuristics, kept for reference:

  (* attribute cpp, __xxx id *)
  | PToken ({tok = TIdent (s,i1)} as id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s ==~ regexp_annot
     ->
      msg_attribute s;
      id.tok <- TMacroAttr (s, i1);
      find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)

  (* attribute cpp, id __xxx *)
  | PToken ({tok = TIdent (s,i1)} as _id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
     ->
      msg_attribute s2;
      id2.tok <- TMacroAttr (s2, i2);
      find_macro_paren xs

  | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
    ::PToken ({tok = TIdent (s,i1)} as attr)
    ::xs when s ==~ regexp_annot
    ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s,i1);
      (* recurse, may have other storage attributes *)
      find_macro_paren (PToken (tok1)::xs)
*)

  (* storage attribute: a macro attribute right after static/extern *)
  | PToken ({tok = (Tstatic _ | Textern _)} as storage)
    :: PToken ({tok = TMacroAttr (s, i1)} as attr)
    :: rest ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s, i1);
      (* keep the storage token in front: other storage attributes
       * may follow *)
      find_macro_paren (PToken storage :: rest)

  (* stringification: the order of the following clauses is important *)

  (* string macro with parameters, placed after a string *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: Parenthised (xxs, info_parens)
    :: rest ->
      msg_stringification_params s;
      to_macro_string id s;
      comment_out_parens Token_c.CppMacro xxs info_parens;
      find_macro_paren rest

  (* string macro with parameters, placed before a string *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: Parenthised (xxs, info_parens)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification_params s;
      to_macro_string id s;
      comment_out_parens Token_c.CppMacro xxs info_parens;
      find_macro_paren rest

  (* The two clauses below handle strings that are not inside a function
   * call, for instance in an initializer. *)

  (* string macro variable, placed after a string *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: rest ->
      msg_stringification s;
      to_macro_string id s;
      find_macro_paren rest

  (* string macro variable, placed before a string *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification s;
      to_macro_string id s;
      find_macro_paren rest

  (* nothing recognized: skip one element, descending into parentheses *)
  | PToken _ :: rest ->
      find_macro_paren rest
  | Parenthised (xxs, _) :: rest ->
      List.iter find_macro_paren xxs;
      find_macro_paren rest
1048
1049
1050
1051
1052
1053(* don't forget to recurse in each case *)
(* Macro heuristics working on the line + parenthesised view of the tokens.
 *
 * The clauses are tried in order; each one recurses on the remaining
 * lines. Declaration-like macros are retagged as TMacroDecl,
 * statement-like macros lacking a trailing ';' as TMacroStmt.
 *)
let rec find_macro_lineparen xs =
  (* retag an identifier token as a declaration macro *)
  let tag_macro_decl s macro =
    msg_declare_macro s;
    let info = TH.info_of_tok macro.tok in
    macro.tok <- TMacroDecl (Ast_c.str_of_info info, info)
  in
  (* Can [next_tok], first token of the following line, come right after a
   * macro call written without ';' ? *)
  let may_follow_macro ctx next_tok =
    match next_tok with
    | TOBrace _ -> false (* otherwise would match funcdecl *)
    | TCBrace _ when ctx <> InFunction -> false
    | TPtVirg _ | TDotDot _ -> false
    | tok -> not (TH.is_binary_operator tok)
  in
  (* tokens accepted on the following line when that line is not more
   * indented than the macro *)
  let starts_other_stmt ctx next_tok =
    match next_tok with
    | TCBrace _ -> ctx =*= InFunction
    | Treturn _ | Tif _ | Telse _ -> true
    | _ -> false
  in
  match xs with
  | [] -> ()

  (* linuxext: ex: static [const] DEVICE_ATTR(); *)
  | Line [PToken {tok = Tstatic _};
          PToken ({tok = TIdent (s, _)} as macro);
          Parenthised (_, _);
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      tag_macro_decl s macro;
      find_macro_lineparen rest

  (* the static const case *)
  | Line [PToken {tok = Tstatic _};
          PToken ({tok = Tconst _} as const);
          PToken ({tok = TIdent (s, _)} as macro);
          Parenthised (_, _);
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      tag_macro_decl s macro;
      (* The const must be retagged too, otherwise the grammar is
       * ambiguous:
       *   21: shift/reduce conflict (shift 121, reduce 137) on Tconst
       *   decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
       *   decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
       *   storage_class_spec : Tstatic .  (137)
       *)
      const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
      find_macro_lineparen rest

  (* Same as the first clause but without the trailing ';'.
   * The ';' is not required because it can be on another line and,
   * because of the way mk_line is coded, it would then not be
   * accessible here even if it directly follows the ')'. *)
  | Line [PToken {tok = Tstatic _};
          PToken ({tok = TIdent (s, _)} as macro);
          Parenthised (_, _)]
    :: rest
    when s ==~ regexp_macro ->
      tag_macro_decl s macro;
      find_macro_lineparen rest

  (* on multiple lines: 'static' alone, then the macro on the next line *)
  | Line [PToken {tok = Tstatic _}]
    :: Line [PToken ({tok = TIdent (s, _)} as macro);
             Parenthised (_, _);
             PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      tag_macro_decl s macro;
      find_macro_lineparen rest

  (* linuxext: ex: DECLARE_BITMAP();
   *
   * regexp_declare is used here instead of regexp_macro because such a
   * line can also be a FunCallMacro, as in DEBUG(foo());
   * There is no preceding 'static' here, so the only way to avoid false
   * positives is to restrict this clause to .*DECLARE.* macros.
   *
   * There is a grammar rule for this, so this clause is only needed when
   * the parameters of the DECLARE_xxx are weird and can not be mapped
   * onto an argument_list.
   *)
  | Line [PToken ({tok = TIdent (s, _)} as macro);
          Parenthised (_, _);
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_declare ->
      tag_macro_decl s macro;
      find_macro_lineparen rest

  (* toplevel macros, ex: module_init(xxx)
   *
   * The TIdent could also be transformed into a TMacroTop, but that can
   * give false positives; it is easier to change only the closing paren,
   * which is enough to solve the end-of-stream pb of ocamlyacc.
   *)
  | Line [PToken {tok = TIdent (s, _); col = col1; where = ctx};
          Parenthised (_, info_parens)]
    :: rest
    when col1 =|= 0 ->
      let is_toplevel_macro =
        (* to reduce the number of false positives *)
        match rest with
        | Line (PToken ({col = col2} as next) :: _) :: _ ->
            TH.is_eof next.tok
            || (col2 =|= 0 && may_follow_macro ctx next.tok)
        | _ -> false
      in
      if is_toplevel_macro then begin
        msg_macro_toplevel_noptvirg s;
        (* just to avoid the end-of-stream pb of ocamlyacc *)
        let tcpar = Common.last info_parens in
        tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok)
        (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
      end;
      find_macro_lineparen rest

  (* macro with parameters
   * ex: DEBUG()
   *     return x;
   *)
  | Line [PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro);
          Parenthised (xxs, info_parens)]
    :: (Line (PToken ({col = col2} as next) :: restline2) as line2)
    :: rest ->
      (* when s ==~ regexp_macro *)
      (* case of label, usually put in first column *)
      let next_is_label =
        match next.tok, restline2 with
        | TIdent _, PToken {tok = TDotDot _} :: _ -> true
        | _ -> false
      in
      let is_macro_stmt =
        (col1 =|= col2 && may_follow_macro ctx next.tok)
        || (col2 <= col1
            && (starts_other_stmt ctx next.tok || next_is_label))
      in
      (* a macro in column 0 is deliberately left untouched here *)
      if is_macro_stmt && not (col1 =|= 0) then begin
        msg_macro_noptvirg s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
        iter_token_paren (set_as_comment Token_c.CppMacro)
          [Parenthised (xxs, info_parens)]
      end;
      find_macro_lineparen (line2 :: rest)

  (* linuxext:? single macro
   * ex: LOCK
   *     foo();
   *     UNLOCK
   *
   * todo: factorize code with previous rule ?
   *)
  | Line [PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro)]
    :: (Line (PToken ({col = col2} as next) :: _) as line2)
    :: rest ->
      (* when s ==~ regexp_macro *)
      let may_follow_single next_tok =
        match next_tok with
        | TPtVirg _ | TOr _ -> false
        | TCBrace _ when ctx <> InFunction -> false
        | tok -> not (TH.is_binary_operator tok)
      in
      let is_macro_stmt =
        (col1 =|= col2
         && col1 <> 0 (* otherwise can match typedef of fundecl *)
         && may_follow_single next.tok)
        || (col2 <= col1 && starts_other_stmt ctx next.tok)
      in
      if is_macro_stmt then begin
        msg_macro_noptvirg_single s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok)
      end;
      find_macro_lineparen (line2 :: rest)

  (* nothing recognized on this line *)
  | _ :: rest ->
      find_macro_lineparen rest
1335
1336
485bce71
C
1337
1338(* ------------------------------------------------------------------------- *)
1339(* define tobrace init *)
1340(* ------------------------------------------------------------------------- *)
1341
ae4735db
C
(* Detects #define bodies that are brace initializers and retags their
 * opening brace as TOBraceDefineInit. The decision is made from the two
 * tokens following the '{'. Parenthesised groups are not searched.
 *)
let find_define_init_brace_paren xs =
  (* '{' followed by "<int|string|ident> ," (macro without parameters) *)
  let is_init_noparam tok2 tok3 =
    match tok2.tok, tok3.tok with
    | (TInt _ | TString _ | TIdent _), TComma _ -> true
    | _ -> false
  in
  (* '{' followed by "<int|ident> ," or ". ident" (macro with parameters) *)
  let is_init_param tok2 tok3 =
    match tok2.tok, tok3.tok with
    | (TInt _ | TIdent _), TComma _ -> true
    | TDot _, TIdent _ -> true
    | _ -> false
  in
  let rec scan = function
    | [] -> ()

    (* mainly for firefox *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (s, _)}
      :: PToken ({tok = TOBrace i1} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        if is_init_noparam tok2 tok3 then begin
          pr2_cpp ("found define initializer: " ^ s);
          tokbrace.tok <- TOBraceDefineInit i1
        end;
        scan rest

    (* mainly for linux, especially in sound/ *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (s, _)}
      :: Parenthised (_, _)
      :: PToken ({tok = TOBrace i1} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        if is_init_param tok2 tok3 then begin
          pr2_cpp ("found define initializer with param: " ^ s);
          tokbrace.tok <- TOBraceDefineInit i1
        end;
        scan rest

    (* otherwise skip one element; no need to look inside parentheses
     * for this heuristic *)
    | PToken _ :: rest -> scan rest
    | Parenthised (_, _) :: rest -> scan rest
  in
  scan xs
1405
1406
34e49164
C
1407(* ------------------------------------------------------------------------- *)
1408(* action *)
1409(* ------------------------------------------------------------------------- *)
1410
(* Possibly obsolete now that macros are expanded, but commenting this
 * heuristic out causes some regressions.
 * todo: a bad decision here can influence the other phases and make the
 * file hard to parse. So when we get a parse error, we should maybe undo
 * some of the guesses made by these heuristics and restore the original
 * token values.
 *)
1417
34e49164
C
(* Looks for "higher order" macro calls, i.e. an identifier followed by a
 * parenthesised group in which some argument contains a statement token.
 * The tokens of such arguments are retagged as TAction.
 *)
let rec find_actions xs =
  match xs with
  | [] -> ()
  | PToken {tok = TIdent (name, _)} :: Parenthised (args, _) :: rest ->
      find_actions rest;
      List.iter find_actions args;
      if find_actions_params args
      then msg_macro_higher_order name
  | _ :: rest ->
      find_actions rest

(* Retags as TAction all the tokens of every argument containing a
 * statement token; returns true when at least one argument was retagged.
 *)
and find_actions_params xxs =
  let tag_as_action x =
    if TH.is_eof x.tok
    then
      (* Certainly because paren detection had a pb because of some
       * ifdef-exp. Similar additional checking to what is done in
       * set_as_comment. *)
      pr2 "PB: weird, I try to tag an EOF token as an action"
    else if TH.is_eom x.tok
    then
      (* cf tests-bis/no_cpar_macro.c *)
      pr2 "PB: weird, I try to tag an EOM token as an action"
    else
      x.tok <- TAction (TH.info_of_tok x.tok)
  in
  List.fold_left
    (fun modified arg ->
      let has_statement =
        List.exists (fun x -> TH.is_statement x.tok) (tokens_of_paren arg)
        (* undo: && List.length toks > 1
         * good for sparse, not good for linux
         *)
      in
      if has_statement then begin
        iter_token_paren tag_as_action arg;
        true
      end
      else modified)
    false
    xxs
1461
1462
1463
1464(* ------------------------------------------------------------------------- *)
1465(* main fix cpp function *)
1466(* ------------------------------------------------------------------------- *)
1467
(* Removes comment tokens and cpp instruction tokens from the list, except
 * TDefine tokens which are kept.
 *)
let filter_cpp_stuff xs =
  let keep x =
    let tok = x.tok in
    if TH.is_comment tok
    then false
    else
      match tok with
      (* We don't want to drop the define; if it were dropped, its body
       * would have to be dropped too, otherwise the line heuristics
       * would get lost by not finding the TDefine in column 0 but an
       * identifier of the define in a column > 0. *)
      | Parser_c.TDefine _ -> true
      | _ -> not (TH.is_cpp_instruction tok)
  in
  List.filter keep xs
34e49164
C
1483
(* Rewrites the position info of expanded and fake tokens so that each one
 * refers to the last preceding original token together with an offset from
 * it. The offset restarts at the length of each original token and grows
 * by the length of every expanded/fake token seen since. Tokens placed
 * before the first original token are left unchanged.
 *
 * Fails with "abstract not expected" on an AbstractLineTok.
 *)
let insert_virtual_positions l =
  let strlen info = String.length (Ast_c.str_of_info info) in
  (* [tok] with its position info replaced by [pinfo] *)
  let with_pinfo tok pinfo =
    TH.visitor_info_of_tok (fun info -> Ast_c.rewrap_pinfo pinfo info) tok
  in
  (* [prev] is the parse info of the last original token seen *)
  let rec loop prev offset acc toks =
    match toks with
    | [] -> List.rev acc
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             let prev = Ast_c.parse_info_of_info info in
             loop prev (strlen info) (tok :: acc) rest
         | Ast_c.ExpandedTok (pi, _) ->
             let tok' =
               with_pinfo tok (Ast_c.ExpandedTok (pi, (prev, offset))) in
             loop prev (offset + strlen info) (tok' :: acc) rest
         | Ast_c.FakeTok (s, _) ->
             let tok' =
               with_pinfo tok (Ast_c.FakeTok (s, (prev, offset))) in
             loop prev (offset + strlen info) (tok' :: acc) rest
         | Ast_c.AbstractLineTok _ ->
             failwith "abstract not expected")
  in
  (* copy tokens as is until the first original token is found *)
  let rec skip_fake = function
    | [] -> []
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             let prev = Ast_c.parse_info_of_info info in
             tok :: loop prev (strlen info) [] rest
         | _ -> tok :: skip_fake rest)
  in
  skip_fake l
708f4980 1514
485bce71 1515(* ------------------------------------------------------------------------- *)
f59c9fb7 1516
(* Main cpp-fixing pass: runs all the token heuristics, in a fixed order,
 * on the extended view of [tokens] and returns the resulting tokens with
 * virtual positions inserted.
 *
 * The order is important: if the action heuristic were run first then,
 * because of ifdefs, some parentheses could look unclosed, a higher order
 * macro could be wrongly assumed and too many tokens would be eaten. So
 * the ifdef heuristics must come first.
 *
 * The cleaned token lists are recomputed before each phase because the
 * heuristics mutate the tokens, so more of them may have become comments
 * in the meantime.
 *)
let fix_tokens_cpp2 ~macro_defs tokens =
  let tokens2 = ref (Common.acc_map TV.mk_token_extended tokens) in

  (* current tokens without comments (could also filter #define/#include);
   * is_comment also filters the TCommentCpp created by
   * commentize_skip_start_to_end *)
  let without_comments () =
    List.filter (fun x -> not (TH.is_comment x.tok)) !tokens2 in
  (* current tokens without comments and cpp instructions *)
  let without_cpp_stuff () =
    filter_cpp_stuff !tokens2 in

  commentize_skip_start_to_end !tokens2;

  (* ifdef *)
  let ifdef_grouped = TV.mk_ifdef (without_comments ()) in
  set_ifdef_parenthize_info ifdef_grouped;

  find_ifdef_funheaders ifdef_grouped;
  find_ifdef_bool ifdef_grouped;
  find_ifdef_mid ifdef_grouped;
  (* change order ? maybe the cparen_else heuristic makes some of the
   * funheaders heuristics irrelevant ? *)
  find_ifdef_cparen_else ifdef_grouped;
  adjust_inifdef_include ifdef_grouped;

  (* macro 1 *)
  let paren_grouped = TV.mk_parenthised (without_cpp_stuff ()) in
  Cpp_token_c.apply_macro_defs
    ~msg_apply_known_macro
    ~msg_apply_known_macro_hint
    macro_defs paren_grouped;
  (* because the before field is used by apply_macro_defs *)
  tokens2 := TV.rebuild_tokens_extented !tokens2;

  (* tagging contextual info (InFunc, InStruct, etc). Better to do
   * that after the "ifdef-simplification" phase. *)
  let brace_grouped = TV.mk_braceised (without_comments ()) in
  set_context_tag brace_grouped;

  (* macro *)
  let paren_grouped = TV.mk_parenthised (without_cpp_stuff ()) in
  let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
  find_define_init_brace_paren paren_grouped;
  find_string_macro_paren paren_grouped;
  find_macro_lineparen line_paren_grouped;
  find_macro_paren paren_grouped;

  (* obsolete: actions ? not yet *)
  let paren_grouped = TV.mk_parenthised (without_cpp_stuff ()) in
  find_actions paren_grouped;

  insert_virtual_positions (Common.acc_map (fun x -> x.tok) !tokens2)
1595
(* fix_tokens_cpp2 run under the exclusive profiling entry "HACK" *)
let time_hack1 ~macro_defs a =
  let run () = fix_tokens_cpp2 ~macro_defs a in
  Common.profile_code_exclusif "HACK" run
34e49164 1598
(* Entry point of the cpp token fixing, profiled as "C parsing.fix_cpp" *)
let fix_tokens_cpp ~macro_defs a =
  let run () = time_hack1 ~macro_defs a in
  Common.profile_code "C parsing.fix_cpp" run
34e49164 1601
34e49164 1602
34e49164 1603
34e49164
C
1604
1605(*****************************************************************************)
1606(* Lexing with lookahead *)
1607(*****************************************************************************)
1608
(* Why use yet another parsing-hack technique? Are the fix_xxx functions,
 * which pre-process the full list of tokens, not enough?
 * No, because sometimes we need more contextual info, and even if
 * set_context() tries to give some contextual info, it is not completely
 * accurate. So the following code gives yet another alternative, yet
 * another chance to transform some tokens.
 *
 * todo?: maybe we could try to get rid of this technique. Maybe a better
 * set_context() would make it possible to move this code to a fix_xx
 * technique.
 *
 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
 * it is more general to do it via my LALR(k) tech, because here we can
 * transform a token given some context information. Sometimes it makes
 * sense to transform a token in one context and sometimes not, and
 * lex cannot provide us with this context information. Note that the order
 * of the pattern-matching cases in lookahead is important. Do not cut/paste.
 *
 * Note that in next there are only "clean" tokens: there are no comment
 * or space tokens. This is done by the caller.
 *
 *)
1631
485bce71
C
1632open Lexer_parser (* for the fields of lexer_hint type *)
1633
(* true unless the first token of the list is struct, union or enum *)
let not_struct_enum toks =
  match toks with
  | [] -> true
  | (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _) :: _ -> false
  | _ :: _ -> true
34e49164 1637
f59c9fb7
C
(* true for '*', and also for '&' when the c_plus_plus flag is set *)
let pointer tok =
  match tok with
  | TMul _ -> true
  | TAnd _ -> !Flag.c_plus_plus
  | _ -> false
485bce71 1642
ae4735db 1643let lookahead2 ~pass next before =
34e49164
C
1644
1645 match (next, before) with
1646
4dfbc1c2
C
1647 (* c++ hacks *)
1648 (* yy xx( and in function *)
1649 | TOPar i1::_, TIdent(s,i2)::TypedefIdent _::_
1650 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1651 pr2_cpp("constructed_object: " ^s);
1652 TOParCplusplusInit i1
1653 | TypedefIdent(s,i)::TOPar i1::_,_
1654 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1655 TIdent(s,i)
1656
34e49164
C
1657 (*-------------------------------------------------------------*)
1658 (* typedef inference, parse_typedef_fix3 *)
1659 (*-------------------------------------------------------------*)
1660 (* xx xx *)
b1b2de81 1661 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
34e49164
C
1662 && ok_typedef s
1663 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
ae4735db 1664 ->
34e49164
C
1665 (* parse_typedef_fix3:
1666 * acpi_object acpi_object;
ae4735db 1667 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
34e49164
C
1668 * Le parser en interne a deja appelé le prochain token pour pouvoir
1669 * decider des choses.
1670 * => special case in lexer_heuristic, again
1671 *)
ae4735db
C
1672 if !Flag_parsing_c.debug_typedef
1673 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
34e49164
C
1674
1675 LP.disable_typedef();
1676
4dfbc1c2 1677 msg_typedef s 1; LP.add_typedef_root s;
34e49164
C
1678 TypedefIdent (s, i1)
1679
1680 (* xx yy *)
ae4735db 1681 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
34e49164
C
1682 && ok_typedef s
1683 ->
1684 (* && not_annot s2 BUT lead to false positive*)
1685
4dfbc1c2 1686 msg_typedef s 2; LP.add_typedef_root s;
34e49164
C
1687 TypedefIdent (s, i1)
1688
1689
1690 (* xx inline *)
ae4735db 1691 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
34e49164 1692 && ok_typedef s
ae4735db 1693 ->
4dfbc1c2 1694 msg_typedef s 3; LP.add_typedef_root s;
34e49164
C
1695 TypedefIdent (s, i1)
1696
1697
1698 (* [,(] xx [,)] AND param decl *)
1699 | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ )
b1b2de81 1700 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
34e49164 1701 && ok_typedef s
ae4735db 1702 ->
4dfbc1c2 1703 msg_typedef s 4; LP.add_typedef_root s;
34e49164
C
1704 TypedefIdent (s, i1)
1705
1706 (* xx* [,)] *)
1707 (* specialcase: [,(] xx* [,)] *)
f59c9fb7
C
1708 | (TIdent (s, i1)::ptr::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1709 when pointer ptr && not_struct_enum before
34e49164
C
1710 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1711 && ok_typedef s
ae4735db 1712 ->
4dfbc1c2 1713 msg_typedef s 5; LP.add_typedef_root s;
34e49164
C
1714 TypedefIdent (s, i1)
1715
1716
1717 (* xx** [,)] *)
1718 (* specialcase: [,(] xx** [,)] *)
1719 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1720 when not_struct_enum before
1721 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1722 && ok_typedef s
ae4735db 1723 ->
4dfbc1c2 1724 msg_typedef s 6; LP.add_typedef_root s;
34e49164
C
1725 TypedefIdent (s, i1)
1726
1727
1728
1729 (* xx const * USELESS because of next rule ? *)
ae4735db
C
1730 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
1731 when not_struct_enum before
34e49164
C
1732 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1733 && ok_typedef s
1734 ->
1735
4dfbc1c2 1736 msg_typedef s 7; LP.add_typedef_root s;
34e49164 1737 TypedefIdent (s, i1)
ae4735db 1738
34e49164 1739 (* xx const *)
ae4735db
C
1740 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
1741 when not_struct_enum before
34e49164
C
1742 && ok_typedef s
1743 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1744 ->
1745
4dfbc1c2 1746 msg_typedef s 8; LP.add_typedef_root s;
34e49164
C
1747 TypedefIdent (s, i1)
1748
1749
1750 (* xx * const *)
f59c9fb7
C
1751 | (TIdent (s, i1)::ptr::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
1752 when pointer ptr && not_struct_enum before
34e49164
C
1753 && ok_typedef s
1754 ->
1755 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1756
4dfbc1c2 1757 msg_typedef s 9; LP.add_typedef_root s;
34e49164
C
1758 TypedefIdent (s, i1)
1759
1760
1761 (* ( const xx) *)
485bce71 1762 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
34e49164 1763 ok_typedef s ->
4dfbc1c2 1764 msg_typedef s 10; LP.add_typedef_root s;
34e49164 1765 TypedefIdent (s, i1)
ae4735db 1766
34e49164
C
1767
1768
1769 (* ( xx ) [sizeof, ~] *)
485bce71 1770 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
34e49164
C
1771 when not_struct_enum before
1772 && ok_typedef s
ae4735db 1773 ->
4dfbc1c2 1774 msg_typedef s 11; LP.add_typedef_root s;
34e49164
C
1775 TypedefIdent (s, i1)
1776
1777 (* [(,] xx [ AND parameterdeclaration *)
1778 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
b1b2de81 1779 when (LP.current_context() =*= LP.InParameter)
34e49164 1780 && ok_typedef s
ae4735db 1781 ->
4dfbc1c2 1782 msg_typedef s 12; LP.add_typedef_root s;
34e49164 1783 TypedefIdent (s, i1)
ae4735db 1784
34e49164
C
1785 (*------------------------------------------------------------*)
1786 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
1787 (* but if have a '=', or ',' I think not *)
1788 (*------------------------------------------------------------*)
1789
1790 (* static xx * yy *)
f59c9fb7 1791 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::_ ,
485bce71 1792 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
f59c9fb7 1793 pointer ptr && ok_typedef s
34e49164 1794 ->
4dfbc1c2 1795 msg_typedef s 13; LP.add_typedef_root s;
34e49164 1796 TypedefIdent (s, i1)
ae4735db 1797
34e49164
C
1798 (* TODO xx * yy ; AND in start of compound element *)
1799
1800
1801 (* xx * yy, AND in paramdecl *)
f59c9fb7 1802 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ , _)
b1b2de81 1803 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
f59c9fb7 1804 && pointer ptr && ok_typedef s
ae4735db 1805 ->
34e49164 1806
4dfbc1c2 1807 msg_typedef s 14; LP.add_typedef_root s;
34e49164
C
1808 TypedefIdent (s, i1)
1809
1810
1811 (* xx * yy ; AND in Toplevel, except when have = before *)
1812 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
1813 TIdent (s, i1)
f59c9fb7
C
1814 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TPtVirg _::_ , _)
1815 when not_struct_enum before && pointer ptr &&
1816 (LP.is_top_or_struct (LP.current_context ()))
ae4735db 1817 ->
4dfbc1c2 1818 msg_typedef s 15; LP.add_typedef_root s;
34e49164
C
1819 TypedefIdent (s, i1)
1820
1821 (* xx * yy , AND in Toplevel *)
f59c9fb7 1822 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ , _)
b1b2de81 1823 when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
f59c9fb7 1824 && ok_typedef s && pointer ptr
ae4735db 1825 ->
34e49164 1826
4dfbc1c2 1827 msg_typedef s 16; LP.add_typedef_root s;
34e49164
C
1828 TypedefIdent (s, i1)
1829
1830 (* xx * yy ( AND in Toplevel *)
f59c9fb7 1831 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TOPar _::_ , _)
ae4735db 1832 when not_struct_enum before
485bce71 1833 && (LP.is_top_or_struct (LP.current_context ()))
f59c9fb7 1834 && ok_typedef s && pointer ptr
34e49164 1835 ->
4dfbc1c2 1836 msg_typedef s 17; LP.add_typedef_root s;
34e49164 1837 TypedefIdent (s, i1)
ae4735db 1838
34e49164
C
1839 (* xx * yy [ *)
1840 (* todo? enough ? cos in struct def we can have some expression ! *)
f59c9fb7 1841 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TOCro _::_ , _)
ae4735db 1842 when not_struct_enum before &&
485bce71 1843 (LP.is_top_or_struct (LP.current_context ()))
f59c9fb7 1844 && ok_typedef s && pointer ptr
ae4735db 1845 ->
4dfbc1c2 1846 msg_typedef s 18; LP.add_typedef_root s;
34e49164
C
1847 TypedefIdent (s, i1)
1848
1849 (* u16: 10; in struct *)
1850 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
485bce71 1851 when (LP.is_top_or_struct (LP.current_context ()))
ae4735db
C
1852 && ok_typedef s
1853 ->
4dfbc1c2 1854 msg_typedef s 19; LP.add_typedef_root s;
34e49164 1855 TypedefIdent (s, i1)
ae4735db 1856
34e49164
C
1857
1858 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
1859 (* YES cos at toplevel can have some expression !! for instance when *)
1860 (* enter in the dimension of an array *)
1861 (*
1862 | (TIdent s::TMul::TIdent s2::_ , _)
1863 when (take_safe 1 !passed_tok <> [Tstruct] &&
1864 (take_safe 1 !passed_tok <> [Tenum]))
1865 &&
ae4735db 1866 !LP._lexer_hint = Some LP.Toplevel ->
4dfbc1c2 1867 msg_typedef s 20; LP.add_typedef_root s;
34e49164
C
1868 TypedefIdent s
1869 *)
1870
1871 (* xx * yy = *)
f59c9fb7 1872 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TEq _::_ , _)
ae4735db 1873 when not_struct_enum before
f59c9fb7 1874 && ok_typedef s && pointer ptr
34e49164 1875 ->
4dfbc1c2 1876 msg_typedef s 21; LP.add_typedef_root s;
34e49164
C
1877 TypedefIdent (s, i1)
1878
1879
1880 (* xx * yy) AND in paramdecl *)
f59c9fb7 1881 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TCPar _::_ , _)
b1b2de81 1882 when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
f59c9fb7 1883 && ok_typedef s && pointer ptr
34e49164 1884 ->
4dfbc1c2 1885 msg_typedef s 22; LP.add_typedef_root s;
34e49164 1886 TypedefIdent (s, i1)
ae4735db 1887
34e49164
C
1888
1889 (* xx * yy; *) (* wrong ? *)
f59c9fb7 1890 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TPtVirg _::_ ,
ae4735db 1891 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
f59c9fb7 1892 && ok_typedef s & pointer ptr
34e49164 1893 ->
4dfbc1c2 1894 msg_typedef s 23; LP.add_typedef_root s;
485bce71 1895 msg_maybe_dangereous_typedef s;
34e49164
C
1896 TypedefIdent (s, i1)
1897
1898
1899 (* xx * yy, and ';' before xx *) (* wrong ? *)
f59c9fb7 1900 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ ,
34e49164 1901 (TOBrace _| TPtVirg _)::_) when
f59c9fb7 1902 ok_typedef s && pointer ptr
34e49164 1903 ->
4dfbc1c2 1904 msg_typedef s 24; LP.add_typedef_root s;
34e49164
C
1905 TypedefIdent (s, i1)
1906
1907
1908 (* xx_t * yy *)
f59c9fb7 1909 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::_ , _)
ae4735db
C
1910 when s ==~ regexp_typedef && not_struct_enum before
1911 (* struct user_info_t sometimes *)
f59c9fb7 1912 && ok_typedef s && pointer ptr
ae4735db 1913 ->
4dfbc1c2 1914 msg_typedef s 25; LP.add_typedef_root s;
34e49164
C
1915 TypedefIdent (s, i1)
1916
1917 (* xx ** yy *) (* wrong ? *)
1918 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
1919 when not_struct_enum before
1920 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
ae4735db 1921 && ok_typedef s
34e49164 1922 ->
4dfbc1c2 1923 msg_typedef s 26; LP.add_typedef_root s;
34e49164
C
1924 TypedefIdent (s, i1)
1925
1926 (* xx *** yy *)
1927 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
ae4735db
C
1928 when not_struct_enum before
1929 && ok_typedef s
34e49164
C
1930 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1931 ->
4dfbc1c2 1932 msg_typedef s 27; LP.add_typedef_root s;
34e49164
C
1933 TypedefIdent (s, i1)
1934
1935 (* xx ** ) *)
1936 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
ae4735db 1937 when not_struct_enum before
34e49164 1938 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
ae4735db 1939 && ok_typedef s
34e49164 1940 ->
4dfbc1c2 1941 msg_typedef s 28; LP.add_typedef_root s;
34e49164
C
1942 TypedefIdent (s, i1)
1943
1944
1945
1946 (* ----------------------------------- *)
ae4735db 1947 (* old: why not do like for other rules and start with TIdent ?
485bce71
C
1948 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
1949 * new: prefer now start with TIdent because otherwise the add_typedef_root
1950 * may have no effect if in second pass or if have disable the add_typedef.
1951 *)
34e49164
C
1952
1953 (* (xx) yy *)
ae4735db
C
1954 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
1955 (TOPar info)::x::_)
34e49164 1956 when not (TH.is_stuff_taking_parenthized x) &&
b1b2de81 1957 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3
ae4735db
C
1958 && ok_typedef s
1959 ->
34e49164 1960
4dfbc1c2 1961 msg_typedef s 29; LP.add_typedef_root s;
485bce71
C
1962 (*TOPar info*)
1963 TypedefIdent (s, i1)
34e49164
C
1964
1965
ae4735db 1966 (* (xx) ( yy)
91eba41f
C
1967 * but false positive: typedef int (xxx_t)(...), so do specialisation below.
1968 *)
1969 (*
ae4735db
C
1970 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
1971 when not (TH.is_stuff_taking_parenthized x)
1972 && ok_typedef s
34e49164 1973 ->
4dfbc1c2 1974 msg_typedef s 30; LP.add_typedef_root s;
485bce71
C
1975 (* TOPar info *)
1976 TypedefIdent (s, i1)
91eba41f
C
1977 *)
1978 (* special case: = (xx) ( yy) *)
ae4735db 1979 | (TIdent (s, i1)::TCPar _::TOPar _::_ ,
91eba41f 1980 (TOPar info)::(TEq _ |TEqEq _)::_)
ae4735db 1981 when ok_typedef s
91eba41f 1982 ->
4dfbc1c2 1983 msg_typedef s 31; LP.add_typedef_root s;
91eba41f
C
1984 (* TOPar info *)
1985 TypedefIdent (s, i1)
1986
34e49164
C
1987
1988 (* (xx * ) yy *)
f59c9fb7
C
1989 | (TIdent (s, i1)::ptr::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_)
1990 when ok_typedef s && pointer ptr
ae4735db 1991 ->
4dfbc1c2 1992 msg_typedef s 32; LP.add_typedef_root s;
485bce71
C
1993 (*TOPar info*)
1994 TypedefIdent (s,i1)
1995
34e49164
C
1996
1997 (* (xx){ ... } constructor *)
ae4735db
C
1998 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
1999 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
2000 && ok_typedef s
34e49164 2001 ->
4dfbc1c2 2002 msg_typedef s 33; LP.add_typedef_root s;
34e49164
C
2003 TypedefIdent (s, i1)
2004
2005
2006 (* can have sizeof on expression
ae4735db 2007 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
708f4980 2008 msg_typedef s; LP.add_typedef_root s;
34e49164
C
2009 Tsizeof
2010 *)
91eba41f
C
2011
2012
2013 (* ----------------------------------- *)
2014 (* x ( *y )(params), function pointer *)
ae4735db 2015 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
34e49164 2016 when not_struct_enum before
ae4735db 2017 && ok_typedef s
34e49164 2018 ->
4dfbc1c2 2019 msg_typedef s 34; LP.add_typedef_root s;
34e49164
C
2020 TypedefIdent (s, i1)
2021
91eba41f 2022 (* x* ( *y )(params), function pointer 2 *)
ae4735db 2023 | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
91eba41f 2024 when not_struct_enum before
ae4735db 2025 && ok_typedef s
91eba41f 2026 ->
4dfbc1c2 2027 msg_typedef s 35; LP.add_typedef_root s;
91eba41f
C
2028 TypedefIdent (s, i1)
2029
34e49164
C
2030
2031 (*-------------------------------------------------------------*)
2032 (* CPP *)
2033 (*-------------------------------------------------------------*)
485bce71
C
2034 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
2035 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
34e49164 2036 as x)
ae4735db
C
2037 ::_, _
2038 ->
485bce71 2039 (*
ae4735db 2040 if not !Flag_parsing_c.ifdef_to_if
34e49164 2041 then TCommentCpp (Ast_c.CppDirective, ii)
ae4735db 2042 else
485bce71 2043 *)
0708f913
C
2044 (* not !LP._lexer_hint.toplevel *)
2045 if !Flag_parsing_c.ifdef_directive_passing
708f4980 2046 || (pass >= 2)
0708f913 2047 then begin
ae4735db 2048
b1b2de81 2049 if (LP.current_context () =*= LP.InInitializer)
ae4735db 2050 then begin
0708f913
C
2051 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
2052 incr Stat.nIfdefInitializer;
ae4735db 2053 end else begin
708f4980 2054 pr2_cpp("IFDEF: or related inside function. I treat it as comment");
0708f913
C
2055 incr Stat.nIfdefPassing;
2056 end;
2057 TCommentCpp (Token_c.CppDirective, ii)
2058 end
2059 else x
ae4735db 2060
3a314143 2061 | (TUndef (ii) as x)::_, _
ae4735db 2062 ->
708f4980 2063 if (pass >= 2)
485bce71 2064 then begin
0708f913
C
2065 pr2_cpp("UNDEF: I treat it as comment");
2066 TCommentCpp (Token_c.CppDirective, ii)
113803cf
C
2067 end
2068 else x
2069
ae4735db
C
2070 | (TCppDirectiveOther (ii) as x)::_, _
2071 ->
708f4980 2072 if (pass >= 2)
113803cf 2073 then begin
0708f913
C
2074 pr2_cpp ("OTHER directive: I treat it as comment");
2075 TCommentCpp (Token_c.CppDirective, ii)
485bce71
C
2076 end
2077 else x
34e49164
C
2078
2079 (* If the ident contains a for_each, then it is certainly a macro. But to be
2080 * sure should look if there is a '{' after the ')', but it requires
2081 * to count the '('. Because this can be expensive, we do that only
ae4735db 2082 * when the token contains "for_each".
34e49164 2083 *)
ae4735db 2084 | (TIdent (s, i1)::TOPar _::rest, _)
b1b2de81 2085 when not (LP.current_context () =*= LP.InTopLevel)
ae4735db
C
2086 (* otherwise a function such as static void loopback_enable(int i) {
2087 * will be considered as a loop
34e49164
C
2088 *)
2089 ->
2090
ae4735db 2091 if s ==~ regexp_foreach &&
34e49164 2092 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
ae4735db 2093
34e49164
C
2094 then begin
2095 msg_foreach s;
2096 TMacroIterator (s, i1)
2097 end
2098 else TIdent (s, i1)
2099
2100
ae4735db 2101
34e49164
C
2102 (*-------------------------------------------------------------*)
2103 | v::xs, _ -> v
2104 | _ -> raise Impossible
2105
(* Public entry point for the lookahead heuristics.
 *
 * Forwards [~pass], [a] and [b] unchanged to [lookahead2] and returns
 * whatever it returns. The call is wrapped in a thunk handed to
 * [Common.profile_code] under the label "C parsing.lookahead"
 * (presumably so the time spent here shows up in the profiling report;
 * confirm against Common).
 *)
let lookahead ~pass a b =
  let run_lookahead () = lookahead2 ~pass a b in
  Common.profile_code "C parsing.lookahead" run_lookahead
34e49164
C
2108
2109