Coccinelle release-1.0.0-rc11
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913 4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
34e49164
C
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
34e49164
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
708f4980 19module TV = Token_views_c
34e49164
C
20module LP = Lexer_parser
21
485bce71 22module Stat = Parsing_stat
34e49164 23
ae4735db 24open Parser_c
34e49164 25
ae4735db 26open TV
708f4980 27
34e49164
C
28(*****************************************************************************)
29(* Some debugging functions *)
30(*****************************************************************************)
31
(* Local printers obtained from Common.mk_pr2_wrappers, parameterised by the
 * Flag_parsing_c.verbose_parsing flag.
 * NOTE(review): presumably they only print when that flag is set -- confirm
 * against Common.mk_pr2_wrappers.
 *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
113803cf 33
(* Traces a cpp-related message: when Flag_parsing_c.debug_cpp is set, [s]
 * is printed through Common.pr2_once with a "CPP-" prefix; otherwise
 * nothing happens.
 *)
let pr2_cpp s =
  match !Flag_parsing_c.debug_cpp with
  | true -> Common.pr2_once ("CPP-" ^ s)
  | false -> ()
37
38
(* Generic message filter.
 *
 * [printer s] is called when [cond] holds and either message filtering is
 * off (Flag_parsing_c.filter_msg is false) or [s] is not recognised by
 * [is_known].  [is_known] is only consulted when filtering is on.
 *)
let msg_gen cond is_known printer s =
  let wanted () =
    not !Flag_parsing_c.filter_msg || not (is_known s)
  in
  if cond && wanted () then printer s
ae4735db 47
34e49164 48
485bce71
C
(* In the following, there are some hardcoded names of types or macros,
 * but they are not used by our heuristics! They are only here to help
 * detect false positives, by printing only the typedefs/macros
 * that we don't know yet. If we print everything, then we can easily
 * get lost in too much verbose tracing information. So those
 * functions "filter" some messages. Our heuristics are thus still good:
 * there is no more (or not that much) hardcoded linux stuff.
 *)
485bce71 57
ae4735db
C
(* Tells whether [s] is a type name we already know about: a fixed list of
 * integer-like aliases, a couple of acpi/stdio names, or any name ending
 * in "_t".  Only used to filter trace messages.
 *)
let is_known_typdef = function
  | "u_char" | "u_short" | "u_int" | "u_long"
  | "u8" | "u16" | "u32" | "u64"
  | "s8" | "s16" | "s32" | "s64"
  | "__u8" | "__u16" | "__u32" | "__u64"
  | "acpi_handle" | "acpi_status"
  | "FILE" | "DIR" ->
      true
  | s ->
      (* anything else is known only if it looks like xxx_t *)
      s =~ ".*_t$"
485bce71 79
ae4735db
C
(* note: can't use partial application with let msg_typedef =
 * because the flag debug_typedef would then be read only once, when
 * msg_typedef itself is defined (the original note says "at compile
 * time"), at which point the flag is always false
 *)
(* Records one typedef inference in Stat.nTypedefInfer and, subject to
 * msg_gen filtering on Flag_parsing_c.debug_typedef / is_known_typdef,
 * traces the promoted name [s], the integer [n] and the line of [ii].
 *)
let msg_typedef s ii n =
  incr Stat.nTypedefInfer;
  let report name =
    let line = Ast_c.line_of_info ii in
    pr2_cpp
      (Printf.sprintf "TYPEDEF: promoting:(%d) %s on line %d" n name line)
  in
  msg_gen !Flag_parsing_c.debug_typedef is_known_typdef report s
96
485bce71
C
(* Warns through pr2 when [s] was inferred to be a typedef although it is
 * not one of the names accepted by is_known_typdef.
 *)
let msg_maybe_dangereous_typedef s =
  if is_known_typdef s
  then ()
  else
    pr2
      ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
34e49164
C
102
103
104
(* Counts one declare-macro in Stat.nMacroDecl and traces its name [s],
 * unless message filtering hides it because it is one of the well-known
 * declaration macros listed below.
 *)
let msg_declare_macro s =
  incr Stat.nMacroDecl;
  (* well-known macros; broader regexp-based rules (^DECLARE_.*, .*_ATTR$,
   * ^DEFINE_.* ) were disabled in the original code and stay disabled *)
  let is_known_declare_macro = function
    | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
    | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
    | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
    | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
    | "SENSOR_DEVICE_ATTR"
    | "LIST_HEAD"
    | "DECLARE_WORK" | "DECLARE_TASKLET"
    | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
    | "DECLARE_BITMAP" ->
        true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found declare-macro: " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_declare_macro report s
34e49164 132
ae4735db
C
133
(* Counts one hit of the iterator heuristic in Stat.nIteratorHeuristic and
 * traces the macro name [s] through pr2_cpp.
 *)
let msg_foreach s =
  incr Stat.nIteratorHeuristic;
  let text = "MACRO: found foreach: " ^ s in
  pr2_cpp text
137
138
ae4735db
C
139(* ??
140let msg_debug_macro s =
34e49164 141 pr2_cpp ("MACRO: found debug-macro: " ^ s)
485bce71 142*)
34e49164
C
143
144
(* Counts one macro statement in Stat.nMacroStmt and traces [s] as a
 * parameterised macro used without a trailing ';'.
 *)
let msg_macro_noptvirg s =
  incr Stat.nMacroStmt;
  let text = "MACRO: found macro with param noptvirg: " ^ s in
  pr2_cpp text
148
(* Counts one macro statement in Stat.nMacroStmt and traces [s] as a
 * toplevel macro used without a trailing ';'.
 *)
let msg_macro_toplevel_noptvirg s =
  incr Stat.nMacroStmt;
  let text = "MACRO: found toplevel macro noptvirg: " ^ s in
  pr2_cpp text
152
(* Counts one macro statement in Stat.nMacroStmt and traces [s] as a
 * single (parameterless) macro used without a trailing ';'.
 *)
let msg_macro_noptvirg_single s =
  incr Stat.nMacroStmt;
  let text = "MACRO: found single-macro noptvirg: " ^ s in
  pr2_cpp text
156
157
485bce71
C
158
159
(* Counts one higher-order macro in Stat.nMacroHigherOrder and traces its
 * name [s], unless message filtering hides it because it is one of the
 * known debug macros listed below.
 *)
let msg_macro_higher_order s =
  incr Stat.nMacroHigherOrder;
  let is_known_higher_order = function
    | "DBGINFO" | "DBGPX" | "DFLOW" -> true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found higher ordre macro : " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_higher_order report s
174
175
(* Counts one string-macro in Stat.nMacroString and traces its name [s],
 * unless message filtering hides it because it is one of the known
 * string macros listed below.
 *)
let msg_stringification s =
  incr Stat.nMacroString;
  (* a broader ".*STR.*" rule was disabled in the original code *)
  let is_known_string_macro = function
    | "REVISION" | "UTS_RELEASE" | "SIZE_STR" | "DMA_STR" -> true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found string-macro " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_string_macro report s
192
485bce71
C
(* Counts one string-macro in Stat.nMacroString and traces [s] as a
 * string-macro taking parameters.
 *)
let msg_stringification_params s =
  incr Stat.nMacroString;
  let text = "MACRO: string-macro with params : " ^ s in
  pr2_cpp text
196
197
198
(* Counts one macro expansion in Stat.nMacroExpand and traces the known
 * macro name [s].
 *)
let msg_apply_known_macro s =
  incr Stat.nMacroExpand;
  let text = "MACRO: found known macro = " ^ s in
  pr2_cpp text
202
(* Counts one macro hint in Stat.nMacroHint and traces the known macro
 * hint name [s].
 *)
let msg_apply_known_macro_hint s =
  incr Stat.nMacroHint;
  let text = "MACRO: found known macro hint = " ^ s in
  pr2_cpp text
206
207
34e49164 208
ae4735db
C
209
(* Counts one boolean/version ifdef in Stat.nIfdefZero and traces which
 * kind was met: [is_ifdef_positif] selects the "#if 1" wording, its
 * negation the "#if 0" wording.
 *)
let msg_ifdef_bool_passing is_ifdef_positif =
  incr Stat.nIfdefZero; (* of Version ? *)
  let text =
    if is_ifdef_positif
    then "commenting parts of a #if 1 or #if LINUX_VERSION"
    else "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
  in
  pr2_cpp text
215
216
(* Counts one "ifdef in the middle of something" in
 * Stat.nIfdefExprPassing and traces it.
 *)
let msg_ifdef_mid_something () =
  let () = incr Stat.nIfdefExprPassing in
  pr2_cpp "found ifdef-mid-something"
220
(* Counts one ifdef-funheader in Stat.nIfdefFunheader; prints nothing. *)
let msg_ifdef_funheaders () =
  incr Stat.nIfdefFunheader
224
(* Counts one passed ifdef in Stat.nIfdefPassing and traces the
 * ifdef-cparen-else situation.
 *)
let msg_ifdef_cparen_else () =
  let () = incr Stat.nIfdefPassing in
  pr2_cpp "found ifdef-cparen-else"
228
485bce71 229
(* Counts one attribute macro in Stat.nMacroAttribute and traces its
 * name [s] with an "ATTR:" prefix.
 *)
let msg_attribute s =
  incr Stat.nMacroAttribute;
  let text = "ATTR:" ^ s in
  pr2_cpp text
ae4735db 233
485bce71
C
234
235
34e49164 236(*****************************************************************************)
485bce71 237(* The regexp and basic view definitions *)
34e49164
C
238(*****************************************************************************)
239
(* opti: better to build them once and for all, especially regexp_foreach *)
241
(* Precompiled regular expressions used by the heuristics. *)

(* all-uppercase identifier (letters, digits, underscores) *)
let regexp_macro = Str.regexp "^[A-Z_][A-Z_0-9]*$"

(* linuxext: name starting with two underscores *)
let regexp_annot = Str.regexp "^__.*$"

(* linuxext: name containing DECLARE *)
let regexp_declare = Str.regexp ".*DECLARE.*"

(* linuxext: name containing an iteration-like word, case-insensitively *)
let regexp_foreach =
  Str.regexp_case_fold
    ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"

(* name ending in _t *)
let regexp_typedef = Str.regexp ".*_t$"
259
34e49164
C
(* Names that must never be taken for a typedef. *)
let false_typedef = ["printk"]

(* [ok_typedef s] holds unless [s] is listed in false_typedef. *)
let ok_typedef s =
  List.for_all (fun banned -> banned <> s) false_typedef
266
(* [not_annot s] holds when [s] does not match regexp_annot, i.e. does not
 * start with two underscores.
 *)
let not_annot s =
  if s ==~ regexp_annot then false else true
269
270
34e49164 271
34e49164 272
485bce71
C
273(*****************************************************************************)
274(* Helpers *)
275(*****************************************************************************)
276
485bce71 277(* ------------------------------------------------------------------------- *)
ae4735db
C
278(* the pair is the status of '()' and '{}', ex: (-1,0)
279 * if too much ')' and good '{}'
280 * could do for [] too ?
485bce71
C
281 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
282 * then if have ifdef, then certainly can lead to a problem.
283 *)
(* Returns the (paren, brace) balance of an ifdef clause: every token
 * accepted by TH.is_opar / TOBrace adds one to its counter, every token
 * accepted by TH.is_cpar / TCBrace subtracts one.  (0, 0) therefore means
 * both kinds are balanced inside the clause.
 *)
let count_open_close_stuff_ifdef_clause
    : TV.ifdef_grouped list -> (int * int) =
 fun clause ->
  let parens = ref 0 and braces = ref 0 in
  let visit tokext =
    let t = tokext.tok in
    (* same test order as the original: opar, '{', cpar, '}' *)
    if TH.is_opar t then incr parens
    else
      match t with
      | TOBrace _ -> incr braces
      | _ when TH.is_cpar t -> decr parens
      | TCBrace _ -> decr braces
      | _ -> ()
  in
  TV.iter_token_ifdef visit clause;
  (!parens, !braces)
297
298
299(* ------------------------------------------------------------------------- *)
(* Look-ahead bound.
 * NOTE(review): no use of it is visible in this part of the file.
 *)
let forLOOKAHEAD : int = 30
301
ae4735db 302
485bce71 303(* look if there is a '{' just after the closing ')', and handling the
ae4735db
C
304 * possibility to have nested expressions inside nested parenthesis
305 *
485bce71
C
306 * todo: use indentation instead of premier(statement) ?
307 *)
(* Decides whether the tokens following a foreach-like macro name really
 * look like a loop header: scans [xs] for the ')' closing the current
 * parenthesis level and answers true when it is directly followed by '{'
 * or by a token that can start a single-statement body (identifier, if,
 * while, for, switch, return).
 *
 * todo: use indentation instead of premier(statement) ?
 *)
let is_really_foreach xs =
  (* returns the verdict together with the tokens left after the ')' *)
  let rec scan toks =
    match toks with
    | [] -> (false, [])
    | TCPar _
      :: ( TOBrace _ | TIdent _ | Tif _ | Twhile _ | Tfor _ | Tswitch _
         | Treturn _ )
      :: rest ->
        (* single-statement bodies: undoubtedly more cases are needed.
           todo: premier(statement) - suivant(funcall) *)
        (true, rest)
    | TCPar _ :: rest -> (false, rest)
    | TOPar _ :: rest ->
        (* skip a nested parenthesised group, then carry on after it *)
        let after_group = snd (scan rest) in
        scan after_group
    | _ :: rest -> scan rest
  in
  fst (scan xs)
332
333
334(* ------------------------------------------------------------------------- *)
(* Stores [Some cnt] in the tag reference carried by the ifdef-related
 * token [x].  Raises Impossible when [x] is not one of the ifdef
 * directive tokens.
 *)
let set_ifdef_token_parenthize_info cnt x =
  let tag =
    match x with
    | TIfdef (tag, _)
    | TIfdefelse (tag, _)
    | TIfdefelif (tag, _)
    | TEndif (tag, _)
    | TIfdefBool (_, tag, _)
    | TIfdefMisc (_, tag, _)
    | TIfdefVersion (_, tag, _) ->
        tag
    | _ -> raise Impossible
  in
  tag := Some cnt
485bce71
C
349
350
485bce71 351
(* Global counter incremented by set_ifdef_parenthize_info for each ifdef
 * group it visits; its value becomes the group identifier.
 *)
let ifdef_paren_cnt : int ref = ref 0
485bce71 353
ae4735db
C
354
(* Tags every directive token of every ifdef group found in [xs] (nested
 * groups included) with a pair (group number, number of directives in
 * the group).  The group number comes from the global ifdef_paren_cnt,
 * bumped once per group before its directives are tagged.
 *)
let rec set_ifdef_parenthize_info xs =
  let annotate = function
    | NotIfdefLine _ -> ()
    | Ifdefbool (_, clauses, directives)
    | Ifdef (clauses, directives) ->
        incr ifdef_paren_cnt;
        (* read the counter now: the recursion below bumps it again *)
        let tag = (!ifdef_paren_cnt, List.length directives) in
        List.iter
          (fun directive -> set_ifdef_token_parenthize_info tag directive.tok)
          directives;
        List.iter set_ifdef_parenthize_info clauses
  in
  List.iter annotate xs
369
370
978fd7e5
C
371(*****************************************************************************)
372(* The parsing hack for #define *)
373(*****************************************************************************)
374
ae4735db 375(* To parse macro definitions I need to do some tricks
978fd7e5
C
376 * as some information can be get only at the lexing level. For instance
377 * the space after the name of the macro in '#define foo (x)' is meaningful
378 * but the grammar can not get this information. So define_ident below
379 * look at such space and generate a special TOpardefine. In a similar
380 * way macro definitions can contain some antislash and newlines
ae4735db
C
381 * and the grammar need to know where the macro ends (which is
382 * a line-level and so low token-level information). Hence the
978fd7e5 383 * function 'define_line' below and the TDefEol.
ae4735db
C
384 *
385 * update: TDefEol is handled in a special way at different places,
978fd7e5
C
386 * a little bit like EOF, especially for error recovery, so this
387 * is an important token that should not be retagged!
ae4735db
C
388 *
389 *
390 * ugly hack, a better solution perhaps would be to erase TDefEOL
391 * from the Ast and list of tokens in parse_c.
392 *
978fd7e5 393 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
ae4735db 394 *
978fd7e5
C
395 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
396 * is where is the \n TCommentSpace. Normally there is always a last token
397 * to synchronize on, either EOF or the token of the next toplevel.
ae4735db 398 * In the case of the #define we got in list of token
978fd7e5
C
399 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
400 * not synchronize on it and so we will not print the "\n".
401 * A solution would be to put the TDefEOL before the "\n".
c491d8ee 402 * (jll: tried to do this, see the comment "Put end of line..." below)
ae4735db
C
403 *
404 * todo?: could put a ExpandedTok for that ?
978fd7e5 405 *)
ae4735db
C
(* Builds the TDefEOL token marking the end of a #define line.  Its info
 * is derived from [ii]: same parse info but with an empty string and a
 * character position one past that of [ii] (the +1 is there so that
 * unparsing synchronises correctly), plus fresh annotation and comment
 * references.
 *)
let mark_end_define ii =
  let parse_info =
    { (Ast_c.parse_info_of_info ii) with
      Common.str = "";
      Common.charpos = Ast_c.pos_of_info ii + 1;
    }
  in
  let eol_info =
    { Ast_c.pinfo = Ast_c.OriginTok parse_info;
      cocci_tag = ref Ast_c.emptyAnnot;
      comments_tag = ref Ast_c.emptyComments;
    }
  in
  TDefEOL eol_info
417
418(* put the TDefEOL at the good place *)
(* put the TDefEOL at the good place
 *
 * define_line_1 walks the tokens outside of any directive line; [acc]
 * holds the already processed tokens in reverse order.  On #define or
 * #undef it hands over to define_line_2.  A TCppEscapedNewline met here
 * is reported as suspicious and turned into a TCommentSpace.
 *)
let rec define_line_1 acc xs =
  match xs with
  | [] -> List.rev acc
  | ((TDefine ii | TUndef ii) as directive) :: rest ->
      define_line_2 (directive :: acc) (Ast_c.line_of_info ii) ii rest
  | TCppEscapedNewline ii :: rest ->
      pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
      pr2 (Ast_c.strloc_of_info ii);
      define_line_1 (TCommentSpace ii :: acc) rest
  | tok :: rest -> define_line_1 (tok :: acc) rest

(* define_line_2 walks the tokens of a directive whose current line is
 * [line]; [lastinfo] is the info of the last token seen on it.  An
 * escaped newline extends the directive onto the next line.  The first
 * token found on another line ends the directive: a TDefEOL built from
 * [lastinfo] is inserted and define_line_1 resumes.
 *)
and define_line_2 acc line lastinfo xs =
  match xs with
  | [] ->
      (* should not happened, should meet EOF before *)
      pr2 "PB: WEIRD";
      List.rev (mark_end_define lastinfo :: acc)
  | tok :: rest ->
      let tok_line = TH.line_of_tok tok in
      let tok_info = TH.info_of_tok tok in
      (match tok with
       | EOF ii ->
           define_line_1 (EOF ii :: mark_end_define lastinfo :: acc) rest
       | TCppEscapedNewline ii ->
           if tok_line <> line then pr2 "PB: WEIRD: not same line number";
           define_line_2 (TCommentSpace ii :: acc) (line + 1) tok_info rest
       | _ when tok_line =|= line ->
           define_line_2 (tok :: acc) line tok_info rest
       | _ ->
           (* Put end of line token before the newline. A newline at least
              must be there because the line changed and because we saw a
              #define previously to get to this function at all *)
           define_line_1
             (List.hd acc :: mark_end_define lastinfo :: List.tl acc)
             xs)
467
(* Retags the name following #define / #undef as a TIdentDefine, and the
 * '(' glued to a #define name as a TOParDefine.  [acc] holds the already
 * processed tokens in reverse order; the result is in source order.
 *
 * Change also the kind of TIdent to avoid bad interaction with other
 * parsing_hack tricks.  For instance if we keep TIdent then the
 * stringification algo can believe the TIdent is a string-macro.
 *
 * If the '(' sticks to the ident then it's a macro-function; the token
 * is changed to avoid the ambiguity between #define foo(x) and
 * #define foo (x).
 *)
let rec define_ident acc xs =
  (* pushes the whitespace token, then the retagged macro name *)
  let with_name acc space_info name name_info =
    TIdentDefine (name, name_info) :: TCommentSpace space_info :: acc
  in
  match xs with
  | [] -> List.rev acc
  | TUndef ii :: rest ->
      let acc = TUndef ii :: acc in
      (match rest with
       | TCommentSpace i1 :: TIdent (s, i2) :: rest' ->
           define_ident (with_name acc i1 s i2) rest'
       | _ ->
           (* this branch handles #undef, so say so in the diagnostic *)
           pr2 "WEIRD: weird #undef body";
           define_ident acc rest)
  | TDefine ii :: rest ->
      let acc = TDefine ii :: acc in
      (match rest with
       | TCommentSpace i1 :: TIdent (s, i2) :: TOPar i3 :: rest' ->
           define_ident (TOParDefine i3 :: with_name acc i1 s i2) rest'
       | TCommentSpace i1 :: TIdent (s, i2) :: rest' ->
           define_ident (with_name acc i1 s i2) rest'
       | TCommentSpace i1 :: t :: rest' ->
           (* bugfix: ident of macro (as well as params) can be tricky,
            * e.g. a keyword as in #define inline.  No need to subst in
            * the body: the name of a macro is not used in its own body
            * (it would be a recursive macro, which is forbidden).
            *)
           let s = TH.str_of_tok t in
           let ii = TH.info_of_tok t in
           if s ==~ Common.regexp_alpha
           then begin
             pr2 (spf "remapping: %s to an ident in macro name" s);
             define_ident (with_name acc i1 s ii) rest'
           end
           else begin
             pr2 "WEIRD: weird #define body";
             (* NOTE(review): as in the original code, the whitespace
              * token and [t] are not pushed on [acc] here, so they
              * disappear from the result -- confirm this is intended. *)
             define_ident acc rest'
           end
       | _ ->
           pr2 "WEIRD: weird #define body";
           define_ident acc rest)
  | tok :: rest -> define_ident (tok :: acc) rest
978fd7e5
C
535
536
ae4735db
C
537
(* Runs the two #define passes in order: first define_line_1 (inserts the
 * TDefEOL tokens), then define_ident (retags macro names).
 *)
let fix_tokens_define2 xs =
  xs +> define_line_1 [] +> define_ident []
540
(* Same as fix_tokens_define2, run under Common.profile_code with the
 * label "C parsing.fix_define".
 *)
let fix_tokens_define a =
  let run () = fix_tokens_define2 a in
  Common.profile_code "C parsing.fix_define" run
ae4735db 543
978fd7e5
C
544
545
546
547
548(* ------------------------------------------------------------------------- *)
549(* Other parsing hacks related to cpp, Include/Define hacks *)
550(* ------------------------------------------------------------------------- *)
551
552(* Sometimes I prefer to generate a single token for a list of things in the
553 * lexer so that if I have to passed them, like for passing TInclude then
ae4735db
C
554 * it's easy. Also if I don't do a single token, then I need to
555 * parse the rest which may not need special stuff, like detecting
978fd7e5
C
556 * end of line which the parser is not really ready for. So for instance
557 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
ae4735db 558 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
978fd7e5
C
559 * but this kind of token is valid only after a #include and the
560 * lexing and parsing rules are different for such tokens so not that
561 * easy to parse such things in parser_c.mly. Hence the following hacks.
ae4735db 562 *
978fd7e5
C
563 * less?: maybe could get rid of this like I get rid of some of fix_define.
564 *)
565
566(* helpers *)
567
568(* used to generate new token from existing one *)
(* used to generate new token from existing one
 *
 * The result copies the parse info of [ii] with its character position
 * and column both shifted by [posadd] and its string replaced by [str].
 *)
let new_info posadd str ii =
  let shifted =
    { (Ast_c.parse_info_of_info ii) with
      charpos = Ast_c.pos_of_info ii + posadd;
      str = str;
      column = Ast_c.col_of_info ii + posadd;
    }
  in
  { Ast_c.pinfo = Ast_c.OriginTok shifted;
    (* must generate a new ref each time, otherwise share *)
    cocci_tag = ref Ast_c.emptyAnnot;
    comments_tag = ref Ast_c.emptyComments;
  }
580
581
(* Turns the tokens of [xs] up to and including the first TDefEOL into
 * TCommentCpp tokens: CppDirective for the TDefEOL itself,
 * CppPassingNormal for what precedes it.  Tokens accepted by
 * TH.is_real_comment are kept as they are, and everything after the
 * TDefEOL is returned untouched.  Fails when no TDefEOL is found.
 *)
let rec comment_until_defeol xs =
  match xs with
  | [] ->
      (* job not done in Cpp_token_c.define_parse ? *)
      failwith "cant find end of define token TDefEOL"
  | (Parser_c.TDefEOL _ as eol) :: rest ->
      Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok eol) :: rest
  | tok :: rest ->
      let tok' =
        (* bugfix: otherwise may lose a TComment token *)
        if TH.is_real_comment tok
        then tok
        else
          Parser_c.TCommentCpp
            (Token_c.CppPassingNormal (*good?*), TH.info_of_tok tok)
      in
      tok' :: comment_until_defeol rest
601
ae4735db
C
(* Applies Common.drop_until with a "is a TDefEOL" predicate to [xs] and
 * then removes the head of the result with List.tl.
 * NOTE(review): presumably drop_until leaves the TDefEOL at the head, so
 * that the result starts right after it -- confirm against Common.
 *)
let drop_until_defeol xs =
  let is_defeol = function
    | Parser_c.TDefEOL _ -> true
    | _ -> false
  in
  xs +> Common.drop_until is_defeol +> List.tl
605
606
607
608(* ------------------------------------------------------------------------- *)
609(* returns a pair (replaced token, list of next tokens) *)
610(* ------------------------------------------------------------------------- *)
611
ae4735db
C
(* Splits an include token into a pair (replaced token, next tokens): a
 * TIncludeStart carrying [info] rewrapped with the [includes] string,
 * and a one-element list with the TIncludeFilename whose info is shifted
 * by the length of [includes].
 *)
let tokens_include (info, includes, filename, inifdef) =
  let start_info = Ast_c.rewrap_str includes info in
  let filename_info = new_info (String.length includes) filename info in
  ( Parser_c.TIncludeStart (start_info, inifdef),
    [ Parser_c.TIncludeFilename (filename, filename_info) ] )
617
618
619
620
485bce71
C
621(*****************************************************************************)
622(* CPP handling: macros, ifdefs, macros defs *)
623(*****************************************************************************)
624
0708f913
C
625(* ------------------------------------------------------------------------- *)
626(* special skip_start skip_end handling *)
627(* ------------------------------------------------------------------------- *)
628
629(* note: after this normally the token list should not contain any more the
630 * TCommentSkipTagStart and End tokens.
631 *)
(* Marks as CppPassingExplicit comments every token lying between a
 * TCommentSkipTagStart and the next TCommentSkipTagEnd, both markers
 * included.
 *
 * Fails when a start marker has no matching end marker, or when an end
 * marker is met without a preceding start marker.
 *
 * The Not_found handler only covers the search for the end marker, so
 * that the recursive call is a tail call and a Not_found coming from
 * anywhere else is not misreported as a missing end marker.
 *)
let rec commentize_skip_start_to_end xs =
  match xs with
  | [] -> ()
  | ({tok = TCommentSkipTagStart _} as start) :: rest ->
      let is_skip_end = function
        | {tok = TCommentSkipTagEnd _} -> true
        | _ -> false
      in
      let split =
        try Some (Common.split_when is_skip_end rest)
        with Not_found -> None
      in
      (match split with
       | None ->
           failwith "could not find end of skip_start special comment"
       | Some (before, stop, after) ->
           (* same marking order as before: start, end, then the middle *)
           List.iter
             (fun tok -> set_as_comment Token_c.CppPassingExplicit tok)
             (start :: stop :: before);
           commentize_skip_start_to_end after)
  | {tok = TCommentSkipTagEnd _} :: _ ->
      failwith "found skip_end comment but no skip_start"
  | _ :: rest -> commentize_skip_start_to_end rest
ae4735db
C
658
659
0708f913
C
660
661
34e49164
C
662(* ------------------------------------------------------------------------- *)
663(* ifdef keeping/passing *)
664(* ------------------------------------------------------------------------- *)
665
666(* #if 0, #if 1, #if LINUX_VERSION handling *)
ae4735db
C
(* #if 0, #if 1, #if LINUX_VERSION handling
 *
 * For every Ifdefbool group the directives are marked as CppDirective
 * comments and the clauses that are not kept are marked as
 * CppPassingNormal comments:
 *  - positive condition: the first clause is kept, all others passed;
 *  - negative condition: the first clause is passed and, of the
 *    remaining ones, only the last is kept.
 * Plain Ifdef groups are only traversed.
 *)
let rec find_ifdef_bool xs =
  let pass_clause clause =
    iter_token_ifdef (set_as_comment Token_c.CppPassingNormal) clause
  in
  let handle = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) -> List.iter find_ifdef_bool clauses
    | Ifdefbool (is_ifdef_positif, clauses, directives) ->
        msg_ifdef_bool_passing is_ifdef_positif;
        (match clauses with
         | [] -> raise Impossible
         | first :: others ->
             List.iter (set_as_comment Token_c.CppDirective) directives;
             if is_ifdef_positif
             then List.iter pass_clause others
             else begin
               pass_clause first;
               (* keep only last *)
               match List.rev others with
               | _last :: earlier -> List.iter pass_clause earlier
               | [] -> (* not #else *) ()
             end)
  in
  List.iter handle xs
696
697
698
34e49164
C
(* Largest number of elements an ifdef clause may have for find_ifdef_mid
 * to analyse the group.
 *)
let thresholdIfdefSizeMid : int = 6
700
701(* infer ifdef involving not-closed expressions/statements *)
ae4735db
C
(* infer ifdef involving not-closed expressions/statements
 *
 * An Ifdef group with at least two clauses is rewritten when
 *  - every clause is small (at most thresholdIfdefSizeMid elements),
 *  - no clause contains a nested ifdef,
 *  - the first clause has unbalanced parentheses or braces, and
 *  - every clause has exactly the same (paren, brace) balance.
 * In that case the directives become CppDirective comments and every
 * clause but the first becomes a CppPassingCosWouldGetError comment.
 * Nested groups are visited in all cases.
 *
 * fix: the last two conditions used to be written
 *   cnt1 <> 0 || cnt2 <> 0 && for_all ...
 * which, since && binds tighter than ||, skipped the "same balance"
 * check whenever the paren count was non-zero.
 *)
let rec find_ifdef_mid xs =
  xs +> List.iter (function
    | NotIfdefLine _ -> ()
    | Ifdef (xxs, info_ifdef_stmt) ->
        (match xxs with
         | [] -> raise Impossible
         | [_] -> ()
         | _first :: second :: rest ->
             (* don't analyse big ifdef *)
             let is_small clause =
               List.length clause <= thresholdIfdefSizeMid
             in
             (* don't want nested ifdef *)
             let is_flat clause =
               List.for_all
                 (function NotIfdefLine _ -> true | _ -> false)
                 clause
             in
             if List.for_all is_small xxs && List.for_all is_flat xxs
             then begin
               let counts =
                 List.map count_open_close_stuff_ifdef_clause xxs
               in
               let (cnt1, cnt2) as first_count = List.hd counts in
               let unbalanced = cnt1 <> 0 || cnt2 <> 0 in
               if unbalanced
                  && List.for_all (fun c -> c =*= first_count) counts
               then begin
                 msg_ifdef_mid_something ();
                 (* keep only first, treat the rest as comment *)
                 List.iter
                   (set_as_comment Token_c.CppDirective)
                   info_ifdef_stmt;
                 List.iter
                   (iter_token_ifdef
                      (set_as_comment Token_c.CppPassingCosWouldGetError))
                   (second :: rest)
               end
             end);
        List.iter find_ifdef_mid xxs

    (* no need complex analysis for ifdefbool *)
    | Ifdefbool (_, xxs, _) ->
        List.iter find_ifdef_mid xxs
  )
747
748
(* Largest number of extra elements (after its first line) an ifdef clause
 * may have for find_ifdef_funheaders to treat the group as alternative
 * function headers.
 *)
let thresholdFunheaderLimit : int = 4
750
751(* ifdef defining alternate function header, type *)
(* ifdef defining alternate function header, type
 *
 * Recognises an Ifdef group whose clauses each start in column 0 and
 * which is directly followed by a line starting with '{' in column 0.
 * The first alternative is kept; the directives become CppDirective
 * comments and the other alternatives become
 * CppPassingCosWouldGetError comments.  Three shapes are handled
 * (two clauses, nested ifdef, three clauses); any other group is just
 * traversed.
 *)
let rec find_ifdef_funheaders lines =
  let hide_directives infos =
    List.iter (set_as_comment Token_c.CppDirective) infos
  in
  let hide_tokens toks =
    List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) toks
  in
  match lines with
  | [] -> ()
  | NotIfdefLine _ :: rest -> find_ifdef_funheaders rest

  (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
  | Ifdef
      ( [ NotIfdefLine ({col = 0} :: _) :: block1;
          NotIfdefLine (({col = 0} as head2) :: tail2) :: block2
        ],
        directives )
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest
    when List.length block1 <= thresholdFunheaderLimit
         && List.length block2 <= thresholdFunheaderLimit ->
      find_ifdef_funheaders rest;
      msg_ifdef_funheaders ();
      hide_directives directives;
      hide_tokens (head2 :: tail2);
      iter_token_ifdef
        (set_as_comment Token_c.CppPassingCosWouldGetError)
        block2

  (* ifdef with nested ifdef *)
  | Ifdef
      ( [ [NotIfdefLine ({col = 0} :: _)];
          [ Ifdef
              ( [ [NotIfdefLine (({col = 0} as head2) :: tail2)];
                  [NotIfdefLine (({col = 0} as head3) :: tail3)]
                ],
                inner_directives )
          ]
        ],
        directives )
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest ->
      find_ifdef_funheaders rest;
      msg_ifdef_funheaders ();
      hide_directives directives;
      hide_directives inner_directives;
      hide_tokens ([head2; head3] @ tail2 @ tail3)

  (* ifdef with elseif *)
  | Ifdef
      ( [ [NotIfdefLine ({col = 0} :: _)];
          [NotIfdefLine (({col = 0} as head2) :: tail2)];
          [NotIfdefLine (({col = 0} as head3) :: tail3)]
        ],
        directives )
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest ->
      find_ifdef_funheaders rest;
      msg_ifdef_funheaders ();
      hide_directives directives;
      hide_tokens ([head2; head3] @ tail2 @ tail3)

  (* recurse *)
  | Ifdef (clauses, _) :: rest
  | Ifdefbool (_, clauses, _) :: rest ->
      List.iter find_ifdef_funheaders clauses;
      find_ifdef_funheaders rest
ae4735db 819
34e49164
C
820
821
485bce71 822(* ?? *)
ae4735db
C
(* Sets to true the "in ifdef" reference carried by every TInclude token
 * that iter_token_ifdef reaches inside the clauses of the top-level
 * ifdef groups of [xs].  Lines outside of any ifdef are left alone.
 *)
let adjust_inifdef_include xs =
  let mark_include tokext =
    match tokext.tok with
    | Parser_c.TInclude (_, _, inifdef_ref, _) -> inifdef_ref := true
    | _ -> ()
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) | Ifdefbool (_, clauses, _) ->
        List.iter (iter_token_ifdef mark_include) clauses
  in
  List.iter visit xs
834
835
836
34e49164 837
485bce71 838
34e49164
C
839
840
ae4735db
C
(* Handles Ifdef groups with at least two clauses whose first clause
 * ends with a closing parenthesis (the last token of its last line is
 * accepted by TH.is_cpar).  For such groups the directives become
 * CppDirective comments and every clause but the first becomes a
 * CppPassingCosWouldGetError comment.  Nested groups are visited in
 * all cases.
 *)
let find_ifdef_cparen_else xs =
  (* Too bad ocaml does not support better list pattern matching
   * a la Prolog-III where can match the end of lists.
   *)
  let clause_ends_with_cpar clause =
    match clause with
    | [] -> false
    | _ ->
        (match Common.last clause with
         | NotIfdefLine [] -> false
         | NotIfdefLine toks -> TH.is_cpar (Common.last toks).tok
         | Ifdef _ | Ifdefbool _ -> false)
  in
  let rec visit_all group = List.iter visit group
  and visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, directives) ->
        (match clauses with
         | [] -> raise Impossible
         | [_] -> ()
         | first :: others ->
             if clause_ends_with_cpar first then begin
               msg_ifdef_cparen_else ();
               (* keep only first, treat the rest as comment *)
               List.iter (set_as_comment Token_c.CppDirective) directives;
               List.iter
                 (iter_token_ifdef
                    (set_as_comment Token_c.CppPassingCosWouldGetError))
                 others
             end);
        List.iter visit_all clauses
    (* no need complex analysis for ifdefbool *)
    | Ifdefbool (_, clauses, _) -> List.iter visit_all clauses
  in
  visit_all xs
34e49164
C
885
886
708f4980
C
887(* ------------------------------------------------------------------------- *)
888(* cpp-builtin part2, macro, using standard.h or other defs *)
889(* ------------------------------------------------------------------------- *)
34e49164 890
ae4735db 891(* now in cpp_token_c.ml *)
34e49164
C
892
893(* ------------------------------------------------------------------------- *)
894(* stringification *)
895(* ------------------------------------------------------------------------- *)
896
(* stringification
 *
 * Looks at each argument of each parenthesised group of [xs].  When an
 * argument is made only of string tokens and identifiers and contains
 * at least one string token, every identifier in it is retagged as a
 * TMacroString (and reported with msg_stringification).  Any other
 * argument is searched recursively.
 *)
let rec find_string_macro_paren xs =
  let is_string = function
    | PToken {tok = (TString _ | TMacroString _)} -> true
    | _ -> false
  in
  let is_string_or_ident = function
    | PToken {tok = (TString _ | TMacroString _ | TIdent _)} -> true
    | _ -> false
  in
  let promote = function
    | PToken ({tok = TIdent (s, _)} as id) ->
        msg_stringification s;
        id.tok <- TMacroString (s, TH.info_of_tok id.tok)
    | _ -> ()
  in
  match xs with
  | [] -> ()
  | Parenthised (args, _) :: rest ->
      List.iter
        (fun arg ->
          if List.exists is_string arg && List.for_all is_string_or_ident arg
          then List.iter promote arg
          else find_string_macro_paren arg)
        args;
      find_string_macro_paren rest
  | PToken _ :: rest -> find_string_macro_paren rest
ae4735db 921
34e49164
C
922
923(* ------------------------------------------------------------------------- *)
924(* macro2 *)
925(* ------------------------------------------------------------------------- *)
926
927(* don't forget to recurse in each case *)
(* Macro heuristics working on the parenthesised view of the tokens.
 *
 * Handles, in this order (the order of the clauses matters):
 *  - __attribute__ with arguments and attributes without arguments, which
 *    are turned into comments;
 *  - a TMacroAttr following 'static'/'extern', retagged as a storage
 *    attribute;
 *  - identifiers adjacent to a string token, with or without a
 *    parenthesised argument list, retagged as string macros.
 *
 * An older heuristic that recognised attribute-like identifiers through
 * regexp_annot used to sit between the attribute and storage cases; it is
 * disabled.
 *
 * Don't forget to recurse in each case.
 *)
let rec find_macro_paren xs =
  (* Pass all the tokens of a parenthesised group as comments of [kind]. *)
  let comment_out_group kind group =
    iter_token_paren (set_as_comment kind) [group]
  in
  let retag_as_string_macro name id =
    id.tok <- TMacroString (name, TH.info_of_tok id.tok)
  in
  match xs with
  | [] -> ()

  (* attribute with arguments *)
  | PToken ({tok = Tattribute _} as attr)
    :: (Parenthised _ as group)
    :: rest ->
      pr2_cpp "MACRO: __attribute detected ";
      comment_out_group Token_c.CppAttr group;
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

  (* attribute without arguments *)
  | PToken ({tok = TattributeNoarg _} as attr) :: rest ->
      pr2_cpp "MACRO: __attributenoarg detected ";
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

  (* storage attribute *)
  | (PToken {tok = (Tstatic _ | Textern _)} as storage)
    :: PToken ({tok = TMacroAttr (s, info)} as attr)
    :: rest ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s, info);
      (* keep the storage keyword in front: other storage attributes may
       * follow *)
      find_macro_paren (storage :: rest)

  (* stringification: string macro with parameters, string before *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: (Parenthised _ as group)
    :: rest ->
      msg_stringification_params s;
      retag_as_string_macro s id;
      comment_out_group Token_c.CppMacro group;
      find_macro_paren rest

  (* string macro with parameters, string after *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: (Parenthised _ as group)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification_params s;
      retag_as_string_macro s id;
      comment_out_group Token_c.CppMacro group;
      find_macro_paren rest

  (* The two cases below cover strings that are not inside a function
   * call, for instance in an initializer. *)

  (* string macro variable, string before *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: rest ->
      msg_stringification s;
      retag_as_string_macro s id;
      find_macro_paren rest

  (* string macro variable, string after *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification s;
      retag_as_string_macro s id;
      find_macro_paren rest

  (* nothing special: move on, exploring nested groups *)
  | PToken _ :: rest ->
      find_macro_paren rest
  | Parenthised (args, _) :: rest ->
      List.iter find_macro_paren args;
      find_macro_paren rest
1050
1051
1052
1053
1054
1055(* don't forget to recurse in each case *)
(* Macro heuristics working on the line-grouped parenthesised view.
 *
 * Recognises declaration macros (static FOO(...); DECLARE_xxx(...);),
 * toplevel macros without trailing ';' (module_init(xxx)), and statement
 * macros without trailing ';', with or without parameters, by looking at
 * the shape of a line and at the column and first token of the next line.
 *
 * Don't forget to recurse in each case.
 *)
let rec find_macro_lineparen xs =
  (* Retag identifier [macro], whose name is [s], as a declaration macro. *)
  let retag_as_decl s macro =
    msg_declare_macro s;
    let info = TH.info_of_tok macro.tok in
    macro.tok <- TMacroDecl (Ast_c.str_of_info info, info)
  in
  (* Can [tok], first token of the following line, come after a macro call
   * that has no trailing ';' ? Used to reduce the number of false
   * positives. *)
  let may_follow_macro_call ctx tok =
    match tok with
    | TOBrace _ -> false (* otherwise would match funcdecl *)
    | TPtVirg _ | TDotDot _ -> false
    | TCBrace _ when ctx <> InFunction -> false
    | _ -> not (TH.is_binary_operator tok)
  in
  (* Is [tok] a '}' inside a function, or one of return/if/else ? *)
  let closes_block_or_starts_stmt ctx tok =
    match tok with
    | TCBrace _ -> ctx =*= InFunction
    | Treturn _ | Tif _ | Telse _ -> true
    | _ -> false
  in
  match xs with
  | [] -> ()

  (* linuxext: ex: static DEVICE_ATTR();
   *
   * The variant without the trailing ';' is accepted as well: the ';' can
   * be on another line and, because of the way mk_line is coded, we would
   * not have access to it from here even if it is next to the ')'. *)
  | Line (PToken {tok = Tstatic _}
          :: PToken ({tok = TIdent (s, _)} as macro)
          :: Parenthised _
          :: ([] | [PToken {tok = TPtVirg _}]))
    :: rest
    when s ==~ regexp_macro ->
      retag_as_decl s macro;
      find_macro_lineparen rest

  (* the static const case *)
  | Line [PToken {tok = Tstatic _};
          PToken ({tok = Tconst _} as const_tok);
          PToken ({tok = TIdent (s, _)} as macro);
          Parenthised _;
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_decl s macro;
      (* need to retag this const, otherwise ambiguity in the grammar:
           21: shift/reduce conflict (shift 121, reduce 137) on Tconst
           decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
           decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
           storage_class_spec : Tstatic . (137)
       *)
      const_tok.tok <- TMacroDeclConst (TH.info_of_tok const_tok.tok);
      find_macro_lineparen rest

  (* 'static' alone on its line, the macro on the next line *)
  | Line [PToken {tok = Tstatic _}]
    :: Line [PToken ({tok = TIdent (s, _)} as macro);
             Parenthised _;
             PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_decl s macro;
      find_macro_lineparen rest

  (* linuxext: ex: DECLARE_BITMAP();
   *
   * regexp_declare is used here and not regexp_macro because sometimes it
   * can be a FunCallMacro such as DEBUG(foo()); there is no preceding
   * 'static', so the only way to avoid false positives is to restrict to
   * .*DECLARE.* macros.
   *
   * There is a grammar rule for that, so this case is only needed when
   * the parameters of the DECLARE_xxx are weird and can not be mapped on
   * an argument_list. *)
  | Line [PToken ({tok = TIdent (s, _)} as macro);
          Parenthised _;
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_declare ->
      retag_as_decl s macro;
      find_macro_lineparen rest

  (* toplevel macros, ex: module_init(xxx)
   *
   * Could also transform the TIdent in a TMacroTop but that can give
   * false positives, so it is easier to just change the closing paren and
   * so just solve the end-of-stream pb of ocamlyacc. *)
  | Line [PToken {tok = TIdent (s, _); col = col1; where = ctx};
          Parenthised (_, info_parens)]
    :: rest
    when col1 =|= 0 ->
      let next_line_ok =
        match rest with
        | Line (PToken ({col = col2} as other) :: _) :: _ ->
            TH.is_eof other.tok
            || (col2 =|= 0 && may_follow_macro_call ctx other.tok)
        | _ -> false
      in
      if next_line_ok then begin
        msg_macro_toplevel_noptvirg s;
        (* just to avoid the end-of-stream pb of ocamlyacc.
         * NOTE(review): this retags whatever token is last in
         * info_parens; presumably that is always the closing paren --
         * confirm for groups whose paren was never closed. *)
        let tcpar = Common.last info_parens in
        tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok)
      end;
      find_macro_lineparen rest

  (* macro with parameters
   * ex: DEBUG()
   *     return x;
   *
   * This can give a false positive for K&R functions if the function
   * name is in the same column as the first parameter declaration. *)
  | Line [PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro);
          (Parenthised _ as group)]
    :: (Line (PToken ({col = col2} as other) :: restline2) as line2)
    :: rest ->
      (* case of label, usually put in first column *)
      let label_follows =
        match other.tok, restline2 with
        | TIdent _, PToken {tok = TDotDot _} :: _ -> true
        | _ -> false
      in
      let condition =
        (col1 =|= col2 && may_follow_macro_call ctx other.tok)
        || (col2 <= col1
            && (closes_block_or_starts_stmt ctx other.tok || label_follows))
      in
      if condition && not (col1 =|= 0) then begin
        msg_macro_noptvirg s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
        iter_token_paren (set_as_comment Token_c.CppMacro) [group]
      end;
      find_macro_lineparen (line2 :: rest)

  (* linuxext:? single macro
   * ex: LOCK
   *     foo();
   *     UNLOCK
   *
   * todo: factorize code with previous rule ? *)
  | Line [PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro)]
    :: (Line (PToken ({col = col2} as other) :: _) as line2)
    :: rest ->
      let aligned_ok =
        col1 =|= col2
        && col1 <> 0 (* otherwise can match typedef of fundecl *)
        && (match other.tok with
            | TPtVirg _ | TOr _ -> false
            | TCBrace _ when ctx <> InFunction -> false
            | tok -> not (TH.is_binary_operator tok))
      in
      let condition =
        aligned_ok
        || (col2 <= col1 && closes_block_or_starts_stmt ctx other.tok)
      in
      if condition then begin
        msg_macro_noptvirg_single s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok)
      end;
      find_macro_lineparen (line2 :: rest)

  | _ :: rest ->
      find_macro_lineparen rest
1339
1340
485bce71
C
1341
1342(* ------------------------------------------------------------------------- *)
1343(* define tobrace init *)
1344(* ------------------------------------------------------------------------- *)
1345
ae4735db
C
(* Detects '#define NAME {' and '#define NAME(...) {' where the brace opens
 * an initializer, judging from the two tokens that follow the brace, and
 * retags that brace as TOBraceDefineInit.
 *
 * Only the toplevel token sequence is scanned: the content of
 * parenthesised groups is not explored (not needed for this heuristic).
 *)
let rec find_define_init_brace_paren xs =
  let mark_init_brace msg name brace info =
    pr2_cpp (msg ^ name);
    brace.tok <- TOBraceDefineInit info
  in
  match xs with
  | [] -> ()

  (* mainly for firefox *)
  | PToken {tok = TDefine _}
    :: PToken {tok = TIdentDefine (s, _)}
    :: PToken ({tok = TOBrace i1} as brace)
    :: PToken first
    :: PToken second
    :: rest ->
      (match first.tok, second.tok with
       | (TInt _ | TString _ | TIdent _), TComma _ ->
           mark_init_brace "found define initializer: " s brace i1
       | _ -> ());
      find_define_init_brace_paren rest

  (* mainly for linux, especially in sound/ *)
  | PToken {tok = TDefine _}
    :: PToken {tok = TIdentDefine (s, _)}
    :: Parenthised _
    :: PToken ({tok = TOBrace i1} as brace)
    :: PToken first
    :: PToken second
    :: rest ->
      (match first.tok, second.tok with
       | (TInt _ | TIdent _), TComma _
       | TDot _, TIdent _ ->
           mark_init_brace "found define initializer with param: " s brace i1
       | _ -> ());
      find_define_init_brace_paren rest

  (* anything else: skip one element, without entering the groups *)
  | (PToken _ | Parenthised _) :: rest ->
      find_define_init_brace_paren rest
1409
1410
34e49164
C
1411(* ------------------------------------------------------------------------- *)
1412(* action *)
1413(* ------------------------------------------------------------------------- *)
1414
(* Possibly obsolete now that macros are expanded, but commenting this
 * heuristic out causes some regressions.
 * todo: a bad decision made here can influence the other phases and make
 * the code hard to parse. So when we get a parse error, we should maybe
 * undo some of the guesses made by these heuristics and restore the
 * original token values.
 *)
1421
34e49164
C
(* [find_actions toks] looks for 'ident(args)' sequences. For each one it
 * first processes what follows the call, then the arguments themselves,
 * and finally asks [find_actions_params] to tag the arguments that
 * contain statement tokens; when something was tagged, the identifier is
 * reported as a higher-order macro. *)
let rec find_actions toks =
  match toks with
  | [] -> ()
  | PToken {tok = TIdent (s, _)} :: Parenthised (args, _) :: rest ->
      find_actions rest;
      List.iter find_actions args;
      if find_actions_params args then msg_macro_higher_order s
  | _ :: rest ->
      find_actions rest

(* [find_actions_params xxs] retags as TAction every token of every
 * argument that contains at least one statement token. Returns true when
 * at least one argument was retagged this way. *)
and find_actions_params xxs =
  let tag_as_action x =
    if TH.is_eof x.tok then
      (* certainly because paren detection had a pb because of some
       * ifdef-exp. Similar to the additional checking that is done in
       * set_as_comment. *)
      pr2 "PB: weird, I try to tag an EOF token as an action"
    else if TH.is_eom x.tok then
      (* cf tests-bis/no_cpar_macro.c *)
      pr2 "PB: weird, I try to tag an EOM token as an action"
    else
      x.tok <- TAction (TH.info_of_tok x.tok)
  in
  let modified = ref false in
  List.iter
    (fun arg ->
       (* undo: also requiring more than one token was good for sparse,
        * not good for linux *)
       if List.exists (fun x -> TH.is_statement x.tok) (tokens_of_paren arg)
       then begin
         iter_token_paren tag_as_action arg;
         modified := true
       end)
    xxs;
  !modified
1465
1466
1467
1468(* ------------------------------------------------------------------------- *)
1469(* main fix cpp function *)
1470(* ------------------------------------------------------------------------- *)
1471
(* Drops the comment tokens and the cpp instruction tokens from [xs],
 * except TDefine which is always kept.
 *
 * We don't want to drop the define or, if we drop it, we have to drop its
 * body too; otherwise the line heuristics may be lost by not finding the
 * TDefine in column 0 but by finding a TDefineIdent in a column > 0.
 *)
let filter_cpp_stuff xs =
  let keep x =
    if TH.is_comment x.tok then false
    else
      match x.tok with
      | Parser_c.TDefine _ -> true
      | other -> not (TH.is_cpp_instruction other)
  in
  List.filter keep xs
34e49164
C
1487
(* Gives a virtual position to the expanded and fake tokens of [l].
 *
 * Tokens located before the first original token are left untouched.
 * After that, every ExpandedTok/FakeTok gets its position rewritten as a
 * pair (parse info of the last original token seen, offset). The offset
 * starts at the length of that original token's string and grows by the
 * length of each expanded or fake token that follows it.
 * Fails with "abstract not expected" on an AbstractLineTok found after
 * the first original token.
 *)
let insert_virtual_positions l =
  let width info = String.length (Ast_c.str_of_info info) in
  (* [anchor] is the parse info of the last original token, [off] the
   * offset for the next virtual token, [done_rev] the tokens already
   * processed, in reverse order. *)
  let rec relocate anchor off done_rev toks =
    match toks with
    | [] -> List.rev done_rev
    | t :: rest ->
        let info = TH.info_of_tok t in
        let with_pinfo pinfo =
          TH.visitor_info_of_tok (fun i -> Ast_c.rewrap_pinfo pinfo i) t
        in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             relocate (Ast_c.parse_info_of_info info) (width info)
               (t :: done_rev) rest
         | Ast_c.ExpandedTok (pi, _) ->
             let t' = with_pinfo (Ast_c.ExpandedTok (pi, (anchor, off))) in
             relocate anchor (off + width info) (t' :: done_rev) rest
         | Ast_c.FakeTok (s, _) ->
             let t' = with_pinfo (Ast_c.FakeTok (s, (anchor, off))) in
             relocate anchor (off + width info) (t' :: done_rev) rest
         | Ast_c.AbstractLineTok _ ->
             failwith "abstract not expected")
  in
  (* Copy tokens as they are until the first original token is found. *)
  let rec skip_fake toks =
    match toks with
    | [] -> []
    | t :: rest ->
        let info = TH.info_of_tok t in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             t :: relocate (Ast_c.parse_info_of_info info) (width info)
                    [] rest
         | _ -> t :: skip_fake rest)
  in
  skip_fake l
708f4980 1518
485bce71 1519(* ------------------------------------------------------------------------- *)
f59c9fb7 1520
(* Main cpp-fixing function: runs all the token heuristics over [tokens],
 * using [macro_defs] for the macro expansion phase, and returns the
 * resulting token list with virtual positions inserted.
 *
 * The order of the phases is important. If the action heuristic is put
 * first then, because of ifdefs, we can have parens that are not closed,
 * wrongly believe that we have a higher order macro, and eat too many
 * tokens. So it is important to do the ifdef phase first.
 *
 * The cleaned views are recomputed before each phase because the mutable
 * tokens can have been changed by the previous phase, so there may be
 * more comments in the original token list.
 *)
let fix_tokens_cpp2 ~macro_defs tokens =
  let tokens2 = ref (Common.acc_map TV.mk_token_extended tokens) in

  (* Current tokens without the comments (could also filter
   * #define/#include). is_comment also filters the TCommentCpp created by
   * commentize_skip_start_to_end. *)
  let without_comments () =
    List.filter (fun x -> not (TH.is_comment x.tok)) !tokens2
  in
  (* Parenthesised view of the current tokens, without cpp stuff. *)
  let parenthised_view () = TV.mk_parenthised (filter_cpp_stuff !tokens2) in

  commentize_skip_start_to_end !tokens2;

  (* ifdef *)
  let ifdef_grouped = TV.mk_ifdef (without_comments ()) in
  set_ifdef_parenthize_info ifdef_grouped;
  find_ifdef_funheaders ifdef_grouped;
  find_ifdef_bool ifdef_grouped;
  find_ifdef_mid ifdef_grouped;
  (* change order ? maybe the cparen_else heuristic makes some of the
   * funheaders heuristics irrelevant ? *)
  find_ifdef_cparen_else ifdef_grouped;
  adjust_inifdef_include ifdef_grouped;

  (* macro 1 *)
  let to_expand = parenthised_view () in
  Cpp_token_c.apply_macro_defs
    ~msg_apply_known_macro
    ~msg_apply_known_macro_hint
    macro_defs to_expand;
  (* because the before field is used by apply_macro_defs *)
  tokens2 := TV.rebuild_tokens_extented !tokens2;

  (* tagging contextual info (InFunc, InStruct, etc). Better to do that
   * after the "ifdef-simplification" phase. *)
  set_context_tag (TV.mk_braceised (without_comments ()));

  (* macro *)
  let paren_grouped = parenthised_view () in
  let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
  find_define_init_brace_paren paren_grouped;
  find_string_macro_paren paren_grouped;
  find_macro_lineparen line_paren_grouped;
  find_macro_paren paren_grouped;

  (* obsolete: actions ? not yet *)
  find_actions (parenthised_view ());

  insert_virtual_positions (Common.acc_map (fun x -> x.tok) !tokens2)
1599
(* Runs fix_tokens_cpp2 under the exclusive profiling entry "HACK". *)
let time_hack1 ~macro_defs a =
  let run () = fix_tokens_cpp2 ~macro_defs a in
  Common.profile_code_exclusif "HACK" run
34e49164 1602
(* Entry point of the cpp token fixing: same as time_hack1, wrapped in the
 * profiling entry "C parsing.fix_cpp". *)
let fix_tokens_cpp ~macro_defs a =
  let run () = time_hack1 ~macro_defs a in
  Common.profile_code "C parsing.fix_cpp" run
34e49164 1605
34e49164 1606
34e49164 1607
34e49164
C
1608
1609(*****************************************************************************)
1610(* Lexing with lookahead *)
1611(*****************************************************************************)
1612
(* Why use yet another parsing_hack technique? Aren't the fix_xxx
 * functions, which do some pre-processing on the full list of tokens,
 * enough?
 * No, because sometimes we need more contextual information, and even
 * though set_context() tries to provide some, it is not completely
 * accurate. So the following code gives yet another alternative, yet
 * another chance to transform some tokens.
 *
 * todo?: maybe we could try to get rid of this technique. Maybe a better
 * set_context() would make it possible to move this code to a fix_xxx
 * technique.
 *
 * LALR(k) trick. We could do this by adding cases in lexer_c.mll, but it
 * is more general to do it via this LALR(k) technique, because here we
 * can transform a token given some context information. Sometimes it
 * makes sense to transform a token in one context and not in another,
 * and lex can not provide us with this context information. Note that
 * the order of the clauses in the pattern matching of lookahead is
 * important. Do not cut/paste.
 *
 * Note that next contains only "clean" tokens: there are no comment or
 * space tokens. This is done by the caller.
 *)
1635
485bce71
C
1636open Lexer_parser (* for the fields of lexer_hint type *)
1637
(* True unless the first token of the given list is 'struct', 'union' or
 * 'enum'. *)
let not_struct_enum toks =
  match toks with
  | Parser_c.Tstruct _ :: _
  | Parser_c.Tunion _ :: _
  | Parser_c.Tenum _ :: _ -> false
  | _ -> true
34e49164 1641
f59c9fb7
C
(* True for '*', and also for '&' when the C++ flag is set. *)
let pointer tok =
  match tok with
  | TMul _ -> true
  | TAnd _ -> !Flag.c_plus_plus
  | _ -> false
485bce71 1646
17ba0788
C
(* True for an identifier or for a token accepted by [pointer]. *)
let ident_or_star tok =
  match tok with
  | TIdent _ -> true
  | _ -> pointer tok
1650
97111a47
C
1651(* This function is inefficient, because it will look over a K&R header,
1652or function prototype multiple times. At least when we see a , and are in a
1653parameter list, we know we will eventually see a close paren, and it
1654should come fairly soon. *)
(* Skips the tokens of [l] up to the first closing paren and returns true
 * when that paren exists and is not immediately followed by '['. *)
let k_and_r l =
  let is_cpar = function
    | TCPar _ -> true
    | _ -> false
  in
  match drop_until is_cpar l with
  | TCPar _ :: TOCro _ :: _ -> false
  | TCPar _ :: _ -> true
  | _ -> false
34e49164 1661
97111a47
C
1662(* (a)(b) is ambiguous, because (a) could be a function name or a cast.
1663At this point, we just see an ident for a; we don't know if it is eg a local
1664variable. This function sees at least if b is the only argument, ie there
1665are no commas at top level *)
(* Scans [l] while tracking the paren nesting depth, starting from depth 0.
 * Returns true when a closing paren is met at depth 1 before any comma at
 * depth 1; returns false when such a comma comes first or when the list
 * is exhausted. *)
let paren_before_comma l =
  let rec scan depth toks =
    match toks with
    | [] -> false
    | TComma _ :: _ when depth = 1 -> false
    | TCPar _ :: rest ->
        if depth = 1 then true else scan (depth - 1) rest
    | TOPar _ :: rest -> scan (depth + 1) rest
    | _ :: rest -> scan depth rest
  in
  scan 0 l
1675
1676let lookahead2 ~pass next before =
34e49164
C
1677 match (next, before) with
1678
4dfbc1c2
C
1679 (* c++ hacks *)
1680 (* yy xx( and in function *)
1681 | TOPar i1::_, TIdent(s,i2)::TypedefIdent _::_
1682 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1683 pr2_cpp("constructed_object: " ^s);
1684 TOParCplusplusInit i1
1685 | TypedefIdent(s,i)::TOPar i1::_,_
1686 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1687 TIdent(s,i)
1688
34e49164
C
1689 (*-------------------------------------------------------------*)
1690 (* typedef inference, parse_typedef_fix3 *)
1691 (*-------------------------------------------------------------*)
1692 (* xx xx *)
b1b2de81 1693 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
34e49164
C
1694 && ok_typedef s
1695 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
ae4735db 1696 ->
34e49164
C
1697 (* parse_typedef_fix3:
1698 * acpi_object acpi_object;
ae4735db 1699 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
34e49164
C
1700 * Le parser en interne a deja appelé le prochain token pour pouvoir
1701 * decider des choses.
1702 * => special case in lexer_heuristic, again
1703 *)
ae4735db
C
1704 if !Flag_parsing_c.debug_typedef
1705 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
34e49164
C
1706
1707 LP.disable_typedef();
1708
97111a47 1709 msg_typedef s i1 1; LP.add_typedef_root s;
34e49164
C
1710 TypedefIdent (s, i1)
1711
1712 (* xx yy *)
ae4735db 1713 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
34e49164
C
1714 && ok_typedef s
1715 ->
1716 (* && not_annot s2 BUT lead to false positive*)
1717
97111a47 1718 msg_typedef s i1 2; LP.add_typedef_root s;
34e49164
C
1719 TypedefIdent (s, i1)
1720
1721
1722 (* xx inline *)
ae4735db 1723 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
34e49164 1724 && ok_typedef s
ae4735db 1725 ->
97111a47 1726 msg_typedef s i1 3; LP.add_typedef_root s;
34e49164
C
1727 TypedefIdent (s, i1)
1728
1729
1730 (* [,(] xx [,)] AND param decl *)
97111a47
C
1731 | (TIdent (s, i1)::(((TComma _|TCPar _)::_) as rest) ,
1732 (TComma _ |TOPar _)::_ )
1733 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1734 && k_and_r rest
1735 ->
1736 TKRParam(s,i1)
1737
1738 | (TIdent (s, i1)::((TComma _|TCPar _)::_) , (TComma _ |TOPar _)::_ )
b1b2de81 1739 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
34e49164 1740 && ok_typedef s
ae4735db 1741 ->
97111a47
C
1742 msg_typedef s i1 4; LP.add_typedef_root s;
1743 TypedefIdent (s, i1)
34e49164
C
1744
1745 (* xx* [,)] *)
1746 (* specialcase: [,(] xx* [,)] *)
f59c9fb7
C
1747 | (TIdent (s, i1)::ptr::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1748 when pointer ptr && not_struct_enum before
34e49164
C
1749 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1750 && ok_typedef s
ae4735db 1751 ->
97111a47 1752 msg_typedef s i1 5; LP.add_typedef_root s;
34e49164
C
1753 TypedefIdent (s, i1)
1754
1755
1756 (* xx** [,)] *)
1757 (* specialcase: [,(] xx** [,)] *)
1758 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1759 when not_struct_enum before
1760 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1761 && ok_typedef s
ae4735db 1762 ->
97111a47 1763 msg_typedef s i1 6; LP.add_typedef_root s;
34e49164
C
1764 TypedefIdent (s, i1)
1765
1766
1767
1768 (* xx const * USELESS because of next rule ? *)
ae4735db
C
1769 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
1770 when not_struct_enum before
34e49164
C
1771 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1772 && ok_typedef s
1773 ->
1774
97111a47 1775 msg_typedef s i1 7; LP.add_typedef_root s;
34e49164 1776 TypedefIdent (s, i1)
ae4735db 1777
34e49164 1778 (* xx const *)
ae4735db
C
1779 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
1780 when not_struct_enum before
34e49164
C
1781 && ok_typedef s
1782 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1783 ->
1784
97111a47 1785 msg_typedef s i1 8; LP.add_typedef_root s;
34e49164
C
1786 TypedefIdent (s, i1)
1787
1788
1789 (* xx * const *)
f59c9fb7
C
1790 | (TIdent (s, i1)::ptr::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
1791 when pointer ptr && not_struct_enum before
34e49164
C
1792 && ok_typedef s
1793 ->
1794 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1795
97111a47 1796 msg_typedef s i1 9; LP.add_typedef_root s;
34e49164
C
1797 TypedefIdent (s, i1)
1798
1799
1800 (* ( const xx) *)
485bce71 1801 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
34e49164 1802 ok_typedef s ->
97111a47 1803 msg_typedef s i1 10; LP.add_typedef_root s;
34e49164 1804 TypedefIdent (s, i1)
ae4735db 1805
34e49164
C
1806
1807
1808 (* ( xx ) [sizeof, ~] *)
485bce71 1809 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
34e49164
C
1810 when not_struct_enum before
1811 && ok_typedef s
ae4735db 1812 ->
97111a47 1813 msg_typedef s i1 11; LP.add_typedef_root s;
34e49164
C
1814 TypedefIdent (s, i1)
1815
1816 (* [(,] xx [ AND parameterdeclaration *)
1817 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
b1b2de81 1818 when (LP.current_context() =*= LP.InParameter)
34e49164 1819 && ok_typedef s
ae4735db 1820 ->
97111a47 1821 msg_typedef s i1 12; LP.add_typedef_root s;
34e49164 1822 TypedefIdent (s, i1)
ae4735db 1823
34e49164
C
1824 (*------------------------------------------------------------*)
1825 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
1826 (* but if have a '=', or ',' I think not *)
1827 (*------------------------------------------------------------*)
1828
1829 (* static xx * yy *)
f59c9fb7 1830 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::_ ,
485bce71 1831 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
f59c9fb7 1832 pointer ptr && ok_typedef s
34e49164 1833 ->
97111a47 1834 msg_typedef s i1 13; LP.add_typedef_root s;
34e49164 1835 TypedefIdent (s, i1)
ae4735db 1836
34e49164
C
1837 (* TODO xx * yy ; AND in start of compound element *)
1838
1839
1840 (* xx * yy, AND in paramdecl *)
f59c9fb7 1841 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ , _)
b1b2de81 1842 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
f59c9fb7 1843 && pointer ptr && ok_typedef s
ae4735db 1844 ->
34e49164 1845
97111a47 1846 msg_typedef s i1 14; LP.add_typedef_root s;
34e49164
C
1847 TypedefIdent (s, i1)
1848
1849
1850 (* xx * yy ; AND in Toplevel, except when have = before *)
1851 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
1852 TIdent (s, i1)
f59c9fb7
C
1853 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TPtVirg _::_ , _)
1854 when not_struct_enum before && pointer ptr &&
1855 (LP.is_top_or_struct (LP.current_context ()))
ae4735db 1856 ->
97111a47 1857 msg_typedef s i1 15; LP.add_typedef_root s;
34e49164
C
1858 TypedefIdent (s, i1)
1859
1860 (* xx * yy , AND in Toplevel *)
f59c9fb7 1861 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ , _)
b1b2de81 1862 when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
f59c9fb7 1863 && ok_typedef s && pointer ptr
ae4735db 1864 ->
34e49164 1865
97111a47 1866 msg_typedef s i1 16; LP.add_typedef_root s;
34e49164
C
1867 TypedefIdent (s, i1)
1868
1869 (* xx * yy ( AND in Toplevel *)
f59c9fb7 1870 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TOPar _::_ , _)
ae4735db 1871 when not_struct_enum before
485bce71 1872 && (LP.is_top_or_struct (LP.current_context ()))
f59c9fb7 1873 && ok_typedef s && pointer ptr
34e49164 1874 ->
97111a47 1875 msg_typedef s i1 17; LP.add_typedef_root s;
34e49164 1876 TypedefIdent (s, i1)
ae4735db 1877
34e49164
C
1878 (* xx * yy [ *)
1879 (* todo? enough ? cos in struct def we can have some expression ! *)
f59c9fb7 1880 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TOCro _::_ , _)
ae4735db 1881 when not_struct_enum before &&
485bce71 1882 (LP.is_top_or_struct (LP.current_context ()))
f59c9fb7 1883 && ok_typedef s && pointer ptr
ae4735db 1884 ->
97111a47 1885 msg_typedef s i1 18; LP.add_typedef_root s;
34e49164
C
1886 TypedefIdent (s, i1)
1887
1888 (* u16: 10; in struct *)
1889 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
485bce71 1890 when (LP.is_top_or_struct (LP.current_context ()))
ae4735db
C
1891 && ok_typedef s
1892 ->
97111a47 1893 msg_typedef s i1 19; LP.add_typedef_root s;
34e49164 1894 TypedefIdent (s, i1)
ae4735db 1895
34e49164
C
1896
1897 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
1898 (* YES cos at toplevel can have some expression !! for instance when *)
1899 (* enter in the dimension of an array *)
1900 (*
1901 | (TIdent s::TMul::TIdent s2::_ , _)
1902 when (take_safe 1 !passed_tok <> [Tstruct] &&
1903 (take_safe 1 !passed_tok <> [Tenum]))
1904 &&
ae4735db 1905 !LP._lexer_hint = Some LP.Toplevel ->
4dfbc1c2 1906 msg_typedef s 20; LP.add_typedef_root s;
34e49164
C
1907 TypedefIdent s
1908 *)
1909
1910 (* xx * yy = *)
f59c9fb7 1911 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TEq _::_ , _)
ae4735db 1912 when not_struct_enum before
f59c9fb7 1913 && ok_typedef s && pointer ptr
34e49164 1914 ->
97111a47 1915 msg_typedef s i1 21; LP.add_typedef_root s;
34e49164
C
1916 TypedefIdent (s, i1)
1917
1918
1919 (* xx * yy) AND in paramdecl *)
f59c9fb7 1920 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TCPar _::_ , _)
b1b2de81 1921 when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
f59c9fb7 1922 && ok_typedef s && pointer ptr
34e49164 1923 ->
97111a47 1924 msg_typedef s i1 22; LP.add_typedef_root s;
34e49164 1925 TypedefIdent (s, i1)
ae4735db 1926
34e49164
C
1927
1928 (* xx * yy; *) (* wrong ? *)
f59c9fb7 1929 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TPtVirg _::_ ,
ae4735db 1930 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
f59c9fb7 1931 && ok_typedef s & pointer ptr
34e49164 1932 ->
97111a47 1933 msg_typedef s i1 23; LP.add_typedef_root s;
485bce71 1934 msg_maybe_dangereous_typedef s;
34e49164
C
1935 TypedefIdent (s, i1)
1936
1937
1938 (* xx * yy, and ';' before xx *) (* wrong ? *)
f59c9fb7 1939 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ ,
34e49164 1940 (TOBrace _| TPtVirg _)::_) when
f59c9fb7 1941 ok_typedef s && pointer ptr
34e49164 1942 ->
97111a47 1943 msg_typedef s i1 24; LP.add_typedef_root s;
34e49164
C
1944 TypedefIdent (s, i1)
1945
1946
1947 (* xx_t * yy *)
f59c9fb7 1948 | (TIdent (s, i1)::ptr::TIdent (s2, i2)::_ , _)
ae4735db
C
1949 when s ==~ regexp_typedef && not_struct_enum before
1950 (* struct user_info_t sometimes *)
f59c9fb7 1951 && ok_typedef s && pointer ptr
ae4735db 1952 ->
97111a47 1953 msg_typedef s i1 25; LP.add_typedef_root s;
34e49164
C
1954 TypedefIdent (s, i1)
1955
1956 (* xx ** yy *) (* wrong ? *)
1957 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
1958 when not_struct_enum before
1959 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
ae4735db 1960 && ok_typedef s
34e49164 1961 ->
97111a47 1962 msg_typedef s i1 26; LP.add_typedef_root s;
34e49164
C
1963 TypedefIdent (s, i1)
1964
1965 (* xx *** yy *)
1966 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
ae4735db
C
1967 when not_struct_enum before
1968 && ok_typedef s
34e49164
C
1969 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1970 ->
97111a47 1971 msg_typedef s i1 27; LP.add_typedef_root s;
34e49164
C
1972 TypedefIdent (s, i1)
1973
1974 (* xx ** ) *)
1975 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
ae4735db 1976 when not_struct_enum before
34e49164 1977 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
ae4735db 1978 && ok_typedef s
34e49164 1979 ->
97111a47 1980 msg_typedef s i1 28; LP.add_typedef_root s;
34e49164
C
1981 TypedefIdent (s, i1)
1982
1983
1984
1985 (* ----------------------------------- *)
ae4735db 1986 (* old: why not do like for other rules and start with TIdent ?
485bce71
C
1987 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
1988 * new: prefer now start with TIdent because otherwise the add_typedef_root
1989 * may have no effect if in second pass or if have disable the add_typedef.
1990 *)
34e49164
C
1991
1992 (* (xx) yy *)
17ba0788 1993 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::after::_ ,
ae4735db 1994 (TOPar info)::x::_)
17ba0788
C
1995 when not (TH.is_stuff_taking_parenthized x) (* &&
1996 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 - why useful?
1997 *)
ae4735db 1998 && ok_typedef s
17ba0788 1999 && not (ident_or_star after) (* possible K&R declaration *)
ae4735db 2000 ->
97111a47 2001 msg_typedef s i1 29; LP.add_typedef_root s;
485bce71
C
2002 (*TOPar info*)
2003 TypedefIdent (s, i1)
34e49164
C
2004
2005
ae4735db 2006 (* (xx) ( yy)
91eba41f
C
2007 * but false positive: typedef int (xxx_t)(...), so do specialisation below.
2008 *)
2009 (*
ae4735db
C
2010 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
2011 when not (TH.is_stuff_taking_parenthized x)
2012 && ok_typedef s
34e49164 2013 ->
4dfbc1c2 2014 msg_typedef s 30; LP.add_typedef_root s;
485bce71
C
2015 (* TOPar info *)
2016 TypedefIdent (s, i1)
91eba41f
C
2017 *)
2018 (* special case: = (xx) ( yy) *)
97111a47 2019 | (TIdent (s, i1)::TCPar _::((TOPar _::_) as rest) ,
91eba41f 2020 (TOPar info)::(TEq _ |TEqEq _)::_)
97111a47 2021 when ok_typedef s && paren_before_comma rest
91eba41f 2022 ->
97111a47 2023 msg_typedef s i1 31; LP.add_typedef_root s;
91eba41f
C
2024 (* TOPar info *)
2025 TypedefIdent (s, i1)
2026
34e49164
C
2027
2028 (* (xx * ) yy *)
f59c9fb7
C
2029 | (TIdent (s, i1)::ptr::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_)
2030 when ok_typedef s && pointer ptr
ae4735db 2031 ->
97111a47 2032 msg_typedef s i1 32; LP.add_typedef_root s;
485bce71
C
2033 (*TOPar info*)
2034 TypedefIdent (s,i1)
2035
34e49164
C
2036
2037 (* (xx){ ... } constructor *)
ae4735db
C
2038 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
2039 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
2040 && ok_typedef s
34e49164 2041 ->
97111a47 2042 msg_typedef s i1 33; LP.add_typedef_root s;
34e49164
C
2043 TypedefIdent (s, i1)
2044
2045
2046 (* can have sizeof on expression
ae4735db 2047 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
708f4980 2048 msg_typedef s; LP.add_typedef_root s;
34e49164
C
2049 Tsizeof
2050 *)
91eba41f
C
2051
2052
2053 (* ----------------------------------- *)
2054 (* x ( *y )(params), function pointer *)
ae4735db 2055 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
34e49164 2056 when not_struct_enum before
ae4735db 2057 && ok_typedef s
34e49164 2058 ->
97111a47 2059 msg_typedef s i1 34; LP.add_typedef_root s;
34e49164
C
2060 TypedefIdent (s, i1)
2061
91eba41f 2062 (* x* ( *y )(params), function pointer 2 *)
ae4735db 2063 | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
91eba41f 2064 when not_struct_enum before
ae4735db 2065 && ok_typedef s
91eba41f 2066 ->
97111a47 2067 msg_typedef s i1 35; LP.add_typedef_root s;
91eba41f
C
2068 TypedefIdent (s, i1)
2069
34e49164
C
2070
2071 (*-------------------------------------------------------------*)
2072 (* CPP *)
2073 (*-------------------------------------------------------------*)
485bce71
C
2074 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
2075 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
34e49164 2076 as x)
ae4735db
C
2077 ::_, _
2078 ->
485bce71 2079 (*
ae4735db 2080 if not !Flag_parsing_c.ifdef_to_if
34e49164 2081 then TCommentCpp (Ast_c.CppDirective, ii)
ae4735db 2082 else
485bce71 2083 *)
0708f913
C
2084 (* not !LP._lexer_hint.toplevel *)
2085 if !Flag_parsing_c.ifdef_directive_passing
708f4980 2086 || (pass >= 2)
0708f913 2087 then begin
ae4735db 2088
b1b2de81 2089 if (LP.current_context () =*= LP.InInitializer)
ae4735db 2090 then begin
0708f913
C
2091 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
2092 incr Stat.nIfdefInitializer;
ae4735db 2093 end else begin
708f4980 2094 pr2_cpp("IFDEF: or related inside function. I treat it as comment");
0708f913
C
2095 incr Stat.nIfdefPassing;
2096 end;
2097 TCommentCpp (Token_c.CppDirective, ii)
2098 end
2099 else x
ae4735db 2100
3a314143 2101 | (TUndef (ii) as x)::_, _
ae4735db 2102 ->
708f4980 2103 if (pass >= 2)
485bce71 2104 then begin
0708f913
C
2105 pr2_cpp("UNDEF: I treat it as comment");
2106 TCommentCpp (Token_c.CppDirective, ii)
113803cf
C
2107 end
2108 else x
2109
ae4735db
C
2110 | (TCppDirectiveOther (ii) as x)::_, _
2111 ->
708f4980 2112 if (pass >= 2)
113803cf 2113 then begin
0708f913
C
2114 pr2_cpp ("OTHER directive: I treat it as comment");
2115 TCommentCpp (Token_c.CppDirective, ii)
485bce71
C
2116 end
2117 else x
34e49164
C
2118
2119 (* If ident contain a for_each, then certainly a macro. But to be
2120 * sure should look if there is a '{' after the ')', but it requires
2121 * to count the '('. Because this can be expensive, we do that only
ae4735db 2122 * when the token contains "for_each".
34e49164 2123 *)
ae4735db 2124 | (TIdent (s, i1)::TOPar _::rest, _)
b1b2de81 2125 when not (LP.current_context () =*= LP.InTopLevel)
ae4735db
C
2126 (* otherwise a function such as static void loopback_enable(int i) {
2127 * will be considered as a loop
34e49164
C
2128 *)
2129 ->
2130
ae4735db 2131 if s ==~ regexp_foreach &&
34e49164 2132 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
ae4735db 2133
34e49164
C
2134 then begin
2135 msg_foreach s;
2136 TMacroIterator (s, i1)
2137 end
2138 else TIdent (s, i1)
2139
34e49164
C
2140 (*-------------------------------------------------------------*)
2141 | v::xs, _ -> v
2142 | _ -> raise Impossible
2143
(* Profiled entry point for the lookahead heuristics: forwards [~pass],
 * [a] and [b] unchanged to [lookahead2], running that call through
 * [Common.profile_code] under the label "C parsing.lookahead".
 * The result is whatever [Common.profile_code] returns for the thunk
 * (presumably the token computed by [lookahead2] -- confirm in Common).
 *)
let lookahead ~pass a b =
  let run () = lookahead2 ~pass a b in
  Common.profile_code "C parsing.lookahead" run
34e49164
C
2146
2147