permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 module TH = Token_helpers
19 module TV = Token_views_c
20 module LP = Lexer_parser
21
22 module Stat = Parsing_stat
23
24 open Parser_c
25
26 open TV
27
28 (*****************************************************************************)
29 (* Some debugging functions *)
30 (*****************************************************************************)
31
32 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
33
34 let pr2_cpp s =
35 if !Flag_parsing_c.debug_cpp
36 then Common.pr2_once ("CPP-" ^ s)
37
38
39 let msg_gen cond is_known printer s =
40 if cond
41 then
42 if not (!Flag_parsing_c.filter_msg)
43 then printer s
44 else
45 if not (is_known s)
46 then printer s
47
48 (* In the following, there are some harcoded names of types or macros
49 * but they are not used by our heuristics! They are just here to
50 * enable to detect false positive by printing only the typedef/macros
51 * that we don't know yet. If we print everything, then we can easily
52 * get lost with too much verbose tracing information. So those
53 * functions "filter" some messages. So our heuristics are still good,
54 * there is no more (or not that much) hardcoded linux stuff.
55 *)
56
57 let is_known_typdef =
58 (fun s ->
59 (match s with
60 | "u_char" | "u_short" | "u_int" | "u_long"
61 | "u8" | "u16" | "u32" | "u64"
62 | "s8" | "s16" | "s32" | "s64"
63 | "__u8" | "__u16" | "__u32" | "__u64"
64 -> true
65
66 | "acpi_handle"
67 | "acpi_status"
68 -> true
69
70 | "FILE"
71 | "DIR"
72 -> true
73
74 | s when s =~ ".*_t$" -> true
75 | _ -> false
76 )
77 )
78
79 (* note: cant use partial application with let msg_typedef =
80 * because it would compute msg_typedef at compile time when
81 * the flag debug_typedef is always false
82 *)
83 let msg_typedef s ii n =
84 incr Stat.nTypedefInfer;
85 msg_gen (!Flag_parsing_c.debug_typedef)
86 is_known_typdef
87 (fun s ->
88 pr2_cpp
89 (Printf.sprintf "TYPEDEF: promoting:(%d) %s on line %d" n s
90 (Ast_c.line_of_info ii))
91 (*(Printf.sprintf "TYPEDEF: promoting: %s on line %d" s
92 (Ast_c.line_of_info ii))*)
93 )
94 s
95
96 let msg_maybe_dangereous_typedef s =
97 if not (is_known_typdef s)
98 then
99 pr2
100 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
101
102
103
104 let msg_declare_macro s =
105 incr Stat.nMacroDecl;
106 msg_gen (!Flag_parsing_c.debug_cpp)
107 (fun s ->
108 (match s with
109 | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
110 | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
111 | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
112 | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
113 | "SENSOR_DEVICE_ATTR"
114 | "LIST_HEAD"
115 | "DECLARE_WORK" | "DECLARE_TASKLET"
116 | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
117 | "DECLARE_BITMAP"
118
119 -> true
120 (*
121 | s when s =~ "^DECLARE_.*" -> true
122 | s when s =~ ".*_ATTR$" -> true
123 | s when s =~ "^DEFINE_.*" -> true
124 *)
125
126 | _ -> false
127 )
128 )
129 (fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s))
130 s
131
132
133 let msg_foreach s =
134 incr Stat.nIteratorHeuristic;
135 pr2_cpp ("MACRO: found foreach: " ^ s)
136
137
138 (* ??
139 let msg_debug_macro s =
140 pr2_cpp ("MACRO: found debug-macro: " ^ s)
141 *)
142
143
144 let msg_macro_noptvirg s =
145 incr Stat.nMacroStmt;
146 pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)
147
148 let msg_macro_toplevel_noptvirg s =
149 incr Stat.nMacroStmt;
150 pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)
151
152 let msg_macro_noptvirg_single s =
153 incr Stat.nMacroStmt;
154 pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
155
156
157
158
159 let msg_macro_higher_order s =
160 incr Stat.nMacroHigherOrder;
161 msg_gen (!Flag_parsing_c.debug_cpp)
162 (fun s ->
163 (match s with
164 | "DBGINFO"
165 | "DBGPX"
166 | "DFLOW"
167 -> true
168 | _ -> false
169 )
170 )
171 (fun s -> pr2_cpp ("MACRO: found higher ordre macro : " ^ s))
172 s
173
174
175 let msg_stringification s =
176 incr Stat.nMacroString;
177 msg_gen (!Flag_parsing_c.debug_cpp)
178 (fun s ->
179 (match s with
180 | "REVISION"
181 | "UTS_RELEASE"
182 | "SIZE_STR"
183 | "DMA_STR"
184 -> true
185 (* s when s =~ ".*STR.*" -> true *)
186 | _ -> false
187 )
188 )
189 (fun s -> pr2_cpp ("MACRO: found string-macro " ^ s))
190 s
191
192 let msg_stringification_params s =
193 incr Stat.nMacroString;
194 pr2_cpp ("MACRO: string-macro with params : " ^ s)
195
196
197
198 let msg_apply_known_macro s =
199 incr Stat.nMacroExpand;
200 pr2_cpp ("MACRO: found known macro = " ^ s)
201
202 let msg_apply_known_macro_hint s =
203 incr Stat.nMacroHint;
204 pr2_cpp ("MACRO: found known macro hint = " ^ s)
205
206
207
208
209 let msg_ifdef_bool_passing is_ifdef_positif =
210 incr Stat.nIfdefZero; (* of Version ? *)
211 if is_ifdef_positif
212 then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION"
213 else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
214
215
216 let msg_ifdef_mid_something () =
217 incr Stat.nIfdefExprPassing;
218 pr2_cpp "found ifdef-mid-something"
219
220 let msg_ifdef_funheaders () =
221 incr Stat.nIfdefFunheader;
222 ()
223
224 let msg_ifdef_cparen_else () =
225 incr Stat.nIfdefPassing;
226 pr2_cpp("found ifdef-cparen-else")
227
228
229 let msg_attribute s =
230 incr Stat.nMacroAttribute;
231 pr2_cpp("ATTR:" ^ s)
232
233
234
235 (*****************************************************************************)
236 (* The regexp and basic view definitions *)
237 (*****************************************************************************)
238
239 (* opti: better to built then once and for all, especially regexp_foreach *)
240
241 let regexp_macro = Str.regexp
242 "^[A-Z_][A-Z_0-9]*$"
243
244 (* linuxext: *)
245 let regexp_annot = Str.regexp
246 "^__.*$"
247
248 (* linuxext: *)
249 let regexp_declare = Str.regexp
250 ".*DECLARE.*"
251
252 (* linuxext: *)
253 let regexp_foreach = Str.regexp_case_fold
254 ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
255
256 let regexp_typedef = Str.regexp
257 ".*_t$"
258
259 let false_typedef = [
260 "printk";
261 ]
262
263
264 let ok_typedef s = not (List.mem s false_typedef)
265
266 let not_annot s =
267 not (s ==~ regexp_annot)
268 let is_macro s =
269 s ==~ regexp_macro
270 let not_macro s =
271 not (is_macro s)
272
273
274
275
276 (*****************************************************************************)
277 (* Helpers *)
278 (*****************************************************************************)
279
280 (* ------------------------------------------------------------------------- *)
281 (* the pair is the status of '()' and '{}', ex: (-1,0)
282 * if too much ')' and good '{}'
283 * could do for [] too ?
284 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
285 * then if have ifdef, then certainly can lead to a problem.
286 *)
287 let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
288 fun xs ->
289 let cnt_paren, cnt_brace = ref 0, ref 0 in
290 xs +> TV.iter_token_ifdef (fun x ->
291 (match x.tok with
292 | x when TH.is_opar x -> incr cnt_paren
293 | TOBrace _ -> incr cnt_brace
294 | x when TH.is_cpar x -> decr cnt_paren
295 | TCBrace _ -> decr cnt_brace
296 | _ -> ()
297 )
298 );
299 !cnt_paren, !cnt_brace
300
301
302 (* ------------------------------------------------------------------------- *)
303 let forLOOKAHEAD = 30
304
305
306 (* look if there is a '{' just after the closing ')', and handling the
307 * possibility to have nested expressions inside nested parenthesis
308 *
309 * todo: use indentation instead of premier(statement) ?
310 *)
311 let rec is_really_foreach xs =
312 let rec is_foreach_aux = function
313 | [] -> false, []
314 | TCPar _::TOBrace _::xs -> true, xs
315 (* the following attempts to handle the cases where there is a
316 single statement in the body of the loop. undoubtedly more
317 cases are needed.
318 todo: premier(statement) - suivant(funcall)
319 *)
320 | TCPar _::TIdent _::xs -> true, xs
321 | TCPar _::Tif _::xs -> true, xs
322 | TCPar _::Twhile _::xs -> true, xs
323 | TCPar _::Tfor _::xs -> true, xs
324 | TCPar _::Tswitch _::xs -> true, xs
325 | TCPar _::Treturn _::xs -> true, xs
326
327
328 | TCPar _::xs -> false, xs
329 | TOPar _::xs ->
330 let (_, xs') = is_foreach_aux xs in
331 is_foreach_aux xs'
332 | x::xs -> is_foreach_aux xs
333 in
334 is_foreach_aux xs +> fst
335
336
337 (* ------------------------------------------------------------------------- *)
338 let set_ifdef_token_parenthize_info cnt x =
339 match x with
340 | TIfdef (tag, _)
341 | TIfdefelse (tag, _)
342 | TIfdefelif (tag, _)
343 | TEndif (tag, _)
344
345 | TIfdefBool (_, tag, _)
346 | TIfdefMisc (_, tag, _)
347 | TIfdefVersion (_, tag, _) ->
348 tag := Some cnt;
349
350 | _ -> raise (Impossible 89)
351
352
353
354 let ifdef_paren_cnt = ref 0
355
356
357 let rec set_ifdef_parenthize_info xs =
358 xs +> List.iter (function
359 | NotIfdefLine xs -> ()
360 | Ifdefbool (_, xxs, info_ifdef)
361 | Ifdef (xxs, info_ifdef) ->
362
363 incr ifdef_paren_cnt;
364 let total_directives = List.length info_ifdef in
365
366 info_ifdef +> List.iter (fun x ->
367 set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives)
368 x.tok);
369 xxs +> List.iter set_ifdef_parenthize_info
370 )
371
372
373 (*****************************************************************************)
374 (* The parsing hack for #define *)
375 (*****************************************************************************)
376
377 (* To parse macro definitions I need to do some tricks
378 * as some information can be get only at the lexing level. For instance
379 * the space after the name of the macro in '#define foo (x)' is meaningful
380 * but the grammar can not get this information. So define_ident below
381 * look at such space and generate a special TOpardefine. In a similar
382 * way macro definitions can contain some antislash and newlines
383 * and the grammar need to know where the macro ends (which is
384 * a line-level and so low token-level information). Hence the
385 * function 'define_line' below and the TDefEol.
386 *
387 * update: TDefEol is handled in a special way at different places,
388 * a little bit like EOF, especially for error recovery, so this
389 * is an important token that should not be retagged!
390 *
391 *
392 * ugly hack, a better solution perhaps would be to erase TDefEOL
393 * from the Ast and list of tokens in parse_c.
394 *
395 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
396 *
397 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
398 * is where is the \n TCommentSpace. Normally there is always a last token
399 * to synchronize on, either EOF or the token of the next toplevel.
400 * In the case of the #define we got in list of token
401 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
402 * not synchronize on it and so we will not print the "\n".
403 * A solution would be to put the TDefEOL before the "\n".
404 * (jll: tried to do this, see the comment "Put end of line..." below)
405 *
406 * todo?: could put a ExpandedTok for that ?
407 *)
408 let mark_end_define ii =
409 let ii' =
410 { Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
411 Common.str = "";
412 Common.charpos = Ast_c.pos_of_info ii + 1
413 };
414 cocci_tag = ref Ast_c.emptyAnnot;
415 annots_tag = Token_annot.empty;
416 comments_tag = ref Ast_c.emptyComments;
417 }
418 in
419 TDefEOL (ii')
420
421 (* put the TDefEOL at the good place *)
422 let rec define_line_1 acc xs =
423 match xs with
424 | [] -> List.rev acc
425 | TDefine ii::xs ->
426 let line = Ast_c.line_of_info ii in
427 let acc = (TDefine ii) :: acc in
428 define_line_2 acc ((=|=) line) ii xs
429 | TUndef ii::xs ->
430 let line = Ast_c.line_of_info ii in
431 let acc = (TUndef ii) :: acc in
432 define_line_2 acc ((=|=) line) ii xs
433 | TCppEscapedNewline ii::xs ->
434 pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
435 pr2 (Ast_c.strloc_of_info ii);
436 let acc = (TCommentSpace ii) :: acc in
437 define_line_1 acc xs
438 | x::xs -> define_line_1 (x::acc) xs
439
440 and define_line_2 acc lineOk lastinfo xs =
441 match xs with
442 | [] ->
443 (* should not happened, should meet EOF before *)
444 pr2 "PB: WEIRD";
445 List.rev (mark_end_define lastinfo::acc)
446 | x::xs ->
447 let line = TH.line_of_tok x in
448 let info = TH.info_of_tok x in
449
450 (match x with
451 | EOF ii ->
452 let acc = (mark_end_define lastinfo) :: acc in
453 let acc = (EOF ii) :: acc in
454 define_line_1 acc xs
455 | TCppEscapedNewline ii ->
456 if not (lineOk line) then pr2 "PB: WEIRD: not same line number";
457 let acc = (TCommentSpace ii) :: acc in
458 define_line_2 acc ((=|=) (line + 1)) info xs
459 | TComment _ when lineOk line ->
460 define_line_2 (x::acc) (function x -> true) info xs
461 | TString _ when lineOk line ->
462 define_line_2 (x::acc) (function x -> true) info xs
463 | x ->
464 if lineOk line
465 then define_line_2 (x::acc) ((=|=) line) info xs
466 else
467 (* Put end of line token before the newline. A newline at least
468 must be there because the line changed and because we saw a
469 #define previously to get to this function at all *)
470 define_line_1
471 ((List.hd acc)::(mark_end_define lastinfo::(List.tl acc)))
472 (x::xs)
473 )
474
475 let rec define_ident acc xs =
476 match xs with
477 | [] -> List.rev acc
478 | TUndef ii::xs ->
479 let acc = TUndef ii :: acc in
480 (match xs with
481 TCommentSpace i1::TIdent (s,i2)::xs ->
482 let acc = (TCommentSpace i1) :: acc in
483 let acc = (TIdentDefine (s,i2)) :: acc in
484 define_ident acc xs
485 | _ ->
486 pr2 "WEIRD: weird #define body";
487 define_ident acc xs
488 )
489 | TDefine ii::xs ->
490 let acc = TDefine ii :: acc in
491 (match xs with
492 | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
493 (* Change also the kind of TIdent to avoid bad interaction
494 * with other parsing_hack tricks. For instant if keep TIdent then
495 * the stringication algo can believe the TIdent is a string-macro.
496 * So simpler to change the kind of the ident too.
497 *)
498 (* if TOParDefine sticked to the ident, then
499 * it's a macro-function. Change token to avoid ambiguity
500 * between #define foo(x) and #define foo (x)
501 *)
502 let acc = (TCommentSpace i1) :: acc in
503 let acc = (TIdentDefine (s,i2)) :: acc in
504 let acc = (TOParDefine i3) :: acc in
505 define_ident acc xs
506
507 | TCommentSpace i1::TIdent (s,i2)::xs ->
508 let acc = (TCommentSpace i1) :: acc in
509 let acc = (TIdentDefine (s,i2)) :: acc in
510 define_ident acc xs
511
512 (* bugfix: ident of macro (as well as params, cf below) can be tricky
513 * note, do we need to subst in the body of the define ? no cos
514 * here the issue is the name of the macro, as in #define inline,
515 * so obviously the name of this macro will not be used in its
516 * body (it would be a recursive macro, which is forbidden).
517 *)
518
519 | TCommentSpace i1::t::xs
520 when TH.str_of_tok t ==~ Common.regexp_alpha
521 ->
522 let s = TH.str_of_tok t in
523 let ii = TH.info_of_tok t in
524 pr2 (spf "remapping: %s to an ident in macro name" s);
525 let acc = (TCommentSpace i1) :: acc in
526 let acc = (TIdentDefine (s,ii)) :: acc in
527 define_ident acc xs
528
529 | TCommentSpace _::_::xs
530 | xs ->
531 pr2 "WEIRD: weird #define body";
532 define_ident acc xs
533 )
534 | x::xs ->
535 let acc = x :: acc in
536 define_ident acc xs
537
538
539
540 let fix_tokens_define2 xs =
541 define_ident [] (define_line_1 [] xs)
542
543 let fix_tokens_define a =
544 Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a)
545
546
547
548
549
550 (* ------------------------------------------------------------------------- *)
551 (* Other parsing hacks related to cpp, Include/Define hacks *)
552 (* ------------------------------------------------------------------------- *)
553
554 (* Sometimes I prefer to generate a single token for a list of things in the
555 * lexer so that if I have to passed them, like for passing TInclude then
556 * it's easy. Also if I don't do a single token, then I need to
557 * parse the rest which may not need special stuff, like detecting
558 * end of line which the parser is not really ready for. So for instance
559 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
560 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
561 * but this kind of token is valid only after a #include and the
562 * lexing and parsing rules are different for such tokens so not that
563 * easy to parse such things in parser_c.mly. Hence the following hacks.
564 *
565 * less?: maybe could get rid of this like I get rid of some of fix_define.
566 *)
567
568 (* helpers *)
569
570 (* used to generate new token from existing one *)
571 let new_info posadd str ii =
572 { Ast_c.pinfo =
573 Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
574 charpos = Ast_c.pos_of_info ii + posadd;
575 str = str;
576 column = Ast_c.col_of_info ii + posadd;
577 };
578 (* must generate a new ref each time, otherwise share *)
579 cocci_tag = ref Ast_c.emptyAnnot;
580 annots_tag = Token_annot.empty;
581 comments_tag = ref Ast_c.emptyComments;
582 }
583
584
585 let rec comment_until_defeol xs =
586 match xs with
587 | [] ->
588 (* job not done in Cpp_token_c.define_parse ? *)
589 failwith "cant find end of define token TDefEOL"
590 | x::xs ->
591 (match x with
592 | Parser_c.TDefEOL i ->
593 Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok x)
594 ::xs
595 | _ ->
596 let x' =
597 (* bugfix: otherwise may lose a TComment token *)
598 if TH.is_real_comment x
599 then x
600 else Parser_c.TCommentCpp (Token_c.CppPassingNormal (*good?*), TH.info_of_tok x)
601 in
602 x'::comment_until_defeol xs
603 )
604
605 let drop_until_defeol xs =
606 List.tl
607 (Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs)
608
609
610
611 (* ------------------------------------------------------------------------- *)
612 (* returns a pair (replaced token, list of next tokens) *)
613 (* ------------------------------------------------------------------------- *)
614
615 let tokens_include (info, includes, filename, inifdef) =
616 Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef),
617 [Parser_c.TIncludeFilename
618 (filename, (new_info (String.length includes) filename info))
619 ]
620
621
622
623
624 (*****************************************************************************)
625 (* CPP handling: macros, ifdefs, macros defs *)
626 (*****************************************************************************)
627
628 (* ------------------------------------------------------------------------- *)
629 (* special skip_start skip_end handling *)
630 (* ------------------------------------------------------------------------- *)
631
632 (* note: after this normally the token list should not contain any more the
633 * TCommentSkipTagStart and End tokens.
634 *)
635 let rec commentize_skip_start_to_end xs =
636 match xs with
637 | [] -> ()
638 | x::xs ->
639 (match x with
640 | {tok = TCommentSkipTagStart info} ->
641 (try
642 let (before, x2, after) =
643 xs +> Common.split_when (function
644 | {tok = TCommentSkipTagEnd _ } -> true
645 | _ -> false
646 )
647 in
648 let topass = x::x2::before in
649 topass +> List.iter (fun tok ->
650 TV.set_as_comment Token_c.CppPassingExplicit tok
651 );
652 commentize_skip_start_to_end after
653 with Not_found ->
654 failwith "could not find end of skip_start special comment"
655 )
656 | {tok = TCommentSkipTagEnd info} ->
657 failwith "found skip_end comment but no skip_start"
658 | _ ->
659 commentize_skip_start_to_end xs
660 )
661
662
663
664
665 (* ------------------------------------------------------------------------- *)
666 (* ifdef keeping/passing *)
667 (* ------------------------------------------------------------------------- *)
668
669 (* #if 0, #if 1, #if LINUX_VERSION handling *)
670 let rec find_ifdef_bool xs =
671 xs +> List.iter (function
672 | NotIfdefLine _ -> ()
673 | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) ->
674
675 msg_ifdef_bool_passing is_ifdef_positif;
676
677 (match xxs with
678 | [] -> raise (Impossible 90)
679 | firstclause::xxs ->
680 info_ifdef_stmt +>
681 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
682
683 if is_ifdef_positif
684 then xxs +> List.iter
685 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal))
686 else begin
687 firstclause +>
688 iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal);
689 (match List.rev xxs with
690 (* keep only last *)
691 | last::startxs ->
692 startxs +> List.iter
693 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal))
694 | [] -> (* not #else *) ()
695 );
696 end
697 );
698
699 | Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool
700 )
701
702
703
704 let thresholdIfdefSizeMid = 6
705
706 (* infer ifdef involving not-closed expressions/statements *)
707 let rec find_ifdef_mid xs =
708 xs +> List.iter (function
709 | NotIfdefLine _ -> ()
710 | Ifdef (xxs, info_ifdef_stmt) ->
711 (match xxs with
712 | [] -> raise (Impossible 91)
713 | [first] -> ()
714 | first::second::rest ->
715 (* don't analyse big ifdef *)
716 if xxs +> List.for_all
717 (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
718 (* don't want nested ifdef *)
719 xxs +> List.for_all (fun xs ->
720 xs +> List.for_all
721 (function NotIfdefLine _ -> true | _ -> false)
722 )
723
724 then
725 let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
726 let cnt1, cnt2 = List.hd counts in
727 if cnt1 <> 0 || cnt2 <> 0 &&
728 counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
729 (*
730 if counts +> List.exists (fun (cnt1, cnt2) ->
731 cnt1 <> 0 || cnt2 <> 0
732 )
733 *)
734 then begin
735 msg_ifdef_mid_something();
736
737 (* keep only first, treat the rest as comment *)
738 info_ifdef_stmt +>
739 List.iter
740 (TV.save_as_comment (function x -> Token_c.CppIfDirective x));
741 (second::rest) +> List.iter
742 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError));
743 end
744
745 );
746 List.iter find_ifdef_mid xxs
747
748 (* no need complex analysis for ifdefbool *)
749 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
750 List.iter find_ifdef_mid xxs
751
752
753 )
754
755
756 let thresholdFunheaderLimit = 4
757
758 (* ifdef defining alternate function header, type *)
759 let rec find_ifdef_funheaders = function
760 | [] -> ()
761 | NotIfdefLine _::xs -> find_ifdef_funheaders xs
762
763 (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
764 | Ifdef
765 ([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1;
766 (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
767 ], info_ifdef_stmt
768 )
769 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line3)
770 ::xs
771 when List.length ifdefblock1 <= thresholdFunheaderLimit &&
772 List.length ifdefblock2 <= thresholdFunheaderLimit
773 ->
774 find_ifdef_funheaders xs;
775
776 msg_ifdef_funheaders ();
777 info_ifdef_stmt +>
778 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
779 let all_toks = [xline2] @ line2 in
780 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError) ;
781 ifdefblock2 +> iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError);
782
783 (* ifdef with nested ifdef *)
784 | Ifdef
785 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
786 [Ifdef
787 ([[NotIfdefLine (({col = 0} as xline2)::line2)];
788 [NotIfdefLine (({col = 0} as xline3)::line3)];
789 ], info_ifdef_stmt2
790 )
791 ]
792 ], info_ifdef_stmt
793 )
794 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
795 ::xs
796 ->
797 find_ifdef_funheaders xs;
798
799 msg_ifdef_funheaders ();
800 info_ifdef_stmt +>
801 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
802 info_ifdef_stmt2 +>
803 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
804 let all_toks = [xline2;xline3] @ line2 @ line3 in
805 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError);
806
807 (* ifdef with elseif *)
808 | Ifdef
809 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
810 [NotIfdefLine (({col = 0} as xline2)::line2)];
811 [NotIfdefLine (({col = 0} as xline3)::line3)];
812 ], info_ifdef_stmt
813 )
814 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
815 ::xs
816 ->
817 find_ifdef_funheaders xs;
818
819 msg_ifdef_funheaders ();
820 info_ifdef_stmt +>
821 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
822 let all_toks = [xline2;xline3] @ line2 @ line3 in
823 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError)
824
825 (* recurse *)
826 | Ifdef (xxs,info_ifdef_stmt)::xs
827 | Ifdefbool (_, xxs,info_ifdef_stmt)::xs ->
828 List.iter find_ifdef_funheaders xxs;
829 find_ifdef_funheaders xs
830
831
832
833 (* ?? *)
834 let rec adjust_inifdef_include xs =
835 xs +> List.iter (function
836 | NotIfdefLine _ -> ()
837 | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) ->
838 xxs +> List.iter (iter_token_ifdef (fun tokext ->
839 match tokext.tok with
840 | Parser_c.TInclude (s1, s2, inifdef_ref, ii) ->
841 inifdef_ref := true;
842 | _ -> ()
843 ));
844 )
845
846
847
848
849
850
851
852 let rec find_ifdef_cparen_else xs =
853 let rec aux xs =
854 xs +> List.iter (function
855 | NotIfdefLine _ -> ()
856 | Ifdef (xxs, info_ifdef_stmt) ->
857 (match xxs with
858 | [] -> raise (Impossible 92)
859 | [first] -> ()
860 | first::second::rest ->
861
862 (* found a closing ')' just after the #else *)
863
864 (* Too bad ocaml does not support better list pattern matching
865 * a la Prolog-III where can match the end of lists.
866 *)
867 let condition =
868 if List.length first = 0 then false
869 else
870 let last_line = Common.last first in
871 match last_line with
872 | NotIfdefLine xs ->
873 if List.length xs = 0 then false
874 else
875 let last_tok = Common.last xs in
876 TH.is_cpar last_tok.tok
877 | Ifdef _ | Ifdefbool _ -> false
878 in
879 if condition then begin
880 msg_ifdef_cparen_else();
881
882 (* keep only first, treat the rest as comment *)
883 info_ifdef_stmt +>
884 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
885 (second::rest) +> List.iter
886 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError));
887 end
888
889 );
890 List.iter aux xxs
891
892 (* no need complex analysis for ifdefbool *)
893 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
894 List.iter aux xxs
895 )
896 in aux xs
897
898
899 (* ------------------------------------------------------------------------- *)
900 (* cpp-builtin part2, macro, using standard.h or other defs *)
901 (* ------------------------------------------------------------------------- *)
902
903 (* now in cpp_token_c.ml *)
904
905 (* ------------------------------------------------------------------------- *)
906 (* stringification *)
907 (* ------------------------------------------------------------------------- *)
908
909 let rec find_string_macro_paren xs =
910 match xs with
911 | [] -> ()
912 | Parenthised(xxs, info_parens)::xs ->
913 xxs +> List.iter (fun xs ->
914 if xs +> List.exists
915 (function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) &&
916 xs +> List.for_all
917 (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) ->
918 true | _ -> false)
919 then
920 xs +> List.iter (fun tok ->
921 match tok with
922 | PToken({tok = TIdent (s,_)} as id) ->
923
924 msg_stringification s;
925 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
926 | _ -> ()
927 )
928 else
929 find_string_macro_paren xs
930 );
931 find_string_macro_paren xs
932 | PToken(tok)::xs ->
933 find_string_macro_paren xs
934
935
936 (* ------------------------------------------------------------------------- *)
937 (* macro2 *)
938 (* ------------------------------------------------------------------------- *)
939
940 (* don't forget to recurse in each case *)
941 let rec find_macro_paren xs =
942 match xs with
943 | [] -> ()
944
945 (* attribute *)
946 | PToken ({tok = Tattribute _} as id)
947 ::Parenthised (xxs,info_parens)
948 ::xs
949 ->
950 pr2_cpp ("MACRO: __attribute detected ");
951 [Parenthised (xxs, info_parens)] +>
952 iter_token_paren (TV.set_as_comment Token_c.CppAttr);
953 TV.set_as_comment Token_c.CppAttr id;
954 find_macro_paren xs
955
956 | PToken ({tok = TattributeNoarg _} as id)
957 ::xs
958 ->
959 pr2_cpp ("MACRO: __attributenoarg detected ");
960 TV.set_as_comment Token_c.CppAttr id;
961 find_macro_paren xs
962
963 (*
964 (* attribute cpp, __xxx id *)
965 | PToken ({tok = TIdent (s,i1)} as id)
966 ::PToken ({tok = TIdent (s2, i2)} as id2)
967 ::xs when s ==~ regexp_annot
968 ->
969 msg_attribute s;
970 id.tok <- TMacroAttr (s, i1);
971 find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)
972
973 (* attribute cpp, id __xxx *)
974 | PToken ({tok = TIdent (s,i1)} as _id)
975 ::PToken ({tok = TIdent (s2, i2)} as id2)
976 ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
977 ->
978 msg_attribute s2;
979 id2.tok <- TMacroAttr (s2, i2);
980 find_macro_paren xs
981
982 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
983 ::PToken ({tok = TIdent (s,i1)} as attr)
984 ::xs when s ==~ regexp_annot
985 ->
986 pr2_cpp ("storage attribute: " ^ s);
987 attr.tok <- TMacroAttrStorage (s,i1);
988 (* recurse, may have other storage attributes *)
989 find_macro_paren (PToken (tok1)::xs)
990
991 *)
992
993 (* storage attribute *)
994 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
995 ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs
996 ->
997 pr2_cpp ("storage attribute: " ^ s);
998 attr.tok <- TMacroAttrStorage (s,i1);
999 (* recurse, may have other storage attributes *)
1000 find_macro_paren (PToken (tok1)::xs)
1001
1002
1003 (* stringification
1004 *
1005 * the order of the matching clause is important
1006 *
1007 *)
1008
1009 (* string macro with params, before case *)
1010 | PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1011 ::Parenthised (xxs, info_parens)
1012 ::xs ->
1013
1014 msg_stringification_params s;
1015 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1016 [Parenthised (xxs, info_parens)] +>
1017 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
1018 find_macro_paren xs
1019
1020 (* after case *)
1021 | PToken ({tok = TIdent (s,_)} as id)
1022 ::Parenthised (xxs, info_parens)
1023 ::PToken ({tok = (TString _ | TMacroString _)})
1024 ::xs ->
1025
1026 msg_stringification_params s;
1027 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1028 [Parenthised (xxs, info_parens)] +>
1029 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
1030 find_macro_paren xs
1031
1032
1033 (* for the case where the string is not inside a funcall, but
1034 * for instance in an initializer.
1035 *)
1036
1037 (* string macro variable, before case *)
1038 | PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1039 ::xs when not !Flag.c_plus_plus ->
1040
1041 msg_stringification s;
1042 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1043 find_macro_paren xs
1044
1045 (* after case *)
1046 | PToken ({tok = TIdent (s,_)} as id)
1047 ::PToken ({tok = (TString _ | TMacroString _)})
1048 ::xs ->
1049
1050 msg_stringification s;
1051 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1052 find_macro_paren xs
1053
1054
1055
1056
1057
1058 (* recurse *)
1059 | (PToken x)::xs -> find_macro_paren xs
1060 | (Parenthised (xxs, info_parens))::xs ->
1061 xxs +> List.iter find_macro_paren;
1062 find_macro_paren xs
1063
1064
1065
1066
1067
1068 (* don't forget to recurse in each case *)
1069 let rec find_macro_lineparen xs =
1070 match xs with
1071 | [] -> ()
1072
1073 (* linuxext: ex: static [const] DEVICE_ATTR(); *)
1074 | (Line
1075 (
1076 [PToken ({tok = Tstatic _});
1077 PToken ({tok = TIdent (s,_)} as macro);
1078 Parenthised (xxs,info_parens);
1079 PToken ({tok = TPtVirg _});
1080 ]
1081 ))
1082 ::xs
1083 when (s ==~ regexp_macro) ->
1084
1085 msg_declare_macro s;
1086 let info = TH.info_of_tok macro.tok in
1087 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1088
1089 find_macro_lineparen (xs)
1090
1091 (* the static const case *)
1092 | (Line
1093 (
1094 [PToken ({tok = Tstatic _});
1095 PToken ({tok = Tconst _} as const);
1096 PToken ({tok = TIdent (s,_)} as macro);
1097 Parenthised (xxs,info_parens);
1098 PToken ({tok = TPtVirg _});
1099 ]
1100 (*as line1*)
1101
1102 ))
1103 ::xs
1104 when (s ==~ regexp_macro) ->
1105
1106 msg_declare_macro s;
1107 let info = TH.info_of_tok macro.tok in
1108 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1109
1110 (* need retag this const, otherwise ambiguity in grammar
1111 21: shift/reduce conflict (shift 121, reduce 137) on Tconst
1112 decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
1113 decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
1114 storage_class_spec : Tstatic . (137)
1115 *)
1116 const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
1117
1118 find_macro_lineparen (xs)
1119
1120
1121 (* same but without trailing ';'
1122 *
1123 * I do not put the final ';' because it can be on a multiline and
1124 * because of the way mk_line is coded, we will not have access to
1125 * this ';' on the next line, even if next to the ')' *)
1126 | (Line
1127 ([PToken ({tok = Tstatic _});
1128 PToken ({tok = TIdent (s,_)} as macro);
1129 Parenthised (xxs,info_parens);
1130 ]
1131 ))
1132 ::xs
1133 when s ==~ regexp_macro ->
1134
1135 msg_declare_macro s;
1136 let info = TH.info_of_tok macro.tok in
1137 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1138
1139 find_macro_lineparen (xs)
1140
1141
1142 (* on multiple lines *)
1143 | (Line
1144 (
1145 (PToken ({tok = Tstatic _})::[]
1146 )))
1147 ::(Line
1148 (
1149 [PToken ({tok = TIdent (s,_)} as macro);
1150 Parenthised (xxs,info_parens);
1151 PToken ({tok = TPtVirg _});
1152 ]
1153 )
1154 )
1155 ::xs
1156 when (s ==~ regexp_macro) ->
1157
1158 msg_declare_macro s;
1159 let info = TH.info_of_tok macro.tok in
1160 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1161
1162 find_macro_lineparen (xs)
1163
1164
1165 | (Line (* initializer case *)
1166 (
1167 PToken ({tok = Tstatic _}) ::
1168 PToken ({tok = TIdent (s,_)} as macro) ::
1169 Parenthised (xxs,info_parens) ::
1170 PToken ({tok = TEq _}) :: rest
1171 ))
1172 ::xs
1173 when (s ==~ regexp_macro) ->
1174
1175 msg_declare_macro s;
1176 let info = TH.info_of_tok macro.tok in
1177 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1178
1179 (* continue with the rest of the line *)
1180 find_macro_lineparen ((Line(rest))::xs)
1181
1182
1183 | (Line (* multi-line initializer case *)
1184 (
1185 (PToken ({tok = Tstatic _})::[]
1186 )))
1187 ::(Line
1188 (
1189 PToken ({tok = Tstatic _}) ::
1190 PToken ({tok = TIdent (s,_)} as macro) ::
1191 Parenthised (xxs,info_parens) ::
1192 PToken ({tok = TEq _}) :: rest
1193 ))
1194 ::xs
1195 when (s ==~ regexp_macro) ->
1196
1197 msg_declare_macro s;
1198 let info = TH.info_of_tok macro.tok in
1199 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1200
1201 (* continue with the rest of the line *)
1202 find_macro_lineparen ((Line(rest))::xs)
1203
1204
1205 (* linuxext: ex: DECLARE_BITMAP();
1206 *
1207 * Here I use regexp_declare and not regexp_macro because
1208 * Sometimes it can be a FunCallMacro such as DEBUG(foo());
1209 * Here we don't have the preceding 'static' so only way to
1210 * not have positive is to restrict to .*DECLARE.* macros.
1211 *
1212 * but there is a grammar rule for that, so don't need this case anymore
1213 * unless the parameter of the DECLARE_xxx are weird and can not be mapped
1214 * on a argument_list
1215 *)
1216
1217 | (Line
1218 ([PToken ({tok = TIdent (s,_)} as macro);
1219 Parenthised (xxs,info_parens);
1220 PToken ({tok = TPtVirg _});
1221 ]
1222 ))
1223 ::xs
1224 when (s ==~ regexp_declare) ->
1225
1226 msg_declare_macro s;
1227 let info = TH.info_of_tok macro.tok in
1228 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1229
1230 find_macro_lineparen (xs)
1231
1232
1233 (* toplevel macros.
1234 * module_init(xxx)
1235 *
1236 * Could also transform the TIdent in a TMacroTop but can have false
1237 * positive, so easier to just change the TCPar and so just solve
1238 * the end-of-stream pb of ocamlyacc
1239 *)
1240 | (Line
1241 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro);
1242 Parenthised (xxs,info_parens);
1243 ] as _line1
1244 ))
1245 ::xs when col1 =|= 0
1246 ->
1247 let condition =
1248 (* to reduce number of false positive *)
1249 (match xs with
1250 | (Line (PToken ({col = col2 } as other)::restline2))::_ ->
1251 TH.is_eof other.tok || (col2 =|= 0 &&
1252 (match other.tok with
1253 | TOBrace _ -> false (* otherwise would match funcdecl *)
1254 | TCBrace _ when ctx <> InFunction -> false
1255 | TPtVirg _
1256 | TDotDot _
1257 | TWhy _
1258 -> false
1259 | tok when TH.is_binary_operator tok -> false
1260
1261 | _ -> true
1262 )
1263 )
1264 | _ -> false
1265 )
1266 in
1267 if condition
1268 then begin
1269
1270 msg_macro_toplevel_noptvirg s;
1271 (* just to avoid the end-of-stream pb of ocamlyacc *)
1272 let tcpar = Common.last info_parens in
1273 tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok);
1274
1275 (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
1276
1277 end;
1278
1279 find_macro_lineparen (xs)
1280
1281
1282
1283 (* macro with parameters
1284 * ex: DEBUG()
1285 * return x;
1286 *)
1287 | (Line
1288 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1289 Parenthised (xxs,info_parens);
1290 ] as _line1
1291 ))
1292 ::(Line
1293 (PToken ({col = col2 } as other)::restline2
1294 ) as line2)
1295 ::xs
1296 (* when s ==~ regexp_macro *)
1297 ->
1298 (* This can give a false positive for K&R functions if the function
1299 name is in the same column as the first parameter declaration. *)
1300 let condition =
1301 (col1 =|= col2 &&
1302 (match other.tok with
1303 | TOBrace _ -> false (* otherwise would match funcdecl *)
1304 | TCBrace _ when ctx <> InFunction -> false
1305 | TPtVirg _
1306 | TDotDot _
1307 | TWhy _
1308 -> false
1309 | tok when TH.is_binary_operator tok -> false
1310
1311 | _ -> true
1312 )
1313 )
1314 ||
1315 (col2 <= col1 &&
1316 (match other.tok, restline2 with
1317 | TCBrace _, _ when ctx =*= InFunction -> true
1318 | Treturn _, _ -> true
1319 | Tif _, _ -> true
1320 | Telse _, _ -> true
1321
1322 (* case of label, usually put in first line *)
1323 | TIdent _, (PToken ({tok = TDotDot _}))::_ ->
1324 true
1325
1326
1327 | _ -> false
1328 )
1329 )
1330
1331 in
1332
1333 if condition
1334 then
1335 if col1 =|= 0 then ()
1336 else begin
1337 msg_macro_noptvirg s;
1338 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1339 [Parenthised (xxs, info_parens)] +>
1340 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
1341 end;
1342
1343 find_macro_lineparen (line2::xs)
1344
1345 (* linuxext:? single macro
1346 * ex: LOCK
1347 * foo();
1348 * UNLOCK
1349 *
1350 * todo: factorize code with previous rule ?
1351 *)
1352 | (Line
1353 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1354 ] as _line1
1355 ))
1356 ::(Line
1357 (PToken ({col = col2 } as other)::restline2
1358 ) as line2)
1359 ::xs ->
1360 (* when s ==~ regexp_macro *)
1361
1362 let condition =
1363 (col1 =|= col2 &&
1364 col1 <> 0 && (* otherwise can match typedef of fundecl*)
1365 (match other.tok with
1366 | TPtVirg _ -> false
1367 | TOr _ -> false
1368 | TCBrace _ when ctx <> InFunction -> false
1369 | TWhy _ -> false
1370 | tok when TH.is_binary_operator tok -> false
1371
1372 | _ -> true
1373 )) ||
1374 (col2 <= col1 &&
1375 (match other.tok with
1376 | TCBrace _ when ctx =*= InFunction -> true
1377 | Treturn _ -> true
1378 | Tif _ -> true
1379 | Telse _ -> true
1380 | _ -> false
1381 ))
1382 in
1383
1384 if condition
1385 then begin
1386 msg_macro_noptvirg_single s;
1387 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1388 end;
1389 find_macro_lineparen (line2::xs)
1390
1391 | x::xs ->
1392 find_macro_lineparen xs
1393
1394
1395
1396 (* ------------------------------------------------------------------------- *)
1397 (* define tobrace init *)
1398 (* ------------------------------------------------------------------------- *)
1399
1400 let rec find_define_init_brace_paren xs =
1401 let rec aux xs =
1402 match xs with
1403 | [] -> ()
1404
1405 (* mainly for firefox *)
1406 | (PToken {tok = TDefine _})
1407 ::(PToken {tok = TIdentDefine (s,_)})
1408 ::(PToken ({tok = TOBrace i1} as tokbrace))
1409 ::(PToken tok2)
1410 ::(PToken tok3)
1411 ::xs ->
1412 let is_init =
1413 match tok2.tok, tok3.tok with
1414 | TInt _, TComma _ -> true
1415 | TString _, TComma _ -> true
1416 | TIdent _, TComma _ -> true
1417 | _ -> false
1418
1419 in
1420 if is_init
1421 then begin
1422 pr2_cpp("found define initializer: " ^s);
1423 tokbrace.tok <- TOBraceDefineInit i1;
1424 end;
1425
1426 aux xs
1427
1428 (* mainly for linux, especially in sound/ *)
1429 | (PToken {tok = TDefine _})
1430 ::(PToken {tok = TIdentDefine (s,_)})
1431 ::(Parenthised(xxx, info_parens))
1432 ::(PToken ({tok = TOBrace i1} as tokbrace))
1433 ::(PToken tok2)
1434 ::(PToken tok3)
1435 ::xs ->
1436 let is_init =
1437 match tok2.tok, tok3.tok with
1438 | TInt _, TComma _ -> true
1439 | TDot _, TIdent _ -> true
1440 | TIdent _, TComma _ -> true
1441 | _ -> false
1442
1443 in
1444 if is_init
1445 then begin
1446 pr2_cpp("found define initializer with param: " ^ s);
1447 tokbrace.tok <- TOBraceDefineInit i1;
1448 end;
1449
1450 aux xs
1451
1452
1453
1454 (* recurse *)
1455 | (PToken x)::xs -> aux xs
1456 | (Parenthised (xxs, info_parens))::xs ->
1457 (* not need for tobrace init:
1458 * xxs +> List.iter aux;
1459 *)
1460 aux xs
1461 in
1462 aux xs
1463
1464
1465 (* ------------------------------------------------------------------------- *)
1466 (* action *)
1467 (* ------------------------------------------------------------------------- *)
1468
1469 (* obsolete now with macro expansion ? get some regression if comment.
1470 * todo: if do bad decision here, then it can influence other phases
1471 * and make it hard to parse. So maybe when have a parse error, should
1472 * undo some of the guess those heuristics have done, and restore
1473 * the original token value.
1474 *)
1475
1476 let rec find_actions = function
1477 | [] -> ()
1478
1479 | PToken ({tok = TIdent (s,ii)})
1480 ::Parenthised (xxs,info_parens)
1481 ::xs ->
1482 find_actions xs;
1483 xxs +> List.iter find_actions;
1484 let modified = find_actions_params xxs in
1485 if modified
1486 then msg_macro_higher_order s
1487
1488 | x::xs ->
1489 find_actions xs
1490
1491 and find_actions_params xxs =
1492 xxs +> List.fold_left (fun acc xs ->
1493 let toks = tokens_of_paren xs in
1494 if toks +> List.exists (fun x -> TH.is_statement x.tok)
1495 (* undo: && List.length toks > 1
1496 * good for sparse, not good for linux
1497 *)
1498 then begin
1499 xs +> iter_token_paren (fun x ->
1500 if TH.is_eof x.tok
1501 then
1502 (* certainly because paren detection had a pb because of
1503 * some ifdef-exp. Do similar additional checking than
1504 * what is done in TV.set_as_comment.
1505 *)
1506 pr2 "PB: weird, I try to tag an EOF token as an action"
1507 else
1508 (* cf tests-bis/no_cpar_macro.c *)
1509 if TH.is_eom x.tok
1510 then
1511 pr2 "PB: weird, I try to tag an EOM token as an action"
1512 else
1513 x.tok <- TAction (TH.info_of_tok x.tok);
1514 );
1515 true (* modified *)
1516 end
1517 else acc
1518 ) false
1519
1520
1521
1522 (* ------------------------------------------------------------------------- *)
1523 (* main fix cpp function *)
1524 (* ------------------------------------------------------------------------- *)
1525
1526 let filter_cpp_stuff xs =
1527 List.filter
1528 (function x ->
1529 (match x.tok with
1530 | tok when TH.is_comment tok -> false
1531 (* don't want drop the define, or if drop, have to drop
1532 * also its body otherwise the line heuristics may be lost
1533 * by not finding the TDefine in column 0 but by finding
1534 * a TDefineIdent in a column > 0
1535 *)
1536 | Parser_c.TDefine _ -> true
1537 | tok when TH.is_cpp_instruction tok -> false
1538 | _ -> true
1539 ))
1540 xs
1541
1542 let insert_virtual_positions l =
1543 let strlen x = String.length (Ast_c.str_of_info x) in
1544 let rec loop prev offset acc = function
1545 [] -> List.rev acc
1546 | x::xs ->
1547 let ii = TH.info_of_tok x in
1548 let inject pi =
1549 TH.visitor_info_of_tok (function ii -> Ast_c.rewrap_pinfo pi ii) x in
1550 match Ast_c.pinfo_of_info ii with
1551 Ast_c.OriginTok pi ->
1552 let prev = Ast_c.parse_info_of_info ii in
1553 loop prev (strlen ii) (x::acc) xs
1554 | Ast_c.ExpandedTok (pi,_) ->
1555 let x' = inject (Ast_c.ExpandedTok (pi,(prev,offset))) in
1556 loop prev (offset + (strlen ii)) (x'::acc) xs
1557 | Ast_c.FakeTok (s,_) ->
1558 let x' = inject (Ast_c.FakeTok (s,(prev,offset))) in
1559 loop prev (offset + (strlen ii)) (x'::acc) xs
1560 | Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in
1561 let rec skip_fake = function
1562 | [] -> []
1563 | x::xs ->
1564 let ii = TH.info_of_tok x in
1565 match Ast_c.pinfo_of_info ii with
1566 | Ast_c.OriginTok pi ->
1567 let prev = Ast_c.parse_info_of_info ii in
1568 let res = loop prev (strlen ii) [] xs in
1569 x::res
1570 | _ -> x::skip_fake xs in
1571 skip_fake l
1572
1573 (* ------------------------------------------------------------------------- *)
1574
1575 let fix_tokens_cpp2 ~macro_defs tokens =
1576 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
1577
1578 begin
1579 (* the order is important, if you put the action heuristic first,
1580 * then because of ifdef, can have not closed paren
1581 * and so may believe that higher order macro
1582 * and it will eat too much tokens. So important to do
1583 * first the ifdef.
1584 *
1585 * I recompute multiple times cleaner cos the mutable
1586 * can have be changed and so may have more comments
1587 * in the token original list.
1588 *
1589 *)
1590
1591 commentize_skip_start_to_end !tokens2;
1592
1593 (* ifdef *)
1594 let cleaner = !tokens2 +> List.filter (fun x ->
1595 (* is_comment will also filter the TCommentCpp created in
1596 * commentize_skip_start_to_end *)
1597 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1598 ) in
1599 let ifdef_grouped = TV.mk_ifdef cleaner in
1600 set_ifdef_parenthize_info ifdef_grouped;
1601
1602 find_ifdef_funheaders ifdef_grouped;
1603 find_ifdef_bool ifdef_grouped;
1604 find_ifdef_mid ifdef_grouped;
1605 (* change order ? maybe cparen_else heuristic make some of the funheaders
1606 * heuristics irrelevant ?
1607 *)
1608 find_ifdef_cparen_else ifdef_grouped;
1609 adjust_inifdef_include ifdef_grouped;
1610
1611
1612 (* macro 1 *)
1613 let cleaner = !tokens2 +> filter_cpp_stuff in
1614
1615 let paren_grouped = TV.mk_parenthised cleaner in
1616 Cpp_token_c.apply_macro_defs
1617 ~msg_apply_known_macro
1618 ~msg_apply_known_macro_hint
1619 macro_defs paren_grouped;
1620 (* because the before field is used by apply_macro_defs *)
1621 tokens2 := TV.rebuild_tokens_extented !tokens2;
1622
1623 (* tagging contextual info (InFunc, InStruct, etc). Better to do
1624 * that after the "ifdef-simplification" phase.
1625 *)
1626 let cleaner = !tokens2 +> List.filter (fun x ->
1627 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1628 ) in
1629
1630 let brace_grouped = TV.mk_braceised cleaner in
1631 set_context_tag brace_grouped;
1632
1633 (* macro *)
1634 let cleaner = !tokens2 +> filter_cpp_stuff in
1635
1636 let paren_grouped = TV.mk_parenthised cleaner in
1637 let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
1638 find_define_init_brace_paren paren_grouped;
1639 find_string_macro_paren paren_grouped;
1640 find_macro_lineparen line_paren_grouped;
1641 find_macro_paren paren_grouped;
1642
1643
1644 (* obsolete: actions ? not yet *)
1645 let cleaner = !tokens2 +> filter_cpp_stuff in
1646 let paren_grouped = TV.mk_parenthised cleaner in
1647 find_actions paren_grouped;
1648
1649
1650
1651 insert_virtual_positions (!tokens2 +> Common.acc_map (fun x -> x.tok))
1652 end
1653
1654 let time_hack1 ~macro_defs a =
1655 Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 ~macro_defs a)
1656
1657 let fix_tokens_cpp ~macro_defs a =
1658 Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 ~macro_defs a)
1659
1660
1661
1662 let can_be_on_top_level tl =
1663 match tl with
1664 | Tstruct _
1665 | Ttypedef _
1666 | TDefine _
1667 | TIfdef _
1668 | TIfdefelse _
1669 | TIfdefelif _
1670 | TIfdefBool _
1671 | TIfdefMisc _
1672 | TIfdefVersion _
1673 | TEndif _ -> true
1674 | _ -> false
1675
1676
1677 (*****************************************************************************)
1678 (* Lexing with lookahead *)
1679 (*****************************************************************************)
1680
1681 (* Why using yet another parsing_hack technique ? The fix_xxx where do
1682 * some pre-processing on the full list of tokens is not enough ?
1683 * No cos sometimes we need more contextual info, and even if
1684 * set_context() tries to give some contextual info, it's not completely
1685 * accurate so the following code give yet another alternative, yet another
1686 * chance to transform some tokens.
1687 *
1688 * todo?: maybe could try to get rid of this technique. Maybe a better
1689 * set_context() would make possible to move this code using a fix_xx
1690 * technique.
1691 *
1692 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
1693 * it is more general to do it via my LALR(k) tech. Because here we can
1694 * transform some token give some context information. So sometimes it
1695 * makes sense to transform a token in one context, sometimes not, and
1696 * lex can not provide us this context information. Note that the order
1697 * in the pattern matching in lookahead is important. Do not cut/paste.
1698 *
1699 * Note that in next there is only "clean" tokens, there is no comment
1700 * or space tokens. This is done by the caller.
1701 *
1702 *)
1703
1704 open Lexer_parser (* for the fields of lexer_hint type *)
1705
1706 let not_struct_enum = function
1707 | (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _)::_ -> false
1708 | _ -> true
1709
1710 let pointer ?(followed_by=fun _ -> true)
1711 ?(followed_by_more=fun _ -> true) ts =
1712 let rec loop ts =
1713 match ts with
1714 | TMul _ :: rest -> loop rest
1715 | TAnd _ :: rest when !Flag.c_plus_plus -> loop rest
1716 | t :: ts' -> followed_by t && followed_by_more ts'
1717 | [] -> failwith "unexpected end of token stream" in
1718 match ts with
1719 | TMul _ :: rest -> loop rest
1720 | TAnd _ :: rest when !Flag.c_plus_plus -> loop rest
1721 | _ -> false
1722
1723 let ident = function
1724 TIdent _ -> true
1725 | _ -> false
1726
1727 let is_type = function
1728 | TypedefIdent _
1729 | Tvoid _
1730 | Tchar _
1731 | Tfloat _
1732 | Tdouble _
1733
1734 | Tsize_t _
1735 | Tssize_t _
1736 | Tptrdiff_t _
1737
1738 | Tint _
1739 | Tlong _
1740 | Tshort _ -> true
1741 | _ -> false
1742
1743 let is_cparen = function (TCPar _) -> true | _ -> false
1744 let is_oparen = function (TOPar _) -> true | _ -> false
1745
1746 let rec not_has_type_before f xs =
1747 match xs with
1748 | [] -> raise (Impossible 666)
1749 | x :: xs ->
1750 if f x then
1751 true
1752 else if is_type x then
1753 false
1754 else
1755 not_has_type_before f xs
1756
1757 (* This function is inefficient, because it will look over a K&R header,
1758 or function prototype multiple times. At least when we see a , and are in a
1759 parameter list, we know we will eventually see a close paren, and it
1760 should come fairly soon. *)
1761 let k_and_r l =
1762 let l1 = drop_until is_cparen l in
1763 match l1 with
1764 (TCPar _) :: (TOCro _) :: _ -> false
1765 | (TCPar _) :: _ -> true
1766 | _ -> false
1767
1768 (* (a)(b) is ambiguous, because (a) could be a function name or a cast.
1769 At this point, we just see an ident for a; we don't know if it is eg a local
1770 variable. This function sees at least if b is the only argument, ie there
1771 are no commas at top level *)
1772 let paren_before_comma l =
1773 let rec loop level = function
1774 [] -> false
1775 | (TComma _)::_ when level = 1 -> false
1776 | (TCPar _)::_ when level = 1 -> true
1777 | (TCPar _)::rest -> loop (level-1) rest
1778 | (TOPar _)::rest -> loop (level+1) rest
1779 | x::rest -> loop level rest in
1780 loop 0 l
1781
1782 let lookahead2 ~pass next before =
1783 match (next, before) with
1784
1785 (* c++ hacks *)
1786 (* yy xx( and in function *)
1787 | TOPar i1::_, TIdent(s,i2)::TypedefIdent _::_
1788 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1789 pr2_cpp("constructed_object: " ^s);
1790 TOParCplusplusInit i1
1791 | TOPar i1::_, TIdent(s,i2)::ptr
1792 when !Flag.c_plus_plus
1793 && pointer ~followed_by:(function TypedefIdent _ -> true | _ -> false) ptr
1794 && (LP.current_context () = (LP.InFunction)) ->
1795 pr2_cpp("constructed_object: " ^s);
1796 TOParCplusplusInit i1
1797 | TypedefIdent(s,i)::TOPar i1::_,_
1798 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1799 TIdent(s,i)
1800
1801 (*-------------------------------------------------------------*)
1802 (* typedef inference, parse_typedef_fix3 *)
1803 (*-------------------------------------------------------------*)
1804 (* xx xx *)
1805 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
1806 && ok_typedef s
1807 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
1808 ->
1809 (* parse_typedef_fix3:
1810 * acpi_object acpi_object;
1811 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
1812 * Le parser en interne a deja appelé le prochain token pour pouvoir
1813 * decider des choses.
1814 * => special case in lexer_heuristic, again
1815 *)
1816 if !Flag_parsing_c.debug_typedef
1817 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
1818
1819
1820 LP.disable_typedef();
1821 msg_typedef s i1 1; LP.add_typedef_root s;
1822 TypedefIdent (s, i1)
1823
1824 (* christia *)
1825
1826 (* delete[] *)
1827 | (TOCro i1 :: _, Tdelete _ :: _)
1828 when !Flag.c_plus_plus ->
1829 TCommentCpp (Token_c.CppDirective, i1)
1830 (* delete[] *)
1831 | (TCCro i1 :: _, Tdelete _ :: _)
1832 when !Flag.c_plus_plus ->
1833 TCommentCpp (Token_c.CppDirective, i1)
1834
1835 (* extern "_" tt *)
1836 | ((TString ((s, _), i1) | TMacroString (s, i1)) :: _ , Textern _ :: _)
1837 when !Flag.c_plus_plus ->
1838 TCommentCpp (Token_c.CppDirective, i1)
1839
1840 (* ) const { *)
1841 | (Tconst i1 :: TOBrace _ :: _ , TCPar _ :: _)
1842 when !Flag.c_plus_plus ->
1843 TCommentCpp (Token_c.CppDirective, i1)
1844
1845 (* xx const tt *)
1846 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::type_::_ , _)
1847 when not_struct_enum before
1848 && is_type type_ ->
1849 TCommentCpp (Token_c.CppDirective, i1)
1850
1851 (* xx struct *)
1852 | (TIdent (s, i1)::Tstruct _::_ , _)
1853 when not_struct_enum before ->
1854 TCommentCpp (Token_c.CppDirective, i1)
1855
1856 (* xx tt *)
1857 | (TIdent (s, i1)::type_::_ , _)
1858 when not_struct_enum before
1859 && is_type type_ ->
1860 TCommentCpp (Token_c.CppDirective, i1)
1861
1862 (* tt xx yy *)
1863 | (TIdent (s, i1)::TIdent (s2, i2)::_ , seen::_)
1864 when not_struct_enum before
1865 && is_type seen ->
1866 if is_macro s2 then
1867 TIdent (s, i1)
1868 else
1869 TCommentCpp (Token_c.CppDirective, i1)
1870
1871 (* exception to next rule *)
1872 | (TIdent (s2, i2)::TOPar _::_ , TIdent (s1, i1)::seen::_)
1873 when not_struct_enum before
1874 && is_macro s2 && is_type seen ->
1875 TIdent (s2, i2)
1876
1877 | (TIdent (s2, i2)::_ , TIdent (s, i1)::seen::_)
1878 when not_struct_enum before
1879 && is_macro s2 && is_type seen ->
1880 TCommentCpp (Token_c.CppDirective, i2)
1881
1882 (* tt xx * *)
1883 | (TIdent (s, i1)::ptr , seen::_)
1884 when not_struct_enum before
1885 && pointer ptr && is_type seen ->
1886 TCommentCpp (Token_c.CppDirective, i1)
1887
1888 (* tt * xx yy *)
1889 | (TIdent (s, i1)::TIdent(s2, i2)::_ , ptr)
1890 when not_struct_enum before
1891 && pointer ptr ->
1892 if is_macro s2 then
1893 TIdent (s, i1)
1894 else
1895 TCommentCpp (Token_c.CppDirective, i1)
1896
1897 (* exception to next rule *)
1898 | (TIdent (s2, i2)::TOPar _::_ , TIdent (s1, i1)::ptr)
1899 when not_struct_enum before
1900 && is_macro s2 && pointer ptr ->
1901 TIdent (s2, i2)
1902 (* tt * xx yy *)
1903 | (TIdent(s2, i2)::_ , TIdent (s, i1)::ptr)
1904 when not_struct_enum before
1905 && is_macro s2 && pointer ptr ->
1906 TCommentCpp (Token_c.CppDirective, i2)
1907
1908 (* exception to next rule *)
1909 | (TIdent(s2, i2)::TOPar _ :: _ , TIdent(s, i1)::seen::_)
1910 when not_struct_enum before
1911 && is_macro s2 && is_type seen ->
1912 TIdent(s2, i2)
1913 (* tt xx yy *)
1914 | (TIdent(s2, i2)::_ , TIdent(s, i1)::seen::_)
1915 when not_struct_enum before
1916 && is_macro s2 && is_type seen ->
1917 TCommentCpp (Token_c.CppDirective, i2)
1918
1919 (* xx * yy AND in paramdecl *)
1920 | (TIdent (s, i1)::ptr , _)
1921 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1922 && pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
1923 && ok_typedef s ->
1924 msg_typedef s i1 14; LP.add_typedef_root s;
1925 TypedefIdent (s, i1)
1926
1927 (* xx MM ( *)
1928 (* exception to next rule *)
1929 | (TIdent (s, i1)::TIdent (s2, i2)::TOPar _::_ , _) when not_struct_enum before
1930 && ok_typedef s && is_macro s2
1931 ->
1932 TIdent (s, i1)
1933 (* xx yy *)
1934 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
1935 && ok_typedef s
1936 ->
1937 (* && not_annot s2 BUT lead to false positive*)
1938 msg_typedef s i1 2; LP.add_typedef_root s;
1939 TypedefIdent (s, i1)
1940
1941
1942 (* xx inline *)
1943 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
1944 && ok_typedef s
1945 ->
1946
1947 msg_typedef s i1 3; LP.add_typedef_root s;
1948 TypedefIdent (s, i1)
1949
1950
1951 (* [,(] xx [,)] AND param decl *)
1952 | (TIdent (s, i1)::(((TComma _|TCPar _)::_) as rest) ,
1953 ((TComma _ |TOPar _)::_ as bef))
1954 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1955 && k_and_r rest
1956 && not_has_type_before is_cparen rest
1957 && not_has_type_before is_oparen bef
1958 ->
1959
1960 TKRParam(s,i1)
1961
1962 | (TIdent (s, i1)::((TComma _|TCPar _)::_) , (TComma _ |TOPar _)::_ )
1963 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1964 && ok_typedef s
1965 ->
1966
1967 msg_typedef s i1 4; LP.add_typedef_root s;
1968 TypedefIdent (s, i1)
1969
1970 (* xx* [,)] *)
1971 (* specialcase: [,(] xx* [,)] *)
1972 | (TIdent (s, i1)::ptr , (*(TComma _|TOPar _)::*)_ )
1973 when pointer ~followed_by:(function TComma _ |TCPar _ -> true | _ -> false) ptr
1974 && not_struct_enum before
1975 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1976 && ok_typedef s
1977 ->
1978
1979 msg_typedef s i1 5; LP.add_typedef_root s;
1980 TypedefIdent (s, i1)
1981
1982
1983 (* xx** [,)] *)
1984 (* specialcase: [,(] xx** [,)] *)
1985 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1986 when not_struct_enum before
1987 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1988 && ok_typedef s
1989 ->
1990
1991 msg_typedef s i1 6; LP.add_typedef_root s;
1992 TypedefIdent (s, i1)
1993
1994
1995
1996 (* xx const * USELESS because of next rule ? *)
1997 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
1998 when not_struct_enum before
1999 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2000 && ok_typedef s
2001 ->
2002
2003 msg_typedef s i1 7; LP.add_typedef_root s;
2004 TypedefIdent (s, i1)
2005
2006 (* xx const *)
2007 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
2008 when not_struct_enum before
2009 && ok_typedef s
2010 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2011 ->
2012
2013 msg_typedef s i1 8; LP.add_typedef_root s;
2014 TypedefIdent (s, i1)
2015
2016
2017 (* xx * const *)
2018 | (TIdent (s, i1)::ptr , _ )
2019 when pointer ~followed_by:(function Tconst _ | Tvolatile _ | Trestrict _ -> true | _ -> false) ptr
2020 && not_struct_enum before
2021 && ok_typedef s
2022 ->
2023
2024 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2025
2026 msg_typedef s i1 9; LP.add_typedef_root s;
2027 TypedefIdent (s, i1)
2028
2029
2030 (* ( const xx) *)
2031 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
2032 ok_typedef s ->
2033
2034 msg_typedef s i1 10; LP.add_typedef_root s;
2035 TypedefIdent (s, i1)
2036
2037
2038
2039 (* ( xx ) [sizeof, ~] *)
2040 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
2041 when not_struct_enum before
2042 && ok_typedef s
2043 ->
2044
2045 msg_typedef s i1 11; LP.add_typedef_root s;
2046 TypedefIdent (s, i1)
2047
2048 (* [(,] xx [ AND parameterdeclaration *)
2049 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
2050 when (LP.current_context() =*= LP.InParameter)
2051 && ok_typedef s
2052 ->
2053
2054 msg_typedef s i1 12; LP.add_typedef_root s;
2055 TypedefIdent (s, i1)
2056
2057 (*------------------------------------------------------------*)
2058 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
2059 (* but if have a '=', or ',' I think not *)
2060 (*------------------------------------------------------------*)
2061
2062 (* static xx * yy *)
2063 | (TIdent (s, i1)::ptr ,
2064 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
2065 pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
2066 && ok_typedef s
2067 ->
2068
2069 msg_typedef s i1 13; LP.add_typedef_root s;
2070 TypedefIdent (s, i1)
2071
2072 (* TODO xx * yy ; AND in start of compound element *)
2073
2074
2075 (* xx * yy, AND in paramdecl *)
2076 | (TIdent (s, i1)::ptr , _)
2077 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
2078 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2079 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
2080 && ok_typedef s
2081 ->
2082
2083 msg_typedef s i1 14; LP.add_typedef_root s;
2084 TypedefIdent (s, i1)
2085
2086
2087 (* xx * yy ; AND in Toplevel, except when have = before *)
2088 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
2089 TIdent (s, i1)
2090 | (TIdent (s, i1)::ptr , _)
2091 when not_struct_enum before
2092 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2093 ~followed_by_more:(function TPtVirg _ :: _ -> true | _ -> false) ptr
2094 && (LP.is_top_or_struct (LP.current_context ()))
2095 ->
2096
2097 msg_typedef s i1 15; LP.add_typedef_root s;
2098 TypedefIdent (s, i1)
2099
2100 (* xx * yy , AND in Toplevel *)
2101 | (TIdent (s, i1)::ptr , _)
2102 when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
2103 && ok_typedef s
2104 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2105 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
2106 ->
2107
2108 msg_typedef s i1 16; LP.add_typedef_root s;
2109 TypedefIdent (s, i1)
2110
2111 (* xx * yy ( AND in Toplevel *)
2112 | (TIdent (s, i1)::ptr , _)
2113 when not_struct_enum before
2114 && (LP.is_top_or_struct (LP.current_context ()))
2115 && ok_typedef s
2116 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2117 ~followed_by_more:(function TOPar _ :: _ -> true | _ -> false) ptr
2118 ->
2119
2120 msg_typedef s i1 17; LP.add_typedef_root s;
2121 TypedefIdent (s, i1)
2122
2123 (* xx * yy [ *)
2124 (* todo? enough ? cos in struct def we can have some expression ! *)
2125 | (TIdent (s, i1)::ptr , _)
2126 when not_struct_enum before
2127 && (LP.is_top_or_struct (LP.current_context ()))
2128 && ok_typedef s
2129 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2130 ~followed_by_more:(function TOCro _ :: _ -> true | _ -> false) ptr
2131 ->
2132
2133 msg_typedef s i1 18; LP.add_typedef_root s;
2134 TypedefIdent (s, i1)
2135
2136 (* u16: 10; in struct *)
2137 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
2138 when (LP.is_top_or_struct (LP.current_context ()))
2139 && ok_typedef s
2140 ->
2141
2142 msg_typedef s i1 19; LP.add_typedef_root s;
2143 TypedefIdent (s, i1)
2144
2145
2146 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
2147 (* YES cos at toplevel can have some expression !! for instance when *)
2148 (* enter in the dimension of an array *)
2149 (*
2150 | (TIdent s::TMul::TIdent s2::_ , _)
2151 when (take_safe 1 !passed_tok <> [Tstruct] &&
2152 (take_safe 1 !passed_tok <> [Tenum]))
2153 &&
2154 !LP._lexer_hint = Some LP.Toplevel ->
2155 msg_typedef s 20; LP.add_typedef_root s;
2156 TypedefIdent s
2157 *)
2158
2159 (* xx * yy = *)
2160 | (TIdent (s, i1)::ptr , _)
2161 when not_struct_enum before
2162 && ok_typedef s
2163 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2164 ~followed_by_more:(function TEq _ :: _ -> true | _ -> false) ptr
2165 ->
2166
2167 msg_typedef s i1 21; LP.add_typedef_root s;
2168 TypedefIdent (s, i1)
2169
2170
2171 (* xx * yy) AND in paramdecl *)
2172 | (TIdent (s, i1)::ptr , _)
2173 when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
2174 && ok_typedef s
2175 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2176 ~followed_by_more:(function TCPar _ :: _ -> true | _ -> false) ptr
2177 ->
2178
2179 msg_typedef s i1 22; LP.add_typedef_root s;
2180 TypedefIdent (s, i1)
2181
2182
2183 (* xx * yy; *) (* wrong ? *)
2184 | (TIdent (s, i1)::ptr ,
2185 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
2186 && ok_typedef s
2187 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2188 ~followed_by_more:(function TPtVirg _ :: _ -> true | _ -> false) ptr
2189 ->
2190
2191 msg_typedef s i1 23; LP.add_typedef_root s;
2192 msg_maybe_dangereous_typedef s;
2193 TypedefIdent (s, i1)
2194
2195
2196 (* xx * yy, and ';' before xx *) (* wrong ? *)
2197 | (TIdent (s, i1)::ptr ,
2198 (TOBrace _| TPtVirg _)::_) when
2199 ok_typedef s
2200 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2201 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
2202 ->
2203
2204 msg_typedef s i1 24; LP.add_typedef_root s;
2205 TypedefIdent (s, i1)
2206
2207
2208 (* xx_t * yy *)
2209 | (TIdent (s, i1)::ptr , _)
2210 when s ==~ regexp_typedef && not_struct_enum before
2211 (* struct user_info_t sometimes *)
2212 && ok_typedef s
2213 && pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
2214 ->
2215
2216 msg_typedef s i1 25; LP.add_typedef_root s;
2217 TypedefIdent (s, i1)
2218
2219 (* xx ** yy *) (* wrong ? *)
2220 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
2221 when not_struct_enum before
2222 && (LP.current_context() =*= LP.InParameter)
2223 && ok_typedef s
2224 ->
2225
2226 msg_typedef s i1 26; LP.add_typedef_root s;
2227 TypedefIdent (s, i1)
2228
2229 (* xx ** yy *) (* wrong ? *)
2230 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , (TOBrace _ | TPtVirg _)::_)
2231 when not_struct_enum before
2232 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2233 && ok_typedef s
2234 (* christia : this code catches 'a * *b' which is wrong
2235 *)
2236 ->
2237
2238 msg_typedef s i1 26; LP.add_typedef_root s;
2239 TypedefIdent (s, i1)
2240
2241 (* xx *** yy *)
2242 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
2243 when not_struct_enum before
2244 && ok_typedef s
2245 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2246 ->
2247
2248 msg_typedef s i1 27; LP.add_typedef_root s;
2249 TypedefIdent (s, i1)
2250
2251 (* xx ** ) *)
2252 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
2253 when not_struct_enum before
2254 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2255 && ok_typedef s
2256 ->
2257
2258 msg_typedef s i1 28; LP.add_typedef_root s;
2259 TypedefIdent (s, i1)
2260
2261
2262
2263 (* ----------------------------------- *)
2264 (* old: why not do like for other rules and start with TIdent ?
2265 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
2266 * new: prefer now start with TIdent because otherwise the add_typedef_root
2267 * may have no effect if in second pass or if have disable the add_typedef.
2268 *)
2269
2270 (* (xx) yy *)
2271 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
2272 (TOPar info)::x::_)
2273 when not (TH.is_stuff_taking_parenthized x) (* &&
2274 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 - why useful?
2275 *)
2276 && ok_typedef s
2277 && not (ident x) (* possible K&R declaration *)
2278 ->
2279
2280 msg_typedef s i1 29; LP.add_typedef_root s;
2281 (*TOPar info*)
2282 TypedefIdent (s, i1)
2283
2284
2285 (* (xx) ( yy)
2286 * but false positif: typedef int (xxx_t)(...), so do specialisation below.
2287 *)
2288 (*
2289 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
2290 when not (TH.is_stuff_taking_parenthized x)
2291 && ok_typedef s
2292 ->
2293 msg_typedef s 30; LP.add_typedef_root s;
2294 (* TOPar info *)
2295 TypedefIdent (s, i1)
2296 *)
2297 (* special case: = (xx) ( yy) *)
2298 | (TIdent (s, i1)::TCPar _::((TOPar _::_) as rest) ,
2299 (TOPar info)::(TEq _ |TEqEq _)::_)
2300 when ok_typedef s && paren_before_comma rest
2301 ->
2302
2303 msg_typedef s i1 31; LP.add_typedef_root s;
2304 (* TOPar info *)
2305 TypedefIdent (s, i1)
2306
2307
2308 (* (xx * ) yy *)
2309 | (TIdent (s, i1)::ptr, (TOPar info)::_)
2310 when ok_typedef s
2311 && pointer ~followed_by:(function TCPar _ -> true | _ -> false)
2312 ~followed_by_more:(function TIdent _ :: _ -> true | _ -> false) ptr
2313 ->
2314
2315 msg_typedef s i1 32; LP.add_typedef_root s;
2316 (*TOPar info*)
2317 TypedefIdent (s,i1)
2318
2319
2320 (* (xx){ ... } constructor *)
2321 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
2322 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
2323 && ok_typedef s
2324 ->
2325
2326 msg_typedef s i1 33; LP.add_typedef_root s;
2327 TypedefIdent (s, i1)
2328
2329
2330 (* can have sizeof on expression
2331 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
2332 msg_typedef s; LP.add_typedef_root s;
2333 Tsizeof
2334 *)
2335
2336
2337 (* ----------------------------------- *)
2338 (* x ( *y )(params), function pointer *)
2339 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
2340 when not_struct_enum before
2341 && ok_typedef s
2342 ->
2343
2344 msg_typedef s i1 34; LP.add_typedef_root s;
2345 TypedefIdent (s, i1)
2346
2347 (* x* ( *y )(params), function pointer 2 *)
2348 | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
2349 when not_struct_enum before
2350 && ok_typedef s
2351 ->
2352
2353 msg_typedef s i1 35; LP.add_typedef_root s;
2354 TypedefIdent (s, i1)
2355
2356
2357 (*-------------------------------------------------------------*)
2358 (* CPP *)
2359 (*-------------------------------------------------------------*)
2360 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
2361 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
2362 as x)
2363 ::_, _
2364 ->
2365
2366 (*
2367 if not !Flag_parsing_c.ifdef_to_if
2368 then TCommentCpp (Ast_c.CppIfDirective, ii)
2369 else
2370 *)
2371 (* not !LP._lexer_hint.toplevel *)
2372 if !Flag_parsing_c.ifdef_directive_passing
2373 || (pass >= 2)
2374 then begin
2375
2376 if (LP.current_context () =*= LP.InInitializer)
2377 then begin
2378 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
2379 incr Stat.nIfdefInitializer;
2380 end else begin
2381 pr2_cpp("IFDEF: or related inside function. I treat it as comment");
2382 incr Stat.nIfdefPassing;
2383 end;
2384 let x =
2385 match x with
2386 TIfdef _ | TIfdefMisc _ | TIfdefVersion _ -> Token_c.IfDef
2387 | TIfdefBool _ -> Token_c.IfDef0
2388 | TIfdefelse _ | TIfdefelif _ -> Token_c.Else
2389 | TEndif _ -> Token_c.Endif
2390 | _ -> Token_c.Other in (* not possible here *)
2391 TCommentCpp (Token_c.CppIfDirective x, ii)
2392 end
2393 else x
2394
2395 | (TUndef (ii) as x)::_, _
2396 ->
2397 if (pass >= 2)
2398 then begin
2399 pr2_cpp("UNDEF: I treat it as comment");
2400 TCommentCpp (Token_c.CppDirective, ii)
2401 end
2402 else x
2403
2404 | (TCppDirectiveOther (ii) as x)::_, _
2405 ->
2406 if (pass >= 2)
2407 then begin
2408 pr2_cpp ("OTHER directive: I treat it as comment");
2409 TCommentCpp (Token_c.CppDirective, ii)
2410 end
2411 else x
2412
2413 (* If ident contain a for_each, then certainly a macro. But to be
2414 * sure should look if there is a '{' after the ')', but it requires
2415 * to count the '('. Because this can be expensive, we do that only
2416 * when the token contains "for_each".
2417 *)
2418 | (TIdent (s, i1)::TOPar _::rest, _)
2419 when not (LP.current_context () =*= LP.InTopLevel)
2420 (* otherwise a function such as static void loopback_enable(int i) {
2421 * will be considered as a loop
2422 *)
2423 ->
2424
2425 if s ==~ regexp_foreach &&
2426 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
2427
2428 then begin
2429 msg_foreach s;
2430 TMacroIterator (s, i1)
2431 end
2432 else TIdent (s, i1)
2433
2434 (* (* christia: here insert support for macros on top level *)
2435 | TIdent (s, ii) :: tl :: _, _ when
2436 can_be_on_top_level tl && LP.current_context () = InTopLevel ->
2437 pr2_cpp ("'" ^ s ^ "' looks like a macro, I treat it as comment");
2438 TCommentCpp (Token_c.CppDirective, ii)
2439 *)
2440
2441 (*-------------------------------------------------------------*)
2442 | v::xs, _ -> v
2443 | _ -> raise (Impossible 93)
2444
2445 let lookahead ~pass a b =
2446 Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b)
2447
2448