dfb18a6b2ca07eec59b051206f2e2300b446077c
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16 open Common
17
18 module TH = Token_helpers
19 module TV = Token_views_c
20 module LP = Lexer_parser
21
22 module Stat = Parsing_stat
23
24 open Parser_c
25
26 open TV
27
28 (*****************************************************************************)
29 (* Some debugging functions *)
30 (*****************************************************************************)
31
32 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
33
34 let pr2_cpp s =
35 if !Flag_parsing_c.debug_cpp
36 then Common.pr2_once ("CPP-" ^ s)
37
38
39 let msg_gen cond is_known printer s =
40 if cond
41 then
42 if not (!Flag_parsing_c.filter_msg)
43 then printer s
44 else
45 if not (is_known s)
46 then printer s
47
48 (* In the following, there are some harcoded names of types or macros
49 * but they are not used by our heuristics! They are just here to
50 * enable to detect false positive by printing only the typedef/macros
51 * that we don't know yet. If we print everything, then we can easily
52 * get lost with too much verbose tracing information. So those
53 * functions "filter" some messages. So our heuristics are still good,
54 * there is no more (or not that much) hardcoded linux stuff.
55 *)
56
57 let is_known_typdef =
58 (fun s ->
59 (match s with
60 | "u_char" | "u_short" | "u_int" | "u_long"
61 | "u8" | "u16" | "u32" | "u64"
62 | "s8" | "s16" | "s32" | "s64"
63 | "__u8" | "__u16" | "__u32" | "__u64"
64 -> true
65
66 | "acpi_handle"
67 | "acpi_status"
68 -> true
69
70 | "FILE"
71 | "DIR"
72 -> true
73
74 | s when s =~ ".*_t$" -> true
75 | _ -> false
76 )
77 )
78
79 (* note: cant use partial application with let msg_typedef =
80 * because it would compute msg_typedef at compile time when
81 * the flag debug_typedef is always false
82 *)
83 let msg_typedef s ii n =
84 incr Stat.nTypedefInfer;
85 msg_gen (!Flag_parsing_c.debug_typedef)
86 is_known_typdef
87 (fun s ->
88 pr2_cpp
89 (Printf.sprintf "TYPEDEF: promoting:(%d) %s on line %d" n s
90 (Ast_c.line_of_info ii))
91 (*(Printf.sprintf "TYPEDEF: promoting: %s on line %d" s
92 (Ast_c.line_of_info ii))*)
93 )
94 s
95
96 let msg_maybe_dangereous_typedef s =
97 if not (is_known_typdef s)
98 then
99 pr2
100 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
101
102
103
104 let msg_declare_macro s =
105 incr Stat.nMacroDecl;
106 msg_gen (!Flag_parsing_c.debug_cpp)
107 (fun s ->
108 (match s with
109 | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
110 | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
111 | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
112 | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
113 | "SENSOR_DEVICE_ATTR"
114 | "LIST_HEAD"
115 | "DECLARE_WORK" | "DECLARE_TASKLET"
116 | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
117 | "DECLARE_BITMAP"
118
119 -> true
120 (*
121 | s when s =~ "^DECLARE_.*" -> true
122 | s when s =~ ".*_ATTR$" -> true
123 | s when s =~ "^DEFINE_.*" -> true
124 *)
125
126 | _ -> false
127 )
128 )
129 (fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s))
130 s
131
132
133 let msg_foreach s =
134 incr Stat.nIteratorHeuristic;
135 pr2_cpp ("MACRO: found foreach: " ^ s)
136
137
138 (* ??
139 let msg_debug_macro s =
140 pr2_cpp ("MACRO: found debug-macro: " ^ s)
141 *)
142
143
144 let msg_macro_noptvirg s =
145 incr Stat.nMacroStmt;
146 pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)
147
148 let msg_macro_toplevel_noptvirg s =
149 incr Stat.nMacroStmt;
150 pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)
151
152 let msg_macro_noptvirg_single s =
153 incr Stat.nMacroStmt;
154 pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
155
156
157
158
159 let msg_macro_higher_order s =
160 incr Stat.nMacroHigherOrder;
161 msg_gen (!Flag_parsing_c.debug_cpp)
162 (fun s ->
163 (match s with
164 | "DBGINFO"
165 | "DBGPX"
166 | "DFLOW"
167 -> true
168 | _ -> false
169 )
170 )
171 (fun s -> pr2_cpp ("MACRO: found higher ordre macro : " ^ s))
172 s
173
174
175 let msg_stringification s =
176 incr Stat.nMacroString;
177 msg_gen (!Flag_parsing_c.debug_cpp)
178 (fun s ->
179 (match s with
180 | "REVISION"
181 | "UTS_RELEASE"
182 | "SIZE_STR"
183 | "DMA_STR"
184 -> true
185 (* s when s =~ ".*STR.*" -> true *)
186 | _ -> false
187 )
188 )
189 (fun s -> pr2_cpp ("MACRO: found string-macro " ^ s))
190 s
191
192 let msg_stringification_params s =
193 incr Stat.nMacroString;
194 pr2_cpp ("MACRO: string-macro with params : " ^ s)
195
196
197
198 let msg_apply_known_macro s =
199 incr Stat.nMacroExpand;
200 pr2_cpp ("MACRO: found known macro = " ^ s)
201
202 let msg_apply_known_macro_hint s =
203 incr Stat.nMacroHint;
204 pr2_cpp ("MACRO: found known macro hint = " ^ s)
205
206
207
208
209 let msg_ifdef_bool_passing is_ifdef_positif =
210 incr Stat.nIfdefZero; (* of Version ? *)
211 if is_ifdef_positif
212 then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION"
213 else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
214
215
216 let msg_ifdef_mid_something () =
217 incr Stat.nIfdefExprPassing;
218 pr2_cpp "found ifdef-mid-something"
219
220 let msg_ifdef_funheaders () =
221 incr Stat.nIfdefFunheader;
222 ()
223
224 let msg_ifdef_cparen_else () =
225 incr Stat.nIfdefPassing;
226 pr2_cpp("found ifdef-cparen-else")
227
228
229 let msg_attribute s =
230 incr Stat.nMacroAttribute;
231 pr2_cpp("ATTR:" ^ s)
232
233
234
235 (*****************************************************************************)
236 (* The regexp and basic view definitions *)
237 (*****************************************************************************)
238
239 (* opti: better to built then once and for all, especially regexp_foreach *)
240
241 let regexp_macro = Str.regexp
242 "^[A-Z_][A-Z_0-9]*$"
243
244 (* linuxext: *)
245 let regexp_annot = Str.regexp
246 "^__.*$"
247
248 (* linuxext: *)
249 let regexp_declare = Str.regexp
250 ".*DECLARE.*"
251
252 (* linuxext: *)
253 let regexp_foreach = Str.regexp_case_fold
254 ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
255
256 let regexp_typedef = Str.regexp
257 ".*_t$"
258
259 let false_typedef = [
260 "printk";
261 ]
262
263
264 let ok_typedef s = not (List.mem s false_typedef)
265
266 let not_annot s =
267 not (s ==~ regexp_annot)
268 let is_macro s =
269 s ==~ regexp_macro
270 let not_macro s =
271 not (is_macro s)
272
273
274
275
276 (*****************************************************************************)
277 (* Helpers *)
278 (*****************************************************************************)
279
280 (* ------------------------------------------------------------------------- *)
281 (* the pair is the status of '()' and '{}', ex: (-1,0)
282 * if too much ')' and good '{}'
283 * could do for [] too ?
284 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
285 * then if have ifdef, then certainly can lead to a problem.
286 *)
287 let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
288 fun xs ->
289 let cnt_paren, cnt_brace = ref 0, ref 0 in
290 xs +> TV.iter_token_ifdef (fun x ->
291 (match x.tok with
292 | x when TH.is_opar x -> incr cnt_paren
293 | TOBrace _ -> incr cnt_brace
294 | x when TH.is_cpar x -> decr cnt_paren
295 | TCBrace _ -> decr cnt_brace
296 | _ -> ()
297 )
298 );
299 !cnt_paren, !cnt_brace
300
301
302 (* ------------------------------------------------------------------------- *)
303 let forLOOKAHEAD = 30
304
305
306 (* look if there is a '{' just after the closing ')', and handling the
307 * possibility to have nested expressions inside nested parenthesis
308 *
309 * todo: use indentation instead of premier(statement) ?
310 *)
311 let rec is_really_foreach xs =
312 let rec is_foreach_aux = function
313 | [] -> false, []
314 | TCPar _::TOBrace _::xs -> true, xs
315 (* the following attempts to handle the cases where there is a
316 single statement in the body of the loop. undoubtedly more
317 cases are needed.
318 todo: premier(statement) - suivant(funcall)
319 *)
320 | TCPar _::TIdent _::xs -> true, xs
321 | TCPar _::Tif _::xs -> true, xs
322 | TCPar _::Twhile _::xs -> true, xs
323 | TCPar _::Tfor _::xs -> true, xs
324 | TCPar _::Tswitch _::xs -> true, xs
325 | TCPar _::Treturn _::xs -> true, xs
326
327
328 | TCPar _::xs -> false, xs
329 | TOPar _::xs ->
330 let (_, xs') = is_foreach_aux xs in
331 is_foreach_aux xs'
332 | x::xs -> is_foreach_aux xs
333 in
334 is_foreach_aux xs +> fst
335
336
337 (* ------------------------------------------------------------------------- *)
338 let set_ifdef_token_parenthize_info cnt x =
339 match x with
340 | TIfdef (tag, _)
341 | TIfdefelse (tag, _)
342 | TIfdefelif (tag, _)
343 | TEndif (tag, _)
344
345 | TIfdefBool (_, tag, _)
346 | TIfdefMisc (_, tag, _)
347 | TIfdefVersion (_, tag, _) ->
348 tag := Some cnt;
349
350 | _ -> raise (Impossible 89)
351
352
353
354 let ifdef_paren_cnt = ref 0
355
356
357 let rec set_ifdef_parenthize_info xs =
358 xs +> List.iter (function
359 | NotIfdefLine xs -> ()
360 | Ifdefbool (_, xxs, info_ifdef)
361 | Ifdef (xxs, info_ifdef) ->
362
363 incr ifdef_paren_cnt;
364 let total_directives = List.length info_ifdef in
365
366 info_ifdef +> List.iter (fun x ->
367 set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives)
368 x.tok);
369 xxs +> List.iter set_ifdef_parenthize_info
370 )
371
372
373 (*****************************************************************************)
374 (* The parsing hack for #define *)
375 (*****************************************************************************)
376
377 (* To parse macro definitions I need to do some tricks
378 * as some information can be get only at the lexing level. For instance
379 * the space after the name of the macro in '#define foo (x)' is meaningful
380 * but the grammar can not get this information. So define_ident below
381 * look at such space and generate a special TOpardefine. In a similar
382 * way macro definitions can contain some antislash and newlines
383 * and the grammar need to know where the macro ends (which is
384 * a line-level and so low token-level information). Hence the
385 * function 'define_line' below and the TDefEol.
386 *
387 * update: TDefEol is handled in a special way at different places,
388 * a little bit like EOF, especially for error recovery, so this
389 * is an important token that should not be retagged!
390 *
391 *
392 * ugly hack, a better solution perhaps would be to erase TDefEOL
393 * from the Ast and list of tokens in parse_c.
394 *
395 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
396 *
397 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
398 * is where is the \n TCommentSpace. Normally there is always a last token
399 * to synchronize on, either EOF or the token of the next toplevel.
400 * In the case of the #define we got in list of token
401 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
402 * not synchronize on it and so we will not print the "\n".
403 * A solution would be to put the TDefEOL before the "\n".
404 * (jll: tried to do this, see the comment "Put end of line..." below)
405 *
406 * todo?: could put a ExpandedTok for that ?
407 *)
408 let mark_end_define ii =
409 let ii' =
410 { Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
411 Common.str = "";
412 Common.charpos = Ast_c.pos_of_info ii + 1
413 };
414 cocci_tag = ref Ast_c.emptyAnnot;
415 annots_tag = Token_annot.empty;
416 comments_tag = ref Ast_c.emptyComments;
417 }
418 in
419 TDefEOL (ii')
420
421 (* put the TDefEOL at the good place *)
422 let rec define_line_1 acc xs =
423 match xs with
424 | [] -> List.rev acc
425 | TDefine ii::xs ->
426 let line = Ast_c.line_of_info ii in
427 let acc = (TDefine ii) :: acc in
428 define_line_2 acc line ii xs
429 | TUndef ii::xs ->
430 let line = Ast_c.line_of_info ii in
431 let acc = (TUndef ii) :: acc in
432 define_line_2 acc line ii xs
433 | TCppEscapedNewline ii::xs ->
434 pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
435 pr2 (Ast_c.strloc_of_info ii);
436 let acc = (TCommentSpace ii) :: acc in
437 define_line_1 acc xs
438 | x::xs -> define_line_1 (x::acc) xs
439
440 and define_line_2 acc line lastinfo xs =
441 match xs with
442 | [] ->
443 (* should not happened, should meet EOF before *)
444 pr2 "PB: WEIRD";
445 List.rev (mark_end_define lastinfo::acc)
446 | x::xs ->
447 let line' = TH.line_of_tok x in
448 let info = TH.info_of_tok x in
449
450 (match x with
451 | EOF ii ->
452 let acc = (mark_end_define lastinfo) :: acc in
453 let acc = (EOF ii) :: acc in
454 define_line_1 acc xs
455 | TCppEscapedNewline ii ->
456 if (line' <> line) then pr2 "PB: WEIRD: not same line number";
457 let acc = (TCommentSpace ii) :: acc in
458 define_line_2 acc (line+1) info xs
459 | x ->
460 if line' =|= line
461 then define_line_2 (x::acc) line info xs
462 else
463 (* Put end of line token before the newline. A newline at least
464 must be there because the line changed and because we saw a
465 #define previously to get to this function at all *)
466 define_line_1
467 ((List.hd acc)::(mark_end_define lastinfo::(List.tl acc)))
468 (x::xs)
469 )
470
471 let rec define_ident acc xs =
472 match xs with
473 | [] -> List.rev acc
474 | TUndef ii::xs ->
475 let acc = TUndef ii :: acc in
476 (match xs with
477 TCommentSpace i1::TIdent (s,i2)::xs ->
478 let acc = (TCommentSpace i1) :: acc in
479 let acc = (TIdentDefine (s,i2)) :: acc in
480 define_ident acc xs
481 | _ ->
482 pr2 "WEIRD: weird #define body";
483 define_ident acc xs
484 )
485 | TDefine ii::xs ->
486 let acc = TDefine ii :: acc in
487 (match xs with
488 | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
489 (* Change also the kind of TIdent to avoid bad interaction
490 * with other parsing_hack tricks. For instant if keep TIdent then
491 * the stringication algo can believe the TIdent is a string-macro.
492 * So simpler to change the kind of the ident too.
493 *)
494 (* if TOParDefine sticked to the ident, then
495 * it's a macro-function. Change token to avoid ambiguity
496 * between #define foo(x) and #define foo (x)
497 *)
498 let acc = (TCommentSpace i1) :: acc in
499 let acc = (TIdentDefine (s,i2)) :: acc in
500 let acc = (TOParDefine i3) :: acc in
501 define_ident acc xs
502
503 | TCommentSpace i1::TIdent (s,i2)::xs ->
504 let acc = (TCommentSpace i1) :: acc in
505 let acc = (TIdentDefine (s,i2)) :: acc in
506 define_ident acc xs
507
508 (* bugfix: ident of macro (as well as params, cf below) can be tricky
509 * note, do we need to subst in the body of the define ? no cos
510 * here the issue is the name of the macro, as in #define inline,
511 * so obviously the name of this macro will not be used in its
512 * body (it would be a recursive macro, which is forbidden).
513 *)
514
515 | TCommentSpace i1::t::xs
516 when TH.str_of_tok t ==~ Common.regexp_alpha
517 ->
518 let s = TH.str_of_tok t in
519 let ii = TH.info_of_tok t in
520 pr2 (spf "remapping: %s to an ident in macro name" s);
521 let acc = (TCommentSpace i1) :: acc in
522 let acc = (TIdentDefine (s,ii)) :: acc in
523 define_ident acc xs
524
525 | TCommentSpace _::_::xs
526 | xs ->
527 pr2 "WEIRD: weird #define body";
528 define_ident acc xs
529 )
530 | x::xs ->
531 let acc = x :: acc in
532 define_ident acc xs
533
534
535
536 let fix_tokens_define2 xs =
537 define_ident [] (define_line_1 [] xs)
538
539 let fix_tokens_define a =
540 Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a)
541
542
543
544
545
546 (* ------------------------------------------------------------------------- *)
547 (* Other parsing hacks related to cpp, Include/Define hacks *)
548 (* ------------------------------------------------------------------------- *)
549
550 (* Sometimes I prefer to generate a single token for a list of things in the
551 * lexer so that if I have to passed them, like for passing TInclude then
552 * it's easy. Also if I don't do a single token, then I need to
553 * parse the rest which may not need special stuff, like detecting
554 * end of line which the parser is not really ready for. So for instance
555 * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just
556 * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ?
557 * but this kind of token is valid only after a #include and the
558 * lexing and parsing rules are different for such tokens so not that
559 * easy to parse such things in parser_c.mly. Hence the following hacks.
560 *
561 * less?: maybe could get rid of this like I get rid of some of fix_define.
562 *)
563
564 (* helpers *)
565
566 (* used to generate new token from existing one *)
567 let new_info posadd str ii =
568 { Ast_c.pinfo =
569 Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
570 charpos = Ast_c.pos_of_info ii + posadd;
571 str = str;
572 column = Ast_c.col_of_info ii + posadd;
573 };
574 (* must generate a new ref each time, otherwise share *)
575 cocci_tag = ref Ast_c.emptyAnnot;
576 annots_tag = Token_annot.empty;
577 comments_tag = ref Ast_c.emptyComments;
578 }
579
580
581 let rec comment_until_defeol xs =
582 match xs with
583 | [] ->
584 (* job not done in Cpp_token_c.define_parse ? *)
585 failwith "cant find end of define token TDefEOL"
586 | x::xs ->
587 (match x with
588 | Parser_c.TDefEOL i ->
589 Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok x)
590 ::xs
591 | _ ->
592 let x' =
593 (* bugfix: otherwise may lose a TComment token *)
594 if TH.is_real_comment x
595 then x
596 else Parser_c.TCommentCpp (Token_c.CppPassingNormal (*good?*), TH.info_of_tok x)
597 in
598 x'::comment_until_defeol xs
599 )
600
601 let drop_until_defeol xs =
602 List.tl
603 (Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs)
604
605
606
607 (* ------------------------------------------------------------------------- *)
608 (* returns a pair (replaced token, list of next tokens) *)
609 (* ------------------------------------------------------------------------- *)
610
611 let tokens_include (info, includes, filename, inifdef) =
612 Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef),
613 [Parser_c.TIncludeFilename
614 (filename, (new_info (String.length includes) filename info))
615 ]
616
617
618
619
620 (*****************************************************************************)
621 (* CPP handling: macros, ifdefs, macros defs *)
622 (*****************************************************************************)
623
624 (* ------------------------------------------------------------------------- *)
625 (* special skip_start skip_end handling *)
626 (* ------------------------------------------------------------------------- *)
627
628 (* note: after this normally the token list should not contain any more the
629 * TCommentSkipTagStart and End tokens.
630 *)
631 let rec commentize_skip_start_to_end xs =
632 match xs with
633 | [] -> ()
634 | x::xs ->
635 (match x with
636 | {tok = TCommentSkipTagStart info} ->
637 (try
638 let (before, x2, after) =
639 xs +> Common.split_when (function
640 | {tok = TCommentSkipTagEnd _ } -> true
641 | _ -> false
642 )
643 in
644 let topass = x::x2::before in
645 topass +> List.iter (fun tok ->
646 TV.set_as_comment Token_c.CppPassingExplicit tok
647 );
648 commentize_skip_start_to_end after
649 with Not_found ->
650 failwith "could not find end of skip_start special comment"
651 )
652 | {tok = TCommentSkipTagEnd info} ->
653 failwith "found skip_end comment but no skip_start"
654 | _ ->
655 commentize_skip_start_to_end xs
656 )
657
658
659
660
661 (* ------------------------------------------------------------------------- *)
662 (* ifdef keeping/passing *)
663 (* ------------------------------------------------------------------------- *)
664
665 (* #if 0, #if 1, #if LINUX_VERSION handling *)
666 let rec find_ifdef_bool xs =
667 xs +> List.iter (function
668 | NotIfdefLine _ -> ()
669 | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) ->
670
671 msg_ifdef_bool_passing is_ifdef_positif;
672
673 (match xxs with
674 | [] -> raise (Impossible 90)
675 | firstclause::xxs ->
676 info_ifdef_stmt +>
677 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
678
679 if is_ifdef_positif
680 then xxs +> List.iter
681 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal))
682 else begin
683 firstclause +>
684 iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal);
685 (match List.rev xxs with
686 (* keep only last *)
687 | last::startxs ->
688 startxs +> List.iter
689 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingNormal))
690 | [] -> (* not #else *) ()
691 );
692 end
693 );
694
695 | Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool
696 )
697
698
699
700 let thresholdIfdefSizeMid = 6
701
702 (* infer ifdef involving not-closed expressions/statements *)
703 let rec find_ifdef_mid xs =
704 xs +> List.iter (function
705 | NotIfdefLine _ -> ()
706 | Ifdef (xxs, info_ifdef_stmt) ->
707 (match xxs with
708 | [] -> raise (Impossible 91)
709 | [first] -> ()
710 | first::second::rest ->
711 (* don't analyse big ifdef *)
712 if xxs +> List.for_all
713 (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
714 (* don't want nested ifdef *)
715 xxs +> List.for_all (fun xs ->
716 xs +> List.for_all
717 (function NotIfdefLine _ -> true | _ -> false)
718 )
719
720 then
721 let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
722 let cnt1, cnt2 = List.hd counts in
723 if cnt1 <> 0 || cnt2 <> 0 &&
724 counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
725 (*
726 if counts +> List.exists (fun (cnt1, cnt2) ->
727 cnt1 <> 0 || cnt2 <> 0
728 )
729 *)
730 then begin
731 msg_ifdef_mid_something();
732
733 (* keep only first, treat the rest as comment *)
734 info_ifdef_stmt +>
735 List.iter
736 (TV.save_as_comment (function x -> Token_c.CppIfDirective x));
737 (second::rest) +> List.iter
738 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError));
739 end
740
741 );
742 List.iter find_ifdef_mid xxs
743
744 (* no need complex analysis for ifdefbool *)
745 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
746 List.iter find_ifdef_mid xxs
747
748
749 )
750
751
752 let thresholdFunheaderLimit = 4
753
754 (* ifdef defining alternate function header, type *)
755 let rec find_ifdef_funheaders = function
756 | [] -> ()
757 | NotIfdefLine _::xs -> find_ifdef_funheaders xs
758
759 (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
760 | Ifdef
761 ([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1;
762 (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
763 ], info_ifdef_stmt
764 )
765 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line3)
766 ::xs
767 when List.length ifdefblock1 <= thresholdFunheaderLimit &&
768 List.length ifdefblock2 <= thresholdFunheaderLimit
769 ->
770 find_ifdef_funheaders xs;
771
772 msg_ifdef_funheaders ();
773 info_ifdef_stmt +>
774 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
775 let all_toks = [xline2] @ line2 in
776 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError) ;
777 ifdefblock2 +> iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError);
778
779 (* ifdef with nested ifdef *)
780 | Ifdef
781 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
782 [Ifdef
783 ([[NotIfdefLine (({col = 0} as xline2)::line2)];
784 [NotIfdefLine (({col = 0} as xline3)::line3)];
785 ], info_ifdef_stmt2
786 )
787 ]
788 ], info_ifdef_stmt
789 )
790 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
791 ::xs
792 ->
793 find_ifdef_funheaders xs;
794
795 msg_ifdef_funheaders ();
796 info_ifdef_stmt +>
797 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
798 info_ifdef_stmt2 +>
799 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
800 let all_toks = [xline2;xline3] @ line2 @ line3 in
801 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError);
802
803 (* ifdef with elseif *)
804 | Ifdef
805 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
806 [NotIfdefLine (({col = 0} as xline2)::line2)];
807 [NotIfdefLine (({col = 0} as xline3)::line3)];
808 ], info_ifdef_stmt
809 )
810 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
811 ::xs
812 ->
813 find_ifdef_funheaders xs;
814
815 msg_ifdef_funheaders ();
816 info_ifdef_stmt +>
817 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
818 let all_toks = [xline2;xline3] @ line2 @ line3 in
819 all_toks +> List.iter (TV.set_as_comment Token_c.CppPassingCosWouldGetError)
820
821 (* recurse *)
822 | Ifdef (xxs,info_ifdef_stmt)::xs
823 | Ifdefbool (_, xxs,info_ifdef_stmt)::xs ->
824 List.iter find_ifdef_funheaders xxs;
825 find_ifdef_funheaders xs
826
827
828
829 (* ?? *)
830 let rec adjust_inifdef_include xs =
831 xs +> List.iter (function
832 | NotIfdefLine _ -> ()
833 | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) ->
834 xxs +> List.iter (iter_token_ifdef (fun tokext ->
835 match tokext.tok with
836 | Parser_c.TInclude (s1, s2, inifdef_ref, ii) ->
837 inifdef_ref := true;
838 | _ -> ()
839 ));
840 )
841
842
843
844
845
846
847
848 let rec find_ifdef_cparen_else xs =
849 let rec aux xs =
850 xs +> List.iter (function
851 | NotIfdefLine _ -> ()
852 | Ifdef (xxs, info_ifdef_stmt) ->
853 (match xxs with
854 | [] -> raise (Impossible 92)
855 | [first] -> ()
856 | first::second::rest ->
857
858 (* found a closing ')' just after the #else *)
859
860 (* Too bad ocaml does not support better list pattern matching
861 * a la Prolog-III where can match the end of lists.
862 *)
863 let condition =
864 if List.length first = 0 then false
865 else
866 let last_line = Common.last first in
867 match last_line with
868 | NotIfdefLine xs ->
869 if List.length xs = 0 then false
870 else
871 let last_tok = Common.last xs in
872 TH.is_cpar last_tok.tok
873 | Ifdef _ | Ifdefbool _ -> false
874 in
875 if condition then begin
876 msg_ifdef_cparen_else();
877
878 (* keep only first, treat the rest as comment *)
879 info_ifdef_stmt +>
880 List.iter (TV.save_as_comment (fun x -> Token_c.CppIfDirective x));
881 (second::rest) +> List.iter
882 (iter_token_ifdef (TV.set_as_comment Token_c.CppPassingCosWouldGetError));
883 end
884
885 );
886 List.iter aux xxs
887
888 (* no need complex analysis for ifdefbool *)
889 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
890 List.iter aux xxs
891 )
892 in aux xs
893
894
895 (* ------------------------------------------------------------------------- *)
896 (* cpp-builtin part2, macro, using standard.h or other defs *)
897 (* ------------------------------------------------------------------------- *)
898
899 (* now in cpp_token_c.ml *)
900
901 (* ------------------------------------------------------------------------- *)
902 (* stringification *)
903 (* ------------------------------------------------------------------------- *)
904
905 let rec find_string_macro_paren xs =
906 match xs with
907 | [] -> ()
908 | Parenthised(xxs, info_parens)::xs ->
909 xxs +> List.iter (fun xs ->
910 if xs +> List.exists
911 (function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) &&
912 xs +> List.for_all
913 (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) ->
914 true | _ -> false)
915 then
916 xs +> List.iter (fun tok ->
917 match tok with
918 | PToken({tok = TIdent (s,_)} as id) ->
919
920 msg_stringification s;
921 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
922 | _ -> ()
923 )
924 else
925 find_string_macro_paren xs
926 );
927 find_string_macro_paren xs
928 | PToken(tok)::xs ->
929 find_string_macro_paren xs
930
931
932 (* ------------------------------------------------------------------------- *)
933 (* macro2 *)
934 (* ------------------------------------------------------------------------- *)
935
936 (* don't forget to recurse in each case *)
937 let rec find_macro_paren xs =
938 match xs with
939 | [] -> ()
940
941 (* attribute *)
942 | PToken ({tok = Tattribute _} as id)
943 ::Parenthised (xxs,info_parens)
944 ::xs
945 ->
946 pr2_cpp ("MACRO: __attribute detected ");
947 [Parenthised (xxs, info_parens)] +>
948 iter_token_paren (TV.set_as_comment Token_c.CppAttr);
949 TV.set_as_comment Token_c.CppAttr id;
950 find_macro_paren xs
951
952 | PToken ({tok = TattributeNoarg _} as id)
953 ::xs
954 ->
955 pr2_cpp ("MACRO: __attributenoarg detected ");
956 TV.set_as_comment Token_c.CppAttr id;
957 find_macro_paren xs
958
959 (*
960 (* attribute cpp, __xxx id *)
961 | PToken ({tok = TIdent (s,i1)} as id)
962 ::PToken ({tok = TIdent (s2, i2)} as id2)
963 ::xs when s ==~ regexp_annot
964 ->
965 msg_attribute s;
966 id.tok <- TMacroAttr (s, i1);
967 find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)
968
969 (* attribute cpp, id __xxx *)
970 | PToken ({tok = TIdent (s,i1)} as _id)
971 ::PToken ({tok = TIdent (s2, i2)} as id2)
972 ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
973 ->
974 msg_attribute s2;
975 id2.tok <- TMacroAttr (s2, i2);
976 find_macro_paren xs
977
978 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
979 ::PToken ({tok = TIdent (s,i1)} as attr)
980 ::xs when s ==~ regexp_annot
981 ->
982 pr2_cpp ("storage attribute: " ^ s);
983 attr.tok <- TMacroAttrStorage (s,i1);
984 (* recurse, may have other storage attributes *)
985 find_macro_paren (PToken (tok1)::xs)
986
987 *)
988
989 (* storage attribute *)
990 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
991 ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs
992 ->
993 pr2_cpp ("storage attribute: " ^ s);
994 attr.tok <- TMacroAttrStorage (s,i1);
995 (* recurse, may have other storage attributes *)
996 find_macro_paren (PToken (tok1)::xs)
997
998
999 (* stringification
1000 *
1001 * the order of the matching clause is important
1002 *
1003 *)
1004
1005 (* string macro with params, before case *)
1006 | PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1007 ::Parenthised (xxs, info_parens)
1008 ::xs ->
1009
1010 msg_stringification_params s;
1011 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1012 [Parenthised (xxs, info_parens)] +>
1013 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
1014 find_macro_paren xs
1015
1016 (* after case *)
1017 | PToken ({tok = TIdent (s,_)} as id)
1018 ::Parenthised (xxs, info_parens)
1019 ::PToken ({tok = (TString _ | TMacroString _)})
1020 ::xs ->
1021
1022 msg_stringification_params s;
1023 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1024 [Parenthised (xxs, info_parens)] +>
1025 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
1026 find_macro_paren xs
1027
1028
1029 (* for the case where the string is not inside a funcall, but
1030 * for instance in an initializer.
1031 *)
1032
1033 (* string macro variable, before case *)
1034 | PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1035 ::xs when not !Flag.c_plus_plus ->
1036
1037 msg_stringification s;
1038 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1039 find_macro_paren xs
1040
1041 (* after case *)
1042 | PToken ({tok = TIdent (s,_)} as id)
1043 ::PToken ({tok = (TString _ | TMacroString _)})
1044 ::xs ->
1045
1046 msg_stringification s;
1047 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1048 find_macro_paren xs
1049
1050
1051
1052
1053
1054 (* recurse *)
1055 | (PToken x)::xs -> find_macro_paren xs
1056 | (Parenthised (xxs, info_parens))::xs ->
1057 xxs +> List.iter find_macro_paren;
1058 find_macro_paren xs
1059
1060
1061
1062
1063
1064 (* don't forget to recurse in each case *)
1065 let rec find_macro_lineparen xs =
1066 match xs with
1067 | [] -> ()
1068
1069 (* linuxext: ex: static [const] DEVICE_ATTR(); *)
1070 | (Line
1071 (
1072 [PToken ({tok = Tstatic _});
1073 PToken ({tok = TIdent (s,_)} as macro);
1074 Parenthised (xxs,info_parens);
1075 PToken ({tok = TPtVirg _});
1076 ]
1077 ))
1078 ::xs
1079 when (s ==~ regexp_macro) ->
1080
1081 msg_declare_macro s;
1082 let info = TH.info_of_tok macro.tok in
1083 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1084
1085 find_macro_lineparen (xs)
1086
1087 (* the static const case *)
1088 | (Line
1089 (
1090 [PToken ({tok = Tstatic _});
1091 PToken ({tok = Tconst _} as const);
1092 PToken ({tok = TIdent (s,_)} as macro);
1093 Parenthised (xxs,info_parens);
1094 PToken ({tok = TPtVirg _});
1095 ]
1096 (*as line1*)
1097
1098 ))
1099 ::xs
1100 when (s ==~ regexp_macro) ->
1101
1102 msg_declare_macro s;
1103 let info = TH.info_of_tok macro.tok in
1104 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1105
1106 (* need retag this const, otherwise ambiguity in grammar
1107 21: shift/reduce conflict (shift 121, reduce 137) on Tconst
1108 decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
1109 decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
1110 storage_class_spec : Tstatic . (137)
1111 *)
1112 const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
1113
1114 find_macro_lineparen (xs)
1115
1116
1117 (* same but without trailing ';'
1118 *
1119 * I do not put the final ';' because it can be on a multiline and
1120 * because of the way mk_line is coded, we will not have access to
1121 * this ';' on the next line, even if next to the ')' *)
1122 | (Line
1123 ([PToken ({tok = Tstatic _});
1124 PToken ({tok = TIdent (s,_)} as macro);
1125 Parenthised (xxs,info_parens);
1126 ]
1127 ))
1128 ::xs
1129 when s ==~ regexp_macro ->
1130
1131 msg_declare_macro s;
1132 let info = TH.info_of_tok macro.tok in
1133 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1134
1135 find_macro_lineparen (xs)
1136
1137
1138 (* on multiple lines *)
1139 | (Line
1140 (
1141 (PToken ({tok = Tstatic _})::[]
1142 )))
1143 ::(Line
1144 (
1145 [PToken ({tok = TIdent (s,_)} as macro);
1146 Parenthised (xxs,info_parens);
1147 PToken ({tok = TPtVirg _});
1148 ]
1149 )
1150 )
1151 ::xs
1152 when (s ==~ regexp_macro) ->
1153
1154 msg_declare_macro s;
1155 let info = TH.info_of_tok macro.tok in
1156 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1157
1158 find_macro_lineparen (xs)
1159
1160
1161 | (Line (* initializer case *)
1162 (
1163 PToken ({tok = Tstatic _}) ::
1164 PToken ({tok = TIdent (s,_)} as macro) ::
1165 Parenthised (xxs,info_parens) ::
1166 PToken ({tok = TEq _}) :: rest
1167 ))
1168 ::xs
1169 when (s ==~ regexp_macro) ->
1170
1171 msg_declare_macro s;
1172 let info = TH.info_of_tok macro.tok in
1173 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1174
1175 (* continue with the rest of the line *)
1176 find_macro_lineparen ((Line(rest))::xs)
1177
1178
1179 | (Line (* multi-line initializer case *)
1180 (
1181 (PToken ({tok = Tstatic _})::[]
1182 )))
1183 ::(Line
1184 (
1185 PToken ({tok = Tstatic _}) ::
1186 PToken ({tok = TIdent (s,_)} as macro) ::
1187 Parenthised (xxs,info_parens) ::
1188 PToken ({tok = TEq _}) :: rest
1189 ))
1190 ::xs
1191 when (s ==~ regexp_macro) ->
1192
1193 msg_declare_macro s;
1194 let info = TH.info_of_tok macro.tok in
1195 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1196
1197 (* continue with the rest of the line *)
1198 find_macro_lineparen ((Line(rest))::xs)
1199
1200
1201 (* linuxext: ex: DECLARE_BITMAP();
1202 *
1203 * Here I use regexp_declare and not regexp_macro because
1204 * Sometimes it can be a FunCallMacro such as DEBUG(foo());
1205 * Here we don't have the preceding 'static' so only way to
1206 * not have positive is to restrict to .*DECLARE.* macros.
1207 *
1208 * but there is a grammar rule for that, so don't need this case anymore
1209 * unless the parameter of the DECLARE_xxx are weird and can not be mapped
1210 * on a argument_list
1211 *)
1212
1213 | (Line
1214 ([PToken ({tok = TIdent (s,_)} as macro);
1215 Parenthised (xxs,info_parens);
1216 PToken ({tok = TPtVirg _});
1217 ]
1218 ))
1219 ::xs
1220 when (s ==~ regexp_declare) ->
1221
1222 msg_declare_macro s;
1223 let info = TH.info_of_tok macro.tok in
1224 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1225
1226 find_macro_lineparen (xs)
1227
1228
1229 (* toplevel macros.
1230 * module_init(xxx)
1231 *
1232 * Could also transform the TIdent in a TMacroTop but can have false
1233 * positive, so easier to just change the TCPar and so just solve
1234 * the end-of-stream pb of ocamlyacc
1235 *)
1236 | (Line
1237 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro);
1238 Parenthised (xxs,info_parens);
1239 ] as _line1
1240 ))
1241 ::xs when col1 =|= 0
1242 ->
1243 let condition =
1244 (* to reduce number of false positive *)
1245 (match xs with
1246 | (Line (PToken ({col = col2 } as other)::restline2))::_ ->
1247 TH.is_eof other.tok || (col2 =|= 0 &&
1248 (match other.tok with
1249 | TOBrace _ -> false (* otherwise would match funcdecl *)
1250 | TCBrace _ when ctx <> InFunction -> false
1251 | TPtVirg _
1252 | TDotDot _
1253 -> false
1254 | tok when TH.is_binary_operator tok -> false
1255
1256 | _ -> true
1257 )
1258 )
1259 | _ -> false
1260 )
1261 in
1262 if condition
1263 then begin
1264
1265 msg_macro_toplevel_noptvirg s;
1266 (* just to avoid the end-of-stream pb of ocamlyacc *)
1267 let tcpar = Common.last info_parens in
1268 tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok);
1269
1270 (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
1271
1272 end;
1273
1274 find_macro_lineparen (xs)
1275
1276
1277
1278 (* macro with parameters
1279 * ex: DEBUG()
1280 * return x;
1281 *)
1282 | (Line
1283 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1284 Parenthised (xxs,info_parens);
1285 ] as _line1
1286 ))
1287 ::(Line
1288 (PToken ({col = col2 } as other)::restline2
1289 ) as line2)
1290 ::xs
1291 (* when s ==~ regexp_macro *)
1292 ->
1293 (* This can give a false positive for K&R functions if the function
1294 name is in the same column as the first parameter declaration. *)
1295 let condition =
1296 (col1 =|= col2 &&
1297 (match other.tok with
1298 | TOBrace _ -> false (* otherwise would match funcdecl *)
1299 | TCBrace _ when ctx <> InFunction -> false
1300 | TPtVirg _
1301 | TDotDot _
1302 -> false
1303 | tok when TH.is_binary_operator tok -> false
1304
1305 | _ -> true
1306 )
1307 )
1308 ||
1309 (col2 <= col1 &&
1310 (match other.tok, restline2 with
1311 | TCBrace _, _ when ctx =*= InFunction -> true
1312 | Treturn _, _ -> true
1313 | Tif _, _ -> true
1314 | Telse _, _ -> true
1315
1316 (* case of label, usually put in first line *)
1317 | TIdent _, (PToken ({tok = TDotDot _}))::_ ->
1318 true
1319
1320
1321 | _ -> false
1322 )
1323 )
1324
1325 in
1326
1327 if condition
1328 then
1329 if col1 =|= 0 then ()
1330 else begin
1331 msg_macro_noptvirg s;
1332 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1333 [Parenthised (xxs, info_parens)] +>
1334 iter_token_paren (TV.set_as_comment Token_c.CppMacro);
1335 end;
1336
1337 find_macro_lineparen (line2::xs)
1338
1339 (* linuxext:? single macro
1340 * ex: LOCK
1341 * foo();
1342 * UNLOCK
1343 *
1344 * todo: factorize code with previous rule ?
1345 *)
1346 | (Line
1347 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1348 ] as _line1
1349 ))
1350 ::(Line
1351 (PToken ({col = col2 } as other)::restline2
1352 ) as line2)
1353 ::xs ->
1354 (* when s ==~ regexp_macro *)
1355
1356 let condition =
1357 (col1 =|= col2 &&
1358 col1 <> 0 && (* otherwise can match typedef of fundecl*)
1359 (match other.tok with
1360 | TPtVirg _ -> false
1361 | TOr _ -> false
1362 | TCBrace _ when ctx <> InFunction -> false
1363 | tok when TH.is_binary_operator tok -> false
1364
1365 | _ -> true
1366 )) ||
1367 (col2 <= col1 &&
1368 (match other.tok with
1369 | TCBrace _ when ctx =*= InFunction -> true
1370 | Treturn _ -> true
1371 | Tif _ -> true
1372 | Telse _ -> true
1373 | _ -> false
1374 ))
1375 in
1376
1377 if condition
1378 then begin
1379 msg_macro_noptvirg_single s;
1380 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1381 end;
1382 find_macro_lineparen (line2::xs)
1383
1384 | x::xs ->
1385 find_macro_lineparen xs
1386
1387
1388
1389 (* ------------------------------------------------------------------------- *)
1390 (* define tobrace init *)
1391 (* ------------------------------------------------------------------------- *)
1392
1393 let rec find_define_init_brace_paren xs =
1394 let rec aux xs =
1395 match xs with
1396 | [] -> ()
1397
1398 (* mainly for firefox *)
1399 | (PToken {tok = TDefine _})
1400 ::(PToken {tok = TIdentDefine (s,_)})
1401 ::(PToken ({tok = TOBrace i1} as tokbrace))
1402 ::(PToken tok2)
1403 ::(PToken tok3)
1404 ::xs ->
1405 let is_init =
1406 match tok2.tok, tok3.tok with
1407 | TInt _, TComma _ -> true
1408 | TString _, TComma _ -> true
1409 | TIdent _, TComma _ -> true
1410 | _ -> false
1411
1412 in
1413 if is_init
1414 then begin
1415 pr2_cpp("found define initializer: " ^s);
1416 tokbrace.tok <- TOBraceDefineInit i1;
1417 end;
1418
1419 aux xs
1420
1421 (* mainly for linux, especially in sound/ *)
1422 | (PToken {tok = TDefine _})
1423 ::(PToken {tok = TIdentDefine (s,_)})
1424 ::(Parenthised(xxx, info_parens))
1425 ::(PToken ({tok = TOBrace i1} as tokbrace))
1426 ::(PToken tok2)
1427 ::(PToken tok3)
1428 ::xs ->
1429 let is_init =
1430 match tok2.tok, tok3.tok with
1431 | TInt _, TComma _ -> true
1432 | TDot _, TIdent _ -> true
1433 | TIdent _, TComma _ -> true
1434 | _ -> false
1435
1436 in
1437 if is_init
1438 then begin
1439 pr2_cpp("found define initializer with param: " ^ s);
1440 tokbrace.tok <- TOBraceDefineInit i1;
1441 end;
1442
1443 aux xs
1444
1445
1446
1447 (* recurse *)
1448 | (PToken x)::xs -> aux xs
1449 | (Parenthised (xxs, info_parens))::xs ->
1450 (* not need for tobrace init:
1451 * xxs +> List.iter aux;
1452 *)
1453 aux xs
1454 in
1455 aux xs
1456
1457
1458 (* ------------------------------------------------------------------------- *)
1459 (* action *)
1460 (* ------------------------------------------------------------------------- *)
1461
1462 (* obsolete now with macro expansion ? get some regression if comment.
1463 * todo: if do bad decision here, then it can influence other phases
1464 * and make it hard to parse. So maybe when have a parse error, should
1465 * undo some of the guess those heuristics have done, and restore
1466 * the original token value.
1467 *)
1468
1469 let rec find_actions = function
1470 | [] -> ()
1471
1472 | PToken ({tok = TIdent (s,ii)})
1473 ::Parenthised (xxs,info_parens)
1474 ::xs ->
1475 find_actions xs;
1476 xxs +> List.iter find_actions;
1477 let modified = find_actions_params xxs in
1478 if modified
1479 then msg_macro_higher_order s
1480
1481 | x::xs ->
1482 find_actions xs
1483
1484 and find_actions_params xxs =
1485 xxs +> List.fold_left (fun acc xs ->
1486 let toks = tokens_of_paren xs in
1487 if toks +> List.exists (fun x -> TH.is_statement x.tok)
1488 (* undo: && List.length toks > 1
1489 * good for sparse, not good for linux
1490 *)
1491 then begin
1492 xs +> iter_token_paren (fun x ->
1493 if TH.is_eof x.tok
1494 then
1495 (* certainly because paren detection had a pb because of
1496 * some ifdef-exp. Do similar additional checking than
1497 * what is done in TV.set_as_comment.
1498 *)
1499 pr2 "PB: weird, I try to tag an EOF token as an action"
1500 else
1501 (* cf tests-bis/no_cpar_macro.c *)
1502 if TH.is_eom x.tok
1503 then
1504 pr2 "PB: weird, I try to tag an EOM token as an action"
1505 else
1506 x.tok <- TAction (TH.info_of_tok x.tok);
1507 );
1508 true (* modified *)
1509 end
1510 else acc
1511 ) false
1512
1513
1514
1515 (* ------------------------------------------------------------------------- *)
1516 (* main fix cpp function *)
1517 (* ------------------------------------------------------------------------- *)
1518
1519 let filter_cpp_stuff xs =
1520 List.filter
1521 (function x ->
1522 (match x.tok with
1523 | tok when TH.is_comment tok -> false
1524 (* don't want drop the define, or if drop, have to drop
1525 * also its body otherwise the line heuristics may be lost
1526 * by not finding the TDefine in column 0 but by finding
1527 * a TDefineIdent in a column > 0
1528 *)
1529 | Parser_c.TDefine _ -> true
1530 | tok when TH.is_cpp_instruction tok -> false
1531 | _ -> true
1532 ))
1533 xs
1534
1535 let insert_virtual_positions l =
1536 let strlen x = String.length (Ast_c.str_of_info x) in
1537 let rec loop prev offset acc = function
1538 [] -> List.rev acc
1539 | x::xs ->
1540 let ii = TH.info_of_tok x in
1541 let inject pi =
1542 TH.visitor_info_of_tok (function ii -> Ast_c.rewrap_pinfo pi ii) x in
1543 match Ast_c.pinfo_of_info ii with
1544 Ast_c.OriginTok pi ->
1545 let prev = Ast_c.parse_info_of_info ii in
1546 loop prev (strlen ii) (x::acc) xs
1547 | Ast_c.ExpandedTok (pi,_) ->
1548 let x' = inject (Ast_c.ExpandedTok (pi,(prev,offset))) in
1549 loop prev (offset + (strlen ii)) (x'::acc) xs
1550 | Ast_c.FakeTok (s,_) ->
1551 let x' = inject (Ast_c.FakeTok (s,(prev,offset))) in
1552 loop prev (offset + (strlen ii)) (x'::acc) xs
1553 | Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in
1554 let rec skip_fake = function
1555 | [] -> []
1556 | x::xs ->
1557 let ii = TH.info_of_tok x in
1558 match Ast_c.pinfo_of_info ii with
1559 | Ast_c.OriginTok pi ->
1560 let prev = Ast_c.parse_info_of_info ii in
1561 let res = loop prev (strlen ii) [] xs in
1562 x::res
1563 | _ -> x::skip_fake xs in
1564 skip_fake l
1565
1566 (* ------------------------------------------------------------------------- *)
1567
1568 let fix_tokens_cpp2 ~macro_defs tokens =
1569 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
1570
1571 begin
1572 (* the order is important, if you put the action heuristic first,
1573 * then because of ifdef, can have not closed paren
1574 * and so may believe that higher order macro
1575 * and it will eat too much tokens. So important to do
1576 * first the ifdef.
1577 *
1578 * I recompute multiple times cleaner cos the mutable
1579 * can have be changed and so may have more comments
1580 * in the token original list.
1581 *
1582 *)
1583
1584 commentize_skip_start_to_end !tokens2;
1585
1586 (* ifdef *)
1587 let cleaner = !tokens2 +> List.filter (fun x ->
1588 (* is_comment will also filter the TCommentCpp created in
1589 * commentize_skip_start_to_end *)
1590 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1591 ) in
1592 let ifdef_grouped = TV.mk_ifdef cleaner in
1593 set_ifdef_parenthize_info ifdef_grouped;
1594
1595 find_ifdef_funheaders ifdef_grouped;
1596 find_ifdef_bool ifdef_grouped;
1597 find_ifdef_mid ifdef_grouped;
1598 (* change order ? maybe cparen_else heuristic make some of the funheaders
1599 * heuristics irrelevant ?
1600 *)
1601 find_ifdef_cparen_else ifdef_grouped;
1602 adjust_inifdef_include ifdef_grouped;
1603
1604
1605 (* macro 1 *)
1606 let cleaner = !tokens2 +> filter_cpp_stuff in
1607
1608 let paren_grouped = TV.mk_parenthised cleaner in
1609 Cpp_token_c.apply_macro_defs
1610 ~msg_apply_known_macro
1611 ~msg_apply_known_macro_hint
1612 macro_defs paren_grouped;
1613 (* because the before field is used by apply_macro_defs *)
1614 tokens2 := TV.rebuild_tokens_extented !tokens2;
1615
1616 (* tagging contextual info (InFunc, InStruct, etc). Better to do
1617 * that after the "ifdef-simplification" phase.
1618 *)
1619 let cleaner = !tokens2 +> List.filter (fun x ->
1620 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1621 ) in
1622
1623 let brace_grouped = TV.mk_braceised cleaner in
1624 set_context_tag brace_grouped;
1625
1626 (* macro *)
1627 let cleaner = !tokens2 +> filter_cpp_stuff in
1628
1629 let paren_grouped = TV.mk_parenthised cleaner in
1630 let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
1631 find_define_init_brace_paren paren_grouped;
1632 find_string_macro_paren paren_grouped;
1633 find_macro_lineparen line_paren_grouped;
1634 find_macro_paren paren_grouped;
1635
1636
1637 (* obsolete: actions ? not yet *)
1638 let cleaner = !tokens2 +> filter_cpp_stuff in
1639 let paren_grouped = TV.mk_parenthised cleaner in
1640 find_actions paren_grouped;
1641
1642
1643
1644 insert_virtual_positions (!tokens2 +> Common.acc_map (fun x -> x.tok))
1645 end
1646
1647 let time_hack1 ~macro_defs a =
1648 Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 ~macro_defs a)
1649
1650 let fix_tokens_cpp ~macro_defs a =
1651 Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 ~macro_defs a)
1652
1653
1654
1655 let can_be_on_top_level tl =
1656 match tl with
1657 | Tstruct _
1658 | Ttypedef _
1659 | TDefine _
1660 | TIfdef _
1661 | TIfdefelse _
1662 | TIfdefelif _
1663 | TIfdefBool _
1664 | TIfdefMisc _
1665 | TIfdefVersion _
1666 | TEndif _ -> true
1667 | _ -> false
1668
1669
1670 (*****************************************************************************)
1671 (* Lexing with lookahead *)
1672 (*****************************************************************************)
1673
1674 (* Why using yet another parsing_hack technique ? The fix_xxx where do
1675 * some pre-processing on the full list of tokens is not enough ?
1676 * No cos sometimes we need more contextual info, and even if
1677 * set_context() tries to give some contextual info, it's not completely
1678 * accurate so the following code give yet another alternative, yet another
1679 * chance to transform some tokens.
1680 *
1681 * todo?: maybe could try to get rid of this technique. Maybe a better
1682 * set_context() would make possible to move this code using a fix_xx
1683 * technique.
1684 *
1685 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
1686 * it is more general to do it via my LALR(k) tech. Because here we can
1687 * transform some token give some context information. So sometimes it
1688 * makes sense to transform a token in one context, sometimes not, and
1689 * lex can not provide us this context information. Note that the order
1690 * in the pattern matching in lookahead is important. Do not cut/paste.
1691 *
1692 * Note that in next there is only "clean" tokens, there is no comment
1693 * or space tokens. This is done by the caller.
1694 *
1695 *)
1696
1697 open Lexer_parser (* for the fields of lexer_hint type *)
1698
1699 let not_struct_enum = function
1700 | (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _)::_ -> false
1701 | _ -> true
1702
1703 let pointer ?(followed_by=fun _ -> true)
1704 ?(followed_by_more=fun _ -> true) ts =
1705 let rec loop ts =
1706 match ts with
1707 | TMul _ :: rest -> loop rest
1708 | TAnd _ :: rest when !Flag.c_plus_plus -> loop rest
1709 | t :: ts' -> followed_by t && followed_by_more ts'
1710 | [] -> failwith "unexpected end of token stream" in
1711 match ts with
1712 | TMul _ :: rest -> loop rest
1713 | TAnd _ :: rest when !Flag.c_plus_plus -> loop rest
1714 | _ -> false
1715
1716 let ident = function
1717 TIdent _ -> true
1718 | _ -> false
1719
1720 let is_type = function
1721 | TypedefIdent _
1722 | Tvoid _
1723 | Tchar _
1724 | Tfloat _
1725 | Tdouble _
1726
1727 | Tsize_t _
1728 | Tssize_t _
1729 | Tptrdiff_t _
1730
1731 | Tint _
1732 | Tlong _
1733 | Tshort _ -> true
1734 | _ -> false
1735
1736 let is_cparen = function (TCPar _) -> true | _ -> false
1737 let is_oparen = function (TOPar _) -> true | _ -> false
1738
1739 let rec not_has_type_before f xs =
1740 match xs with
1741 | [] -> raise (Impossible 666)
1742 | x :: xs ->
1743 if f x then
1744 true
1745 else if is_type x then
1746 false
1747 else
1748 not_has_type_before f xs
1749
1750 (* This function is inefficient, because it will look over a K&R header,
1751 or function prototype multiple times. At least when we see a , and are in a
1752 parameter list, we know we will eventually see a close paren, and it
1753 should come fairly soon. *)
1754 let k_and_r l =
1755 let l1 = drop_until is_cparen l in
1756 match l1 with
1757 (TCPar _) :: (TOCro _) :: _ -> false
1758 | (TCPar _) :: _ -> true
1759 | _ -> false
1760
1761 (* (a)(b) is ambiguous, because (a) could be a function name or a cast.
1762 At this point, we just see an ident for a; we don't know if it is eg a local
1763 variable. This function sees at least if b is the only argument, ie there
1764 are no commas at top level *)
1765 let paren_before_comma l =
1766 let rec loop level = function
1767 [] -> false
1768 | (TComma _)::_ when level = 1 -> false
1769 | (TCPar _)::_ when level = 1 -> true
1770 | (TCPar _)::rest -> loop (level-1) rest
1771 | (TOPar _)::rest -> loop (level+1) rest
1772 | x::rest -> loop level rest in
1773 loop 0 l
1774
1775 let lookahead2 ~pass next before =
1776 match (next, before) with
1777
1778 (* c++ hacks *)
1779 (* yy xx( and in function *)
1780 | TOPar i1::_, TIdent(s,i2)::TypedefIdent _::_
1781 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1782 pr2_cpp("constructed_object: " ^s);
1783 TOParCplusplusInit i1
1784 | TOPar i1::_, TIdent(s,i2)::ptr
1785 when !Flag.c_plus_plus
1786 && pointer ~followed_by:(function TypedefIdent _ -> true | _ -> false) ptr
1787 && (LP.current_context () = (LP.InFunction)) ->
1788 pr2_cpp("constructed_object: " ^s);
1789 TOParCplusplusInit i1
1790 | TypedefIdent(s,i)::TOPar i1::_,_
1791 when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
1792 TIdent(s,i)
1793
1794 (*-------------------------------------------------------------*)
1795 (* typedef inference, parse_typedef_fix3 *)
1796 (*-------------------------------------------------------------*)
1797 (* xx xx *)
1798 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
1799 && ok_typedef s
1800 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
1801 ->
1802 (* parse_typedef_fix3:
1803 * acpi_object acpi_object;
1804 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
1805 * Le parser en interne a deja appelé le prochain token pour pouvoir
1806 * decider des choses.
1807 * => special case in lexer_heuristic, again
1808 *)
1809 if !Flag_parsing_c.debug_typedef
1810 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
1811
1812
1813 LP.disable_typedef();
1814 msg_typedef s i1 1; LP.add_typedef_root s;
1815 TypedefIdent (s, i1)
1816
1817 (* christia *)
1818
1819 (* delete[] *)
1820 | (TOCro i1 :: _, Tdelete _ :: _)
1821 when !Flag.c_plus_plus ->
1822 TCommentCpp (Token_c.CppDirective, i1)
1823 (* delete[] *)
1824 | (TCCro i1 :: _, Tdelete _ :: _)
1825 when !Flag.c_plus_plus ->
1826 TCommentCpp (Token_c.CppDirective, i1)
1827
1828 (* extern "_" tt *)
1829 | ((TString ((s, _), i1) | TMacroString (s, i1)) :: _ , Textern _ :: _)
1830 when !Flag.c_plus_plus ->
1831 TCommentCpp (Token_c.CppDirective, i1)
1832
1833 (* ) const { *)
1834 | (Tconst i1 :: TOBrace _ :: _ , TCPar _ :: _)
1835 when !Flag.c_plus_plus ->
1836 TCommentCpp (Token_c.CppDirective, i1)
1837
1838 (* xx const tt *)
1839 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::type_::_ , _)
1840 when not_struct_enum before
1841 && is_type type_ ->
1842 TCommentCpp (Token_c.CppDirective, i1)
1843
1844 (* xx struct *)
1845 | (TIdent (s, i1)::Tstruct _::_ , _)
1846 when not_struct_enum before ->
1847 TCommentCpp (Token_c.CppDirective, i1)
1848
1849 (* xx tt *)
1850 | (TIdent (s, i1)::type_::_ , _)
1851 when not_struct_enum before
1852 && is_type type_ ->
1853 TCommentCpp (Token_c.CppDirective, i1)
1854
1855 (* tt xx yy *)
1856 | (TIdent (s, i1)::TIdent (s2, i2)::_ , seen::_)
1857 when not_struct_enum before
1858 && is_type seen ->
1859 if is_macro s2 then
1860 TIdent (s, i1)
1861 else
1862 TCommentCpp (Token_c.CppDirective, i1)
1863
1864 | (TIdent (s2, i2)::_ , TIdent (s, i1)::seen::_)
1865 when not_struct_enum before
1866 && is_macro s2 && is_type seen ->
1867 TCommentCpp (Token_c.CppDirective, i2)
1868
1869 (* tt xx * *)
1870 | (TIdent (s, i1)::ptr , seen::_)
1871 when not_struct_enum before
1872 && pointer ptr && is_type seen ->
1873 TCommentCpp (Token_c.CppDirective, i1)
1874
1875 (* tt * xx yy *)
1876 | (TIdent (s, i1)::TIdent(s2, i2)::_ , ptr)
1877 when not_struct_enum before
1878 && pointer ptr ->
1879 if is_macro s2 then
1880 TIdent (s, i1)
1881 else
1882 TCommentCpp (Token_c.CppDirective, i1)
1883
1884 (* tt * xx yy *)
1885 | (TIdent(s2, i2)::_ , TIdent (s, i1)::ptr)
1886 when not_struct_enum before
1887 && is_macro s2 && pointer ptr ->
1888 TCommentCpp (Token_c.CppDirective, i2)
1889
1890 (* exception to next rule *)
1891 | (TIdent(s2, i2)::TOPar _ :: _ , TIdent(s, i1)::seen::_)
1892 when not_struct_enum before
1893 && is_macro s2 && is_type seen ->
1894 TIdent(s2, i2)
1895 (* tt xx yy *)
1896 | (TIdent(s2, i2)::_ , TIdent(s, i1)::seen::_)
1897 when not_struct_enum before
1898 && is_macro s2 && is_type seen ->
1899 TCommentCpp (Token_c.CppDirective, i2)
1900
1901 (* xx * yy AND in paramdecl *)
1902 | (TIdent (s, i1)::ptr , _)
1903 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1904 && pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
1905 && ok_typedef s ->
1906 msg_typedef s i1 14; LP.add_typedef_root s;
1907 TypedefIdent (s, i1)
1908
1909 (* xx MM ( *)
1910 | (TIdent (s, i1)::TIdent (s2, i2)::TOPar _::_ , type_::_) when not_struct_enum before
1911 && ok_typedef s && is_macro s2 && is_type type_
1912 ->
1913 TIdent (s, i1)
1914 (* xx yy *)
1915 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
1916 && ok_typedef s
1917 ->
1918 (* && not_annot s2 BUT lead to false positive*)
1919 msg_typedef s i1 2; LP.add_typedef_root s;
1920 TypedefIdent (s, i1)
1921
1922
1923 (* xx inline *)
1924 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
1925 && ok_typedef s
1926 ->
1927
1928 msg_typedef s i1 3; LP.add_typedef_root s;
1929 TypedefIdent (s, i1)
1930
1931
1932 (* [,(] xx [,)] AND param decl *)
1933 | (TIdent (s, i1)::(((TComma _|TCPar _)::_) as rest) ,
1934 ((TComma _ |TOPar _)::_ as bef))
1935 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1936 && k_and_r rest
1937 && not_has_type_before is_cparen rest
1938 && not_has_type_before is_oparen bef
1939 ->
1940
1941 TKRParam(s,i1)
1942
1943 | (TIdent (s, i1)::((TComma _|TCPar _)::_) , (TComma _ |TOPar _)::_ )
1944 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1945 && ok_typedef s
1946 ->
1947
1948 msg_typedef s i1 4; LP.add_typedef_root s;
1949 TypedefIdent (s, i1)
1950
1951 (* xx* [,)] *)
1952 (* specialcase: [,(] xx* [,)] *)
1953 | (TIdent (s, i1)::ptr , (*(TComma _|TOPar _)::*)_ )
1954 when pointer ~followed_by:(function TComma _ |TCPar _ -> true | _ -> false) ptr
1955 && not_struct_enum before
1956 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1957 && ok_typedef s
1958 ->
1959
1960 msg_typedef s i1 5; LP.add_typedef_root s;
1961 TypedefIdent (s, i1)
1962
1963
1964 (* xx** [,)] *)
1965 (* specialcase: [,(] xx** [,)] *)
1966 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1967 when not_struct_enum before
1968 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1969 && ok_typedef s
1970 ->
1971
1972 msg_typedef s i1 6; LP.add_typedef_root s;
1973 TypedefIdent (s, i1)
1974
1975
1976
1977 (* xx const * USELESS because of next rule ? *)
1978 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
1979 when not_struct_enum before
1980 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1981 && ok_typedef s
1982 ->
1983
1984 msg_typedef s i1 7; LP.add_typedef_root s;
1985 TypedefIdent (s, i1)
1986
1987 (* xx const *)
1988 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
1989 when not_struct_enum before
1990 && ok_typedef s
1991 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1992 ->
1993
1994 msg_typedef s i1 8; LP.add_typedef_root s;
1995 TypedefIdent (s, i1)
1996
1997
1998 (* xx * const *)
1999 | (TIdent (s, i1)::ptr , _ )
2000 when pointer ~followed_by:(function Tconst _ | Tvolatile _ | Trestrict _ -> true | _ -> false) ptr
2001 && not_struct_enum before
2002 && ok_typedef s
2003 ->
2004
2005 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2006
2007 msg_typedef s i1 9; LP.add_typedef_root s;
2008 TypedefIdent (s, i1)
2009
2010
2011 (* ( const xx) *)
2012 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
2013 ok_typedef s ->
2014
2015 msg_typedef s i1 10; LP.add_typedef_root s;
2016 TypedefIdent (s, i1)
2017
2018
2019
2020 (* ( xx ) [sizeof, ~] *)
2021 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
2022 when not_struct_enum before
2023 && ok_typedef s
2024 ->
2025
2026 msg_typedef s i1 11; LP.add_typedef_root s;
2027 TypedefIdent (s, i1)
2028
2029 (* [(,] xx [ AND parameterdeclaration *)
2030 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
2031 when (LP.current_context() =*= LP.InParameter)
2032 && ok_typedef s
2033 ->
2034
2035 msg_typedef s i1 12; LP.add_typedef_root s;
2036 TypedefIdent (s, i1)
2037
2038 (*------------------------------------------------------------*)
2039 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
2040 (* but if have a '=', or ',' I think not *)
2041 (*------------------------------------------------------------*)
2042
2043 (* static xx * yy *)
2044 | (TIdent (s, i1)::ptr ,
2045 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
2046 pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
2047 && ok_typedef s
2048 ->
2049
2050 msg_typedef s i1 13; LP.add_typedef_root s;
2051 TypedefIdent (s, i1)
2052
2053 (* TODO xx * yy ; AND in start of compound element *)
2054
2055
2056 (* xx * yy, AND in paramdecl *)
2057 | (TIdent (s, i1)::ptr , _)
2058 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
2059 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2060 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
2061 && ok_typedef s
2062 ->
2063
2064 msg_typedef s i1 14; LP.add_typedef_root s;
2065 TypedefIdent (s, i1)
2066
2067
2068 (* xx * yy ; AND in Toplevel, except when have = before *)
2069 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
2070 TIdent (s, i1)
2071 | (TIdent (s, i1)::ptr , _)
2072 when not_struct_enum before
2073 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2074 ~followed_by_more:(function TPtVirg _ :: _ -> true | _ -> false) ptr
2075 && (LP.is_top_or_struct (LP.current_context ()))
2076 ->
2077
2078 msg_typedef s i1 15; LP.add_typedef_root s;
2079 TypedefIdent (s, i1)
2080
2081 (* xx * yy , AND in Toplevel *)
2082 | (TIdent (s, i1)::ptr , _)
2083 when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
2084 && ok_typedef s
2085 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2086 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
2087 ->
2088
2089 msg_typedef s i1 16; LP.add_typedef_root s;
2090 TypedefIdent (s, i1)
2091
2092 (* xx * yy ( AND in Toplevel *)
2093 | (TIdent (s, i1)::ptr , _)
2094 when not_struct_enum before
2095 && (LP.is_top_or_struct (LP.current_context ()))
2096 && ok_typedef s
2097 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2098 ~followed_by_more:(function TOPar _ :: _ -> true | _ -> false) ptr
2099 ->
2100
2101 msg_typedef s i1 17; LP.add_typedef_root s;
2102 TypedefIdent (s, i1)
2103
2104 (* xx * yy [ *)
2105 (* todo? enough ? cos in struct def we can have some expression ! *)
2106 | (TIdent (s, i1)::ptr , _)
2107 when not_struct_enum before
2108 && (LP.is_top_or_struct (LP.current_context ()))
2109 && ok_typedef s
2110 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2111 ~followed_by_more:(function TOCro _ :: _ -> true | _ -> false) ptr
2112 ->
2113
2114 msg_typedef s i1 18; LP.add_typedef_root s;
2115 TypedefIdent (s, i1)
2116
2117 (* u16: 10; in struct *)
2118 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
2119 when (LP.is_top_or_struct (LP.current_context ()))
2120 && ok_typedef s
2121 ->
2122
2123 msg_typedef s i1 19; LP.add_typedef_root s;
2124 TypedefIdent (s, i1)
2125
2126
2127 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
2128 (* YES cos at toplevel can have some expression !! for instance when *)
2129 (* enter in the dimension of an array *)
2130 (*
2131 | (TIdent s::TMul::TIdent s2::_ , _)
2132 when (take_safe 1 !passed_tok <> [Tstruct] &&
2133 (take_safe 1 !passed_tok <> [Tenum]))
2134 &&
2135 !LP._lexer_hint = Some LP.Toplevel ->
2136 msg_typedef s 20; LP.add_typedef_root s;
2137 TypedefIdent s
2138 *)
2139
2140 (* xx * yy = *)
2141 | (TIdent (s, i1)::ptr , _)
2142 when not_struct_enum before
2143 && ok_typedef s
2144 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2145 ~followed_by_more:(function TEq _ :: _ -> true | _ -> false) ptr
2146 ->
2147
2148 msg_typedef s i1 21; LP.add_typedef_root s;
2149 TypedefIdent (s, i1)
2150
2151
2152 (* xx * yy) AND in paramdecl *)
2153 | (TIdent (s, i1)::ptr , _)
2154 when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
2155 && ok_typedef s
2156 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2157 ~followed_by_more:(function TCPar _ :: _ -> true | _ -> false) ptr
2158 ->
2159
2160 msg_typedef s i1 22; LP.add_typedef_root s;
2161 TypedefIdent (s, i1)
2162
2163
2164 (* xx * yy; *) (* wrong ? *)
2165 | (TIdent (s, i1)::ptr ,
2166 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
2167 && ok_typedef s
2168 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2169 ~followed_by_more:(function TPtVirg _ :: _ -> true | _ -> false) ptr
2170 ->
2171
2172 msg_typedef s i1 23; LP.add_typedef_root s;
2173 msg_maybe_dangereous_typedef s;
2174 TypedefIdent (s, i1)
2175
2176
2177 (* xx * yy, and ';' before xx *) (* wrong ? *)
2178 | (TIdent (s, i1)::ptr ,
2179 (TOBrace _| TPtVirg _)::_) when
2180 ok_typedef s
2181 && pointer ~followed_by:(function TIdent _ -> true | _ -> false)
2182 ~followed_by_more:(function TComma _ :: _ -> true | _ -> false) ptr
2183 ->
2184
2185 msg_typedef s i1 24; LP.add_typedef_root s;
2186 TypedefIdent (s, i1)
2187
2188
2189 (* xx_t * yy *)
2190 | (TIdent (s, i1)::ptr , _)
2191 when s ==~ regexp_typedef && not_struct_enum before
2192 (* struct user_info_t sometimes *)
2193 && ok_typedef s
2194 && pointer ~followed_by:(function TIdent _ -> true | _ -> false) ptr
2195 ->
2196
2197 msg_typedef s i1 25; LP.add_typedef_root s;
2198 TypedefIdent (s, i1)
2199
2200 (* xx ** yy *) (* wrong ? *)
2201 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
2202 when not_struct_enum before
2203 && (LP.current_context() =*= LP.InParameter)
2204 && ok_typedef s
2205 ->
2206
2207 msg_typedef s i1 26; LP.add_typedef_root s;
2208 TypedefIdent (s, i1)
2209
2210 (* xx ** yy *) (* wrong ? *)
2211 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , (TOBrace _ | TPtVirg _)::_)
2212 when not_struct_enum before
2213 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2214 && ok_typedef s
2215 (* christia : this code catches 'a * *b' which is wrong
2216 *)
2217 ->
2218
2219 msg_typedef s i1 26; LP.add_typedef_root s;
2220 TypedefIdent (s, i1)
2221
2222 (* xx *** yy *)
2223 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
2224 when not_struct_enum before
2225 && ok_typedef s
2226 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2227 ->
2228
2229 msg_typedef s i1 27; LP.add_typedef_root s;
2230 TypedefIdent (s, i1)
2231
2232 (* xx ** ) *)
2233 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
2234 when not_struct_enum before
2235 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2236 && ok_typedef s
2237 ->
2238
2239 msg_typedef s i1 28; LP.add_typedef_root s;
2240 TypedefIdent (s, i1)
2241
2242
2243
2244 (* ----------------------------------- *)
2245 (* old: why not do like for other rules and start with TIdent ?
2246 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
2247 * new: prefer now start with TIdent because otherwise the add_typedef_root
2248 * may have no effect if in second pass or if have disable the add_typedef.
2249 *)
2250
2251 (* (xx) yy *)
2252 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
2253 (TOPar info)::x::_)
2254 when not (TH.is_stuff_taking_parenthized x) (* &&
2255 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 - why useful?
2256 *)
2257 && ok_typedef s
2258 && not (ident x) (* possible K&R declaration *)
2259 ->
2260
2261 msg_typedef s i1 29; LP.add_typedef_root s;
2262 (*TOPar info*)
2263 TypedefIdent (s, i1)
2264
2265
2266 (* (xx) ( yy)
2267 * but false positif: typedef int (xxx_t)(...), so do specialisation below.
2268 *)
2269 (*
2270 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
2271 when not (TH.is_stuff_taking_parenthized x)
2272 && ok_typedef s
2273 ->
2274 msg_typedef s 30; LP.add_typedef_root s;
2275 (* TOPar info *)
2276 TypedefIdent (s, i1)
2277 *)
2278 (* special case: = (xx) ( yy) *)
2279 | (TIdent (s, i1)::TCPar _::((TOPar _::_) as rest) ,
2280 (TOPar info)::(TEq _ |TEqEq _)::_)
2281 when ok_typedef s && paren_before_comma rest
2282 ->
2283
2284 msg_typedef s i1 31; LP.add_typedef_root s;
2285 (* TOPar info *)
2286 TypedefIdent (s, i1)
2287
2288
2289 (* (xx * ) yy *)
2290 | (TIdent (s, i1)::ptr, (TOPar info)::_)
2291 when ok_typedef s
2292 && pointer ~followed_by:(function TCPar _ -> true | _ -> false)
2293 ~followed_by_more:(function TIdent _ :: _ -> true | _ -> false) ptr
2294 ->
2295
2296 msg_typedef s i1 32; LP.add_typedef_root s;
2297 (*TOPar info*)
2298 TypedefIdent (s,i1)
2299
2300
2301 (* (xx){ ... } constructor *)
2302 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
2303 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
2304 && ok_typedef s
2305 ->
2306
2307 msg_typedef s i1 33; LP.add_typedef_root s;
2308 TypedefIdent (s, i1)
2309
2310
2311 (* can have sizeof on expression
2312 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
2313 msg_typedef s; LP.add_typedef_root s;
2314 Tsizeof
2315 *)
2316
2317
2318 (* ----------------------------------- *)
2319 (* x ( *y )(params), function pointer *)
2320 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
2321 when not_struct_enum before
2322 && ok_typedef s
2323 ->
2324
2325 msg_typedef s i1 34; LP.add_typedef_root s;
2326 TypedefIdent (s, i1)
2327
2328 (* x* ( *y )(params), function pointer 2 *)
2329 | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
2330 when not_struct_enum before
2331 && ok_typedef s
2332 ->
2333
2334 msg_typedef s i1 35; LP.add_typedef_root s;
2335 TypedefIdent (s, i1)
2336
2337
2338 (*-------------------------------------------------------------*)
2339 (* CPP *)
2340 (*-------------------------------------------------------------*)
2341 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
2342 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
2343 as x)
2344 ::_, _
2345 ->
2346
2347 (*
2348 if not !Flag_parsing_c.ifdef_to_if
2349 then TCommentCpp (Ast_c.CppIfDirective, ii)
2350 else
2351 *)
2352 (* not !LP._lexer_hint.toplevel *)
2353 if !Flag_parsing_c.ifdef_directive_passing
2354 || (pass >= 2)
2355 then begin
2356
2357 if (LP.current_context () =*= LP.InInitializer)
2358 then begin
2359 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
2360 incr Stat.nIfdefInitializer;
2361 end else begin
2362 pr2_cpp("IFDEF: or related inside function. I treat it as comment");
2363 incr Stat.nIfdefPassing;
2364 end;
2365 let x =
2366 match x with
2367 TIfdef _ | TIfdefMisc _ | TIfdefVersion _ -> Token_c.IfDef
2368 | TIfdefBool _ -> Token_c.IfDef0
2369 | TIfdefelse _ | TIfdefelif _ -> Token_c.Else
2370 | TEndif _ -> Token_c.Endif
2371 | _ -> Token_c.Other in (* not possible here *)
2372 TCommentCpp (Token_c.CppIfDirective x, ii)
2373 end
2374 else x
2375
2376 | (TUndef (ii) as x)::_, _
2377 ->
2378 if (pass >= 2)
2379 then begin
2380 pr2_cpp("UNDEF: I treat it as comment");
2381 TCommentCpp (Token_c.CppDirective, ii)
2382 end
2383 else x
2384
2385 | (TCppDirectiveOther (ii) as x)::_, _
2386 ->
2387 if (pass >= 2)
2388 then begin
2389 pr2_cpp ("OTHER directive: I treat it as comment");
2390 TCommentCpp (Token_c.CppDirective, ii)
2391 end
2392 else x
2393
2394 (* If ident contain a for_each, then certainly a macro. But to be
2395 * sure should look if there is a '{' after the ')', but it requires
2396 * to count the '('. Because this can be expensive, we do that only
2397 * when the token contains "for_each".
2398 *)
2399 | (TIdent (s, i1)::TOPar _::rest, _)
2400 when not (LP.current_context () =*= LP.InTopLevel)
2401 (* otherwise a function such as static void loopback_enable(int i) {
2402 * will be considered as a loop
2403 *)
2404 ->
2405
2406 if s ==~ regexp_foreach &&
2407 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
2408
2409 then begin
2410 msg_foreach s;
2411 TMacroIterator (s, i1)
2412 end
2413 else TIdent (s, i1)
2414
2415 (* (* christia: here insert support for macros on top level *)
2416 | TIdent (s, ii) :: tl :: _, _ when
2417 can_be_on_top_level tl && LP.current_context () = InTopLevel ->
2418 pr2_cpp ("'" ^ s ^ "' looks like a macro, I treat it as comment");
2419 TCommentCpp (Token_c.CppDirective, ii)
2420 *)
2421
2422 (*-------------------------------------------------------------*)
2423 | v::xs, _ -> v
2424 | _ -> raise (Impossible 93)
2425
2426 let lookahead ~pass a b =
2427 Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b)
2428
2429