Release coccinelle-0.1.8
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14
15 open Common
16
17 module TH = Token_helpers
18 module TV = Token_views_c
19 module LP = Lexer_parser
20
21 module Stat = Parsing_stat
22
23 open Parser_c
24
25 open TV
26
27 (*****************************************************************************)
28 (* Some debugging functions *)
29 (*****************************************************************************)
30
31 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
32
33 let pr2_cpp s =
34 if !Flag_parsing_c.debug_cpp
35 then Common.pr2_once ("CPP-" ^ s)
36
37
38 let msg_gen cond is_known printer s =
39 if cond
40 then
41 if not (!Flag_parsing_c.filter_msg)
42 then printer s
43 else
44 if not (is_known s)
45 then printer s
46
47
48 (* In the following, there are some harcoded names of types or macros
49 * but they are not used by our heuristics! They are just here to
50 * enable to detect false positive by printing only the typedef/macros
51 * that we don't know yet. If we print everything, then we can easily
52 * get lost with too much verbose tracing information. So those
53 * functions "filter" some messages. So our heuristics are still good,
54 * there is no more (or not that much) hardcoded linux stuff.
55 *)
56
57 let is_known_typdef =
58 (fun s ->
59 (match s with
60 | "u_char" | "u_short" | "u_int" | "u_long"
61 | "u8" | "u16" | "u32" | "u64"
62 | "s8" | "s16" | "s32" | "s64"
63 | "__u8" | "__u16" | "__u32" | "__u64"
64 -> true
65
66 | "acpi_handle"
67 | "acpi_status"
68 -> true
69
70 | "FILE"
71 | "DIR"
72 -> true
73
74 | s when s =~ ".*_t$" -> true
75 | _ -> false
76 )
77 )
78
79 (* note: cant use partial application with let msg_typedef =
80 * because it would compute msg_typedef at compile time when
81 * the flag debug_typedef is always false
82 *)
83 let msg_typedef s =
84 incr Stat.nTypedefInfer;
85 msg_gen (!Flag_parsing_c.debug_typedef)
86 is_known_typdef
87 (fun s ->
88 pr2_cpp ("TYPEDEF: promoting: " ^ s)
89 )
90 s
91
92 let msg_maybe_dangereous_typedef s =
93 if not (is_known_typdef s)
94 then
95 pr2
96 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
97
98
99
100 let msg_declare_macro s =
101 incr Stat.nMacroDecl;
102 msg_gen (!Flag_parsing_c.debug_cpp)
103 (fun s ->
104 (match s with
105 | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
106 | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
107 | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
108 | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
109 | "SENSOR_DEVICE_ATTR"
110 | "LIST_HEAD"
111 | "DECLARE_WORK" | "DECLARE_TASKLET"
112 | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
113 | "DECLARE_BITMAP"
114
115 -> true
116 (*
117 | s when s =~ "^DECLARE_.*" -> true
118 | s when s =~ ".*_ATTR$" -> true
119 | s when s =~ "^DEFINE_.*" -> true
120 *)
121
122 | _ -> false
123 )
124 )
125 (fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s))
126 s
127
128
129 let msg_foreach s =
130 incr Stat.nIteratorHeuristic;
131 pr2_cpp ("MACRO: found foreach: " ^ s)
132
133
134 (* ??
135 let msg_debug_macro s =
136 pr2_cpp ("MACRO: found debug-macro: " ^ s)
137 *)
138
139
140 let msg_macro_noptvirg s =
141 incr Stat.nMacroStmt;
142 pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)
143
144 let msg_macro_toplevel_noptvirg s =
145 incr Stat.nMacroStmt;
146 pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)
147
148 let msg_macro_noptvirg_single s =
149 incr Stat.nMacroStmt;
150 pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
151
152
153
154
155 let msg_macro_higher_order s =
156 incr Stat.nMacroHigherOrder;
157 msg_gen (!Flag_parsing_c.debug_cpp)
158 (fun s ->
159 (match s with
160 | "DBGINFO"
161 | "DBGPX"
162 | "DFLOW"
163 -> true
164 | _ -> false
165 )
166 )
167 (fun s -> pr2_cpp ("MACRO: found higher ordre macro : " ^ s))
168 s
169
170
171 let msg_stringification s =
172 incr Stat.nMacroString;
173 msg_gen (!Flag_parsing_c.debug_cpp)
174 (fun s ->
175 (match s with
176 | "REVISION"
177 | "UTS_RELEASE"
178 | "SIZE_STR"
179 | "DMA_STR"
180 -> true
181 (* s when s =~ ".*STR.*" -> true *)
182 | _ -> false
183 )
184 )
185 (fun s -> pr2_cpp ("MACRO: found string-macro " ^ s))
186 s
187
188 let msg_stringification_params s =
189 incr Stat.nMacroString;
190 pr2_cpp ("MACRO: string-macro with params : " ^ s)
191
192
193
194 let msg_apply_known_macro s =
195 incr Stat.nMacroExpand;
196 pr2_cpp ("MACRO: found known macro = " ^ s)
197
198 let msg_apply_known_macro_hint s =
199 incr Stat.nMacroHint;
200 pr2_cpp ("MACRO: found known macro hint = " ^ s)
201
202
203
204
205 let msg_ifdef_bool_passing is_ifdef_positif =
206 incr Stat.nIfdefZero; (* of Version ? *)
207 if is_ifdef_positif
208 then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION"
209 else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
210
211
212 let msg_ifdef_mid_something () =
213 incr Stat.nIfdefExprPassing;
214 pr2_cpp "found ifdef-mid-something"
215
216 let msg_ifdef_funheaders () =
217 incr Stat.nIfdefFunheader;
218 ()
219
220 let msg_ifdef_cparen_else () =
221 incr Stat.nIfdefPassing;
222 pr2_cpp("found ifdef-cparen-else")
223
224
225 let msg_attribute s =
226 incr Stat.nMacroAttribute;
227 pr2_cpp("ATTR:" ^ s)
228
229
230
231 (*****************************************************************************)
232 (* The regexp and basic view definitions *)
233 (*****************************************************************************)
234
235 (* opti: better to built then once and for all, especially regexp_foreach *)
236
237 let regexp_macro = Str.regexp
238 "^[A-Z_][A-Z_0-9]*$"
239
240 (* linuxext: *)
241 let regexp_annot = Str.regexp
242 "^__.*$"
243
244 (* linuxext: *)
245 let regexp_declare = Str.regexp
246 ".*DECLARE.*"
247
248 (* linuxext: *)
249 let regexp_foreach = Str.regexp_case_fold
250 ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
251
252 let regexp_typedef = Str.regexp
253 ".*_t$"
254
255 let false_typedef = [
256 "printk";
257 ]
258
259
260 let ok_typedef s = not (List.mem s false_typedef)
261
262 let not_annot s =
263 not (s ==~ regexp_annot)
264
265
266
267
268 (*****************************************************************************)
269 (* Helpers *)
270 (*****************************************************************************)
271
272 (* ------------------------------------------------------------------------- *)
273 (* the pair is the status of '()' and '{}', ex: (-1,0)
274 * if too much ')' and good '{}'
275 * could do for [] too ?
276 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
277 * then if have ifdef, then certainly can lead to a problem.
278 *)
279 let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
280 fun xs ->
281 let cnt_paren, cnt_brace = ref 0, ref 0 in
282 xs +> TV.iter_token_ifdef (fun x ->
283 (match x.tok with
284 | x when TH.is_opar x -> incr cnt_paren
285 | TOBrace _ -> incr cnt_brace
286 | x when TH.is_cpar x -> decr cnt_paren
287 | TCBrace _ -> decr cnt_brace
288 | _ -> ()
289 )
290 );
291 !cnt_paren, !cnt_brace
292
293
294 (* ------------------------------------------------------------------------- *)
295 let forLOOKAHEAD = 30
296
297
298 (* look if there is a '{' just after the closing ')', and handling the
299 * possibility to have nested expressions inside nested parenthesis
300 *
301 * todo: use indentation instead of premier(statement) ?
302 *)
303 let rec is_really_foreach xs =
304 let rec is_foreach_aux = function
305 | [] -> false, []
306 | TCPar _::TOBrace _::xs -> true, xs
307 (* the following attempts to handle the cases where there is a
308 single statement in the body of the loop. undoubtedly more
309 cases are needed.
310 todo: premier(statement) - suivant(funcall)
311 *)
312 | TCPar _::TIdent _::xs -> true, xs
313 | TCPar _::Tif _::xs -> true, xs
314 | TCPar _::Twhile _::xs -> true, xs
315 | TCPar _::Tfor _::xs -> true, xs
316 | TCPar _::Tswitch _::xs -> true, xs
317 | TCPar _::Treturn _::xs -> true, xs
318
319
320 | TCPar _::xs -> false, xs
321 | TOPar _::xs ->
322 let (_, xs') = is_foreach_aux xs in
323 is_foreach_aux xs'
324 | x::xs -> is_foreach_aux xs
325 in
326 is_foreach_aux xs +> fst
327
328
329 (* ------------------------------------------------------------------------- *)
330 let set_ifdef_token_parenthize_info cnt x =
331 match x with
332 | TIfdef (tag, _)
333 | TIfdefelse (tag, _)
334 | TIfdefelif (tag, _)
335 | TEndif (tag, _)
336
337 | TIfdefBool (_, tag, _)
338 | TIfdefMisc (_, tag, _)
339 | TIfdefVersion (_, tag, _)
340 ->
341 tag := Some cnt;
342
343 | _ -> raise Impossible
344
345
346
347 let ifdef_paren_cnt = ref 0
348
349
350 let rec set_ifdef_parenthize_info xs =
351 xs +> List.iter (function
352 | NotIfdefLine xs -> ()
353 | Ifdefbool (_, xxs, info_ifdef)
354 | Ifdef (xxs, info_ifdef) ->
355
356 incr ifdef_paren_cnt;
357 let total_directives = List.length info_ifdef in
358
359 info_ifdef +> List.iter (fun x ->
360 set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives)
361 x.tok);
362 xxs +> List.iter set_ifdef_parenthize_info
363 )
364
365
366 (*****************************************************************************)
367 (* CPP handling: macros, ifdefs, macros defs *)
368 (*****************************************************************************)
369
370 (* ------------------------------------------------------------------------- *)
371 (* special skip_start skip_end handling *)
372 (* ------------------------------------------------------------------------- *)
373
374 (* note: after this normally the token list should not contain any more the
375 * TCommentSkipTagStart and End tokens.
376 *)
377 let rec commentize_skip_start_to_end xs =
378 match xs with
379 | [] -> ()
380 | x::xs ->
381 (match x with
382 | {tok = TCommentSkipTagStart info} ->
383 (try
384 let (before, x2, after) =
385 xs +> Common.split_when (function
386 | {tok = TCommentSkipTagEnd _ } -> true
387 | _ -> false
388 )
389 in
390 let topass = x::x2::before in
391 topass +> List.iter (fun tok ->
392 set_as_comment Token_c.CppPassingExplicit tok
393 );
394 commentize_skip_start_to_end after
395 with Not_found ->
396 failwith "could not find end of skip_start special comment"
397 )
398 | {tok = TCommentSkipTagEnd info} ->
399 failwith "found skip_end comment but no skip_start"
400 | _ ->
401 commentize_skip_start_to_end xs
402 )
403
404
405
406
407 (* ------------------------------------------------------------------------- *)
408 (* ifdef keeping/passing *)
409 (* ------------------------------------------------------------------------- *)
410
411 (* #if 0, #if 1, #if LINUX_VERSION handling *)
412 let rec find_ifdef_bool xs =
413 xs +> List.iter (function
414 | NotIfdefLine _ -> ()
415 | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) ->
416
417 msg_ifdef_bool_passing is_ifdef_positif;
418
419 (match xxs with
420 | [] -> raise Impossible
421 | firstclause::xxs ->
422 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
423
424 if is_ifdef_positif
425 then xxs +> List.iter
426 (iter_token_ifdef (set_as_comment Token_c.CppPassingNormal))
427 else begin
428 firstclause +> iter_token_ifdef (set_as_comment Token_c.CppPassingNormal);
429 (match List.rev xxs with
430 (* keep only last *)
431 | last::startxs ->
432 startxs +> List.iter
433 (iter_token_ifdef (set_as_comment Token_c.CppPassingNormal))
434 | [] -> (* not #else *) ()
435 );
436 end
437 );
438
439 | Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool
440 )
441
442
443
444 let thresholdIfdefSizeMid = 6
445
446 (* infer ifdef involving not-closed expressions/statements *)
447 let rec find_ifdef_mid xs =
448 xs +> List.iter (function
449 | NotIfdefLine _ -> ()
450 | Ifdef (xxs, info_ifdef_stmt) ->
451 (match xxs with
452 | [] -> raise Impossible
453 | [first] -> ()
454 | first::second::rest ->
455 (* don't analyse big ifdef *)
456 if xxs +> List.for_all
457 (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
458 (* don't want nested ifdef *)
459 xxs +> List.for_all (fun xs ->
460 xs +> List.for_all
461 (function NotIfdefLine _ -> true | _ -> false)
462 )
463
464 then
465 let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
466 let cnt1, cnt2 = List.hd counts in
467 if cnt1 <> 0 || cnt2 <> 0 &&
468 counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
469 (*
470 if counts +> List.exists (fun (cnt1, cnt2) ->
471 cnt1 <> 0 || cnt2 <> 0
472 )
473 *)
474 then begin
475 msg_ifdef_mid_something();
476
477 (* keep only first, treat the rest as comment *)
478 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
479 (second::rest) +> List.iter
480 (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError));
481 end
482
483 );
484 List.iter find_ifdef_mid xxs
485
486 (* no need complex analysis for ifdefbool *)
487 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
488 List.iter find_ifdef_mid xxs
489
490
491 )
492
493
494 let thresholdFunheaderLimit = 4
495
496 (* ifdef defining alternate function header, type *)
497 let rec find_ifdef_funheaders = function
498 | [] -> ()
499 | NotIfdefLine _::xs -> find_ifdef_funheaders xs
500
501 (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
502 | Ifdef
503 ([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1;
504 (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
505 ], info_ifdef_stmt
506 )
507 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line3)
508 ::xs
509 when List.length ifdefblock1 <= thresholdFunheaderLimit &&
510 List.length ifdefblock2 <= thresholdFunheaderLimit
511 ->
512 find_ifdef_funheaders xs;
513
514 msg_ifdef_funheaders ();
515 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
516 let all_toks = [xline2] @ line2 in
517 all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) ;
518 ifdefblock2 +> iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError);
519
520 (* ifdef with nested ifdef *)
521 | Ifdef
522 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
523 [Ifdef
524 ([[NotIfdefLine (({col = 0} as xline2)::line2)];
525 [NotIfdefLine (({col = 0} as xline3)::line3)];
526 ], info_ifdef_stmt2
527 )
528 ]
529 ], info_ifdef_stmt
530 )
531 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
532 ::xs
533 ->
534 find_ifdef_funheaders xs;
535
536 msg_ifdef_funheaders ();
537 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
538 info_ifdef_stmt2 +> List.iter (set_as_comment Token_c.CppDirective);
539 let all_toks = [xline2;xline3] @ line2 @ line3 in
540 all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError);
541
542 (* ifdef with elseif *)
543 | Ifdef
544 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
545 [NotIfdefLine (({col = 0} as xline2)::line2)];
546 [NotIfdefLine (({col = 0} as xline3)::line3)];
547 ], info_ifdef_stmt
548 )
549 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
550 ::xs
551 ->
552 find_ifdef_funheaders xs;
553
554 msg_ifdef_funheaders ();
555 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
556 let all_toks = [xline2;xline3] @ line2 @ line3 in
557 all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError)
558
559 (* recurse *)
560 | Ifdef (xxs,info_ifdef_stmt)::xs
561 | Ifdefbool (_, xxs,info_ifdef_stmt)::xs ->
562 List.iter find_ifdef_funheaders xxs;
563 find_ifdef_funheaders xs
564
565
566
567 (* ?? *)
568 let rec adjust_inifdef_include xs =
569 xs +> List.iter (function
570 | NotIfdefLine _ -> ()
571 | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) ->
572 xxs +> List.iter (iter_token_ifdef (fun tokext ->
573 match tokext.tok with
574 | Parser_c.TInclude (s1, s2, inifdef_ref, ii) ->
575 inifdef_ref := true;
576 | _ -> ()
577 ));
578 )
579
580
581
582
583
584
585
586 let rec find_ifdef_cparen_else xs =
587 let rec aux xs =
588 xs +> List.iter (function
589 | NotIfdefLine _ -> ()
590 | Ifdef (xxs, info_ifdef_stmt) ->
591 (match xxs with
592 | [] -> raise Impossible
593 | [first] -> ()
594 | first::second::rest ->
595
596 (* found a closing ')' just after the #else *)
597
598 (* Too bad ocaml does not support better list pattern matching
599 * a la Prolog-III where can match the end of lists.
600 *)
601 let condition =
602 if List.length first = 0 then false
603 else
604 let last_line = Common.last first in
605 match last_line with
606 | NotIfdefLine xs ->
607 if List.length xs = 0 then false
608 else
609 let last_tok = Common.last xs in
610 TH.is_cpar last_tok.tok
611 | Ifdef _ | Ifdefbool _ -> false
612 in
613 if condition then begin
614 msg_ifdef_cparen_else();
615
616 (* keep only first, treat the rest as comment *)
617 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
618 (second::rest) +> List.iter
619 (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError));
620 end
621
622 );
623 List.iter aux xxs
624
625 (* no need complex analysis for ifdefbool *)
626 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
627 List.iter aux xxs
628 )
629 in aux xs
630
631
632 (* ------------------------------------------------------------------------- *)
633 (* cpp-builtin part2, macro, using standard.h or other defs *)
634 (* ------------------------------------------------------------------------- *)
635
636 (* now in cpp_token_c.ml *)
637
638 (* ------------------------------------------------------------------------- *)
639 (* stringification *)
640 (* ------------------------------------------------------------------------- *)
641
642 let rec find_string_macro_paren xs =
643 match xs with
644 | [] -> ()
645 | Parenthised(xxs, info_parens)::xs ->
646 xxs +> List.iter (fun xs ->
647 if xs +> List.exists
648 (function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) &&
649 xs +> List.for_all
650 (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) ->
651 true | _ -> false)
652 then
653 xs +> List.iter (fun tok ->
654 match tok with
655 | PToken({tok = TIdent (s,_)} as id) ->
656 msg_stringification s;
657 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
658 | _ -> ()
659 )
660 else
661 find_string_macro_paren xs
662 );
663 find_string_macro_paren xs
664 | PToken(tok)::xs ->
665 find_string_macro_paren xs
666
667
668 (* ------------------------------------------------------------------------- *)
669 (* macro2 *)
670 (* ------------------------------------------------------------------------- *)
671
672 (* don't forget to recurse in each case *)
673 let rec find_macro_paren xs =
674 match xs with
675 | [] -> ()
676
677 (* attribute *)
678 | PToken ({tok = Tattribute _} as id)
679 ::Parenthised (xxs,info_parens)
680 ::xs
681 ->
682 pr2_cpp ("MACRO: __attribute detected ");
683 [Parenthised (xxs, info_parens)] +>
684 iter_token_paren (set_as_comment Token_c.CppAttr);
685 set_as_comment Token_c.CppAttr id;
686 find_macro_paren xs
687
688 (*
689 (* attribute cpp, __xxx id *)
690 | PToken ({tok = TIdent (s,i1)} as id)
691 ::PToken ({tok = TIdent (s2, i2)} as id2)
692 ::xs when s ==~ regexp_annot
693 ->
694 msg_attribute s;
695 id.tok <- TMacroAttr (s, i1);
696 find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)
697
698 (* attribute cpp, id __xxx *)
699 | PToken ({tok = TIdent (s,i1)} as _id)
700 ::PToken ({tok = TIdent (s2, i2)} as id2)
701 ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
702 ->
703 msg_attribute s2;
704 id2.tok <- TMacroAttr (s2, i2);
705 find_macro_paren xs
706
707 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
708 ::PToken ({tok = TIdent (s,i1)} as attr)
709 ::xs when s ==~ regexp_annot
710 ->
711 pr2_cpp ("storage attribute: " ^ s);
712 attr.tok <- TMacroAttrStorage (s,i1);
713 (* recurse, may have other storage attributes *)
714 find_macro_paren (PToken (tok1)::xs)
715
716
717 *)
718
719 (* storage attribute *)
720 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
721 ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs
722 ->
723 pr2_cpp ("storage attribute: " ^ s);
724 attr.tok <- TMacroAttrStorage (s,i1);
725 (* recurse, may have other storage attributes *)
726 find_macro_paren (PToken (tok1)::xs)
727
728
729 (* stringification
730 *
731 * the order of the matching clause is important
732 *
733 *)
734
735 (* string macro with params, before case *)
736 | PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
737 ::Parenthised (xxs, info_parens)
738 ::xs ->
739
740 msg_stringification_params s;
741 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
742 [Parenthised (xxs, info_parens)] +>
743 iter_token_paren (set_as_comment Token_c.CppMacro);
744 find_macro_paren xs
745
746 (* after case *)
747 | PToken ({tok = TIdent (s,_)} as id)
748 ::Parenthised (xxs, info_parens)
749 ::PToken ({tok = (TString _ | TMacroString _)})
750 ::xs ->
751
752 msg_stringification_params s;
753 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
754 [Parenthised (xxs, info_parens)] +>
755 iter_token_paren (set_as_comment Token_c.CppMacro);
756 find_macro_paren xs
757
758
759 (* for the case where the string is not inside a funcall, but
760 * for instance in an initializer.
761 *)
762
763 (* string macro variable, before case *)
764 | PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
765 ::xs ->
766
767 msg_stringification s;
768 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
769 find_macro_paren xs
770
771 (* after case *)
772 | PToken ({tok = TIdent (s,_)} as id)
773 ::PToken ({tok = (TString _ | TMacroString _)})
774 ::xs ->
775
776 msg_stringification s;
777 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
778 find_macro_paren xs
779
780
781
782
783
784 (* recurse *)
785 | (PToken x)::xs -> find_macro_paren xs
786 | (Parenthised (xxs, info_parens))::xs ->
787 xxs +> List.iter find_macro_paren;
788 find_macro_paren xs
789
790
791
792
793
794 (* don't forget to recurse in each case *)
795 let rec find_macro_lineparen xs =
796 match xs with
797 | [] -> ()
798
799 (* linuxext: ex: static [const] DEVICE_ATTR(); *)
800 | (Line
801 (
802 [PToken ({tok = Tstatic _});
803 PToken ({tok = TIdent (s,_)} as macro);
804 Parenthised (xxs,info_parens);
805 PToken ({tok = TPtVirg _});
806 ]
807 ))
808 ::xs
809 when (s ==~ regexp_macro) ->
810
811 msg_declare_macro s;
812 let info = TH.info_of_tok macro.tok in
813 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
814
815 find_macro_lineparen (xs)
816
817 (* the static const case *)
818 | (Line
819 (
820 [PToken ({tok = Tstatic _});
821 PToken ({tok = Tconst _} as const);
822 PToken ({tok = TIdent (s,_)} as macro);
823 Parenthised (xxs,info_parens);
824 PToken ({tok = TPtVirg _});
825 ]
826 (*as line1*)
827
828 ))
829 ::xs
830 when (s ==~ regexp_macro) ->
831
832 msg_declare_macro s;
833 let info = TH.info_of_tok macro.tok in
834 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
835
836 (* need retag this const, otherwise ambiguity in grammar
837 21: shift/reduce conflict (shift 121, reduce 137) on Tconst
838 decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
839 decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
840 storage_class_spec : Tstatic . (137)
841 *)
842 const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
843
844 find_macro_lineparen (xs)
845
846
847 (* same but without trailing ';'
848 *
849 * I do not put the final ';' because it can be on a multiline and
850 * because of the way mk_line is coded, we will not have access to
851 * this ';' on the next line, even if next to the ')' *)
852 | (Line
853 ([PToken ({tok = Tstatic _});
854 PToken ({tok = TIdent (s,_)} as macro);
855 Parenthised (xxs,info_parens);
856 ]
857 ))
858 ::xs
859 when s ==~ regexp_macro ->
860
861 msg_declare_macro s;
862 let info = TH.info_of_tok macro.tok in
863 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
864
865 find_macro_lineparen (xs)
866
867
868
869
870 (* on multiple lines *)
871 | (Line
872 (
873 (PToken ({tok = Tstatic _})::[]
874 )))
875 ::(Line
876 (
877 [PToken ({tok = TIdent (s,_)} as macro);
878 Parenthised (xxs,info_parens);
879 PToken ({tok = TPtVirg _});
880 ]
881 )
882 )
883 ::xs
884 when (s ==~ regexp_macro) ->
885
886 msg_declare_macro s;
887 let info = TH.info_of_tok macro.tok in
888 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
889
890 find_macro_lineparen (xs)
891
892
893 (* linuxext: ex: DECLARE_BITMAP();
894 *
895 * Here I use regexp_declare and not regexp_macro because
896 * Sometimes it can be a FunCallMacro such as DEBUG(foo());
897 * Here we don't have the preceding 'static' so only way to
898 * not have positive is to restrict to .*DECLARE.* macros.
899 *
900 * but there is a grammar rule for that, so don't need this case anymore
901 * unless the parameter of the DECLARE_xxx are weird and can not be mapped
902 * on a argument_list
903 *)
904
905 | (Line
906 ([PToken ({tok = TIdent (s,_)} as macro);
907 Parenthised (xxs,info_parens);
908 PToken ({tok = TPtVirg _});
909 ]
910 ))
911 ::xs
912 when (s ==~ regexp_declare) ->
913
914 msg_declare_macro s;
915 let info = TH.info_of_tok macro.tok in
916 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
917
918 find_macro_lineparen (xs)
919
920
921 (* toplevel macros.
922 * module_init(xxx)
923 *
924 * Could also transform the TIdent in a TMacroTop but can have false
925 * positive, so easier to just change the TCPar and so just solve
926 * the end-of-stream pb of ocamlyacc
927 *)
928 | (Line
929 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro);
930 Parenthised (xxs,info_parens);
931 ] as _line1
932 ))
933 ::xs when col1 =|= 0
934 ->
935 let condition =
936 (* to reduce number of false positive *)
937 (match xs with
938 | (Line (PToken ({col = col2 } as other)::restline2))::_ ->
939 TH.is_eof other.tok || (col2 =|= 0 &&
940 (match other.tok with
941 | TOBrace _ -> false (* otherwise would match funcdecl *)
942 | TCBrace _ when ctx <> InFunction -> false
943 | TPtVirg _
944 | TDotDot _
945 -> false
946 | tok when TH.is_binary_operator tok -> false
947
948 | _ -> true
949 )
950 )
951 | _ -> false
952 )
953 in
954 if condition
955 then begin
956
957 msg_macro_toplevel_noptvirg s;
958 (* just to avoid the end-of-stream pb of ocamlyacc *)
959 let tcpar = Common.last info_parens in
960 tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok);
961
962 (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
963
964 end;
965
966 find_macro_lineparen (xs)
967
968
969
970 (* macro with parameters
971 * ex: DEBUG()
972 * return x;
973 *)
974 | (Line
975 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
976 Parenthised (xxs,info_parens);
977 ] as _line1
978 ))
979 ::(Line
980 (PToken ({col = col2 } as other)::restline2
981 ) as line2)
982 ::xs
983 (* when s ==~ regexp_macro *)
984 ->
985 let condition =
986 (col1 =|= col2 &&
987 (match other.tok with
988 | TOBrace _ -> false (* otherwise would match funcdecl *)
989 | TCBrace _ when ctx <> InFunction -> false
990 | TPtVirg _
991 | TDotDot _
992 -> false
993 | tok when TH.is_binary_operator tok -> false
994
995 | _ -> true
996 )
997 )
998 ||
999 (col2 <= col1 &&
1000 (match other.tok, restline2 with
1001 | TCBrace _, _ when ctx =*= InFunction -> true
1002 | Treturn _, _ -> true
1003 | Tif _, _ -> true
1004 | Telse _, _ -> true
1005
1006 (* case of label, usually put in first line *)
1007 | TIdent _, (PToken ({tok = TDotDot _}))::_ ->
1008 true
1009
1010
1011 | _ -> false
1012 )
1013 )
1014
1015 in
1016
1017 if condition
1018 then
1019 if col1 =|= 0 then ()
1020 else begin
1021 msg_macro_noptvirg s;
1022 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1023 [Parenthised (xxs, info_parens)] +>
1024 iter_token_paren (set_as_comment Token_c.CppMacro);
1025 end;
1026
1027 find_macro_lineparen (line2::xs)
1028
1029 (* linuxext:? single macro
1030 * ex: LOCK
1031 * foo();
1032 * UNLOCK
1033 *
1034 * todo: factorize code with previous rule ?
1035 *)
1036 | (Line
1037 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1038 ] as _line1
1039 ))
1040 ::(Line
1041 (PToken ({col = col2 } as other)::restline2
1042 ) as line2)
1043 ::xs ->
1044 (* when s ==~ regexp_macro *)
1045
1046 let condition =
1047 (col1 =|= col2 &&
1048 col1 <> 0 && (* otherwise can match typedef of fundecl*)
1049 (match other.tok with
1050 | TPtVirg _ -> false
1051 | TOr _ -> false
1052 | TCBrace _ when ctx <> InFunction -> false
1053 | tok when TH.is_binary_operator tok -> false
1054
1055 | _ -> true
1056 )) ||
1057 (col2 <= col1 &&
1058 (match other.tok with
1059 | TCBrace _ when ctx =*= InFunction -> true
1060 | Treturn _ -> true
1061 | Tif _ -> true
1062 | Telse _ -> true
1063 | _ -> false
1064 ))
1065 in
1066
1067 if condition
1068 then begin
1069 msg_macro_noptvirg_single s;
1070 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1071 end;
1072 find_macro_lineparen (line2::xs)
1073
1074 | x::xs ->
1075 find_macro_lineparen xs
1076
1077
1078
1079 (* ------------------------------------------------------------------------- *)
1080 (* define tobrace init *)
1081 (* ------------------------------------------------------------------------- *)
1082
1083 let rec find_define_init_brace_paren xs =
1084 let rec aux xs =
1085 match xs with
1086 | [] -> ()
1087
1088 (* mainly for firefox *)
1089 | (PToken {tok = TDefine _})
1090 ::(PToken {tok = TIdentDefine (s,_)})
1091 ::(PToken ({tok = TOBrace i1} as tokbrace))
1092 ::(PToken tok2)
1093 ::(PToken tok3)
1094 ::xs ->
1095 let is_init =
1096 match tok2.tok, tok3.tok with
1097 | TInt _, TComma _ -> true
1098 | TString _, TComma _ -> true
1099 | TIdent _, TComma _ -> true
1100 | _ -> false
1101
1102 in
1103 if is_init
1104 then begin
1105 pr2_cpp("found define initializer: " ^s);
1106 tokbrace.tok <- TOBraceDefineInit i1;
1107 end;
1108
1109 aux xs
1110
1111 (* mainly for linux, especially in sound/ *)
1112 | (PToken {tok = TDefine _})
1113 ::(PToken {tok = TIdentDefine (s,_)})
1114 ::(Parenthised(xxx, info_parens))
1115 ::(PToken ({tok = TOBrace i1} as tokbrace))
1116 ::(PToken tok2)
1117 ::(PToken tok3)
1118 ::xs ->
1119 let is_init =
1120 match tok2.tok, tok3.tok with
1121 | TInt _, TComma _ -> true
1122 | TDot _, TIdent _ -> true
1123 | TIdent _, TComma _ -> true
1124 | _ -> false
1125
1126 in
1127 if is_init
1128 then begin
1129 pr2_cpp("found define initializer with param: " ^ s);
1130 tokbrace.tok <- TOBraceDefineInit i1;
1131 end;
1132
1133 aux xs
1134
1135
1136
1137 (* recurse *)
1138 | (PToken x)::xs -> aux xs
1139 | (Parenthised (xxs, info_parens))::xs ->
1140 (* not need for tobrace init:
1141 * xxs +> List.iter aux;
1142 *)
1143 aux xs
1144 in
1145 aux xs
1146
1147
1148 (* ------------------------------------------------------------------------- *)
1149 (* action *)
1150 (* ------------------------------------------------------------------------- *)
1151
1152 (* obsolete now with macro expansion ? get some regression if comment.
1153 * todo: if do bad decision here, then it can influence other phases
1154 * and make it hard to parse. So maybe when have a parse error, should
1155 * undo some of the guess those heuristics have done, and restore
1156 * the original token value.
1157 *)
1158
1159 let rec find_actions = function
1160 | [] -> ()
1161
1162 | PToken ({tok = TIdent (s,ii)})
1163 ::Parenthised (xxs,info_parens)
1164 ::xs ->
1165 find_actions xs;
1166 xxs +> List.iter find_actions;
1167 let modified = find_actions_params xxs in
1168 if modified
1169 then msg_macro_higher_order s
1170
1171 | x::xs ->
1172 find_actions xs
1173
1174 and find_actions_params xxs =
1175 xxs +> List.fold_left (fun acc xs ->
1176 let toks = tokens_of_paren xs in
1177 if toks +> List.exists (fun x -> TH.is_statement x.tok)
1178 (* undo: && List.length toks > 1
1179 * good for sparse, not good for linux
1180 *)
1181 then begin
1182 xs +> iter_token_paren (fun x ->
1183 if TH.is_eof x.tok
1184 then
1185 (* certainly because paren detection had a pb because of
1186 * some ifdef-exp. Do similar additional checking than
1187 * what is done in set_as_comment.
1188 *)
1189 pr2 "PB: weird, I try to tag an EOF token as an action"
1190 else
1191 (* cf tests-bis/no_cpar_macro.c *)
1192 if TH.is_eom x.tok
1193 then
1194 pr2 "PB: weird, I try to tag an EOM token as an action"
1195 else
1196 x.tok <- TAction (TH.info_of_tok x.tok);
1197 );
1198 true (* modified *)
1199 end
1200 else acc
1201 ) false
1202
1203
1204
1205 (* ------------------------------------------------------------------------- *)
1206 (* main fix cpp function *)
1207 (* ------------------------------------------------------------------------- *)
1208
1209 let filter_cpp_stuff xs =
1210 let rec aux xs =
1211 match xs with
1212 | [] -> []
1213 | x::xs ->
1214 (match x.tok with
1215 | tok when TH.is_comment tok -> aux xs
1216 (* don't want drop the define, or if drop, have to drop
1217 * also its body otherwise the line heuristics may be lost
1218 * by not finding the TDefine in column 0 but by finding
1219 * a TDefineIdent in a column > 0
1220 *)
1221 | Parser_c.TDefine _ ->
1222 x::aux xs
1223 | tok when TH.is_cpp_instruction tok -> aux xs
1224 | _ -> x::aux xs
1225 )
1226 in
1227 aux xs
1228
1229 let insert_virtual_positions l =
1230 let strlen x = String.length (Ast_c.str_of_info x) in
1231 let rec loop prev offset acc = function
1232 [] -> List.rev acc
1233 | x::xs ->
1234 let ii = TH.info_of_tok x in
1235 let inject pi =
1236 TH.visitor_info_of_tok (function ii -> Ast_c.rewrap_pinfo pi ii) x in
1237 match Ast_c.pinfo_of_info ii with
1238 Ast_c.OriginTok pi ->
1239 let prev = Ast_c.parse_info_of_info ii in
1240 loop prev (strlen ii) (x::acc) xs
1241 | Ast_c.ExpandedTok (pi,_) ->
1242 let x' = inject (Ast_c.ExpandedTok (pi,(prev,offset))) in
1243 loop prev (offset + (strlen ii)) (x'::acc) xs
1244 | Ast_c.FakeTok (s,_) ->
1245 let x' = inject (Ast_c.FakeTok (s,(prev,offset))) in
1246 loop prev (offset + (strlen ii)) (x'::acc) xs
1247 | Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in
1248 let rec skip_fake = function
1249 | [] -> []
1250 | x::xs ->
1251 let ii = TH.info_of_tok x in
1252 match Ast_c.pinfo_of_info ii with
1253 | Ast_c.OriginTok pi ->
1254 let prev = Ast_c.parse_info_of_info ii in
1255 let res = loop prev (strlen ii) [] xs in
1256 x::res
1257 | _ -> x::skip_fake xs in
1258 skip_fake l
1259
1260
1261 (* ------------------------------------------------------------------------- *)
1262 let fix_tokens_cpp2 ~macro_defs tokens =
1263 let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in
1264
1265 begin
1266 (* the order is important, if you put the action heuristic first,
1267 * then because of ifdef, can have not closed paren
1268 * and so may believe that higher order macro
1269 * and it will eat too much tokens. So important to do
1270 * first the ifdef.
1271 *
1272 * I recompute multiple times cleaner cos the mutable
1273 * can have be changed and so may have more comments
1274 * in the token original list.
1275 *
1276 *)
1277
1278 commentize_skip_start_to_end !tokens2;
1279
1280 (* ifdef *)
1281 let cleaner = !tokens2 +> List.filter (fun x ->
1282 (* is_comment will also filter the TCommentCpp created in
1283 * commentize_skip_start_to_end *)
1284 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1285 ) in
1286 let ifdef_grouped = TV.mk_ifdef cleaner in
1287 set_ifdef_parenthize_info ifdef_grouped;
1288
1289 find_ifdef_funheaders ifdef_grouped;
1290 find_ifdef_bool ifdef_grouped;
1291 find_ifdef_mid ifdef_grouped;
1292 (* change order ? maybe cparen_else heuristic make some of the funheaders
1293 * heuristics irrelevant ?
1294 *)
1295 find_ifdef_cparen_else ifdef_grouped;
1296 adjust_inifdef_include ifdef_grouped;
1297
1298
1299 (* macro 1 *)
1300 let cleaner = !tokens2 +> filter_cpp_stuff in
1301
1302 let paren_grouped = TV.mk_parenthised cleaner in
1303 Cpp_token_c.apply_macro_defs
1304 ~msg_apply_known_macro
1305 ~msg_apply_known_macro_hint
1306 macro_defs paren_grouped;
1307 (* because the before field is used by apply_macro_defs *)
1308 tokens2 := TV.rebuild_tokens_extented !tokens2;
1309
1310 (* tagging contextual info (InFunc, InStruct, etc). Better to do
1311 * that after the "ifdef-simplification" phase.
1312 *)
1313 let cleaner = !tokens2 +> List.filter (fun x ->
1314 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1315 ) in
1316
1317 let brace_grouped = TV.mk_braceised cleaner in
1318 set_context_tag brace_grouped;
1319
1320
1321
1322 (* macro *)
1323 let cleaner = !tokens2 +> filter_cpp_stuff in
1324
1325 let paren_grouped = TV.mk_parenthised cleaner in
1326 let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
1327 find_define_init_brace_paren paren_grouped;
1328 find_string_macro_paren paren_grouped;
1329 find_macro_lineparen line_paren_grouped;
1330 find_macro_paren paren_grouped;
1331
1332
1333 (* obsolete: actions ? not yet *)
1334 let cleaner = !tokens2 +> filter_cpp_stuff in
1335 let paren_grouped = TV.mk_parenthised cleaner in
1336 find_actions paren_grouped;
1337
1338
1339
1340 insert_virtual_positions (!tokens2 +> Common.acc_map (fun x -> x.tok))
1341 end
1342
1343 let time_hack1 ~macro_defs a =
1344 Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 ~macro_defs a)
1345
1346 let fix_tokens_cpp ~macro_defs a =
1347 Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 ~macro_defs a)
1348
1349
1350
1351 (*****************************************************************************)
1352 (* for the cpp-builtin, standard.h, part 0 *)
1353 (*****************************************************************************)
1354
1355 (* now in cpp_token_c.ml *)
1356
1357 (*****************************************************************************)
1358 (* Lexing with lookahead *)
1359 (*****************************************************************************)
1360
1361 (* Why using yet another parsing_hack technique ? The fix_xxx where do
1362 * some pre-processing on the full list of tokens is not enough ?
1363 * No cos sometimes we need more contextual info, and even if
1364 * set_context() tries to give some contextual info, it's not completely
1365 * accurate so the following code give yet another alternative, yet another
1366 * chance to transform some tokens.
1367 *
1368 * todo?: maybe could try to get rid of this technique. Maybe a better
1369 * set_context() would make possible to move this code using a fix_xx
1370 * technique.
1371 *
1372 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
1373 * it is more general to do it via my LALR(k) tech. Because here we can
1374 * transform some token give some context information. So sometimes it
1375 * makes sense to transform a token in one context, sometimes not, and
1376 * lex can not provide us this context information. Note that the order
1377 * in the pattern matching in lookahead is important. Do not cut/paste.
1378 *
1379 * Note that in next there is only "clean" tokens, there is no comment
1380 * or space tokens. This is done by the caller.
1381 *
1382 *)
1383
1384 open Lexer_parser (* for the fields of lexer_hint type *)
1385
1386 let not_struct_enum = function
1387 | (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _)::_ -> false
1388 | _ -> true
1389
1390
1391 let lookahead2 ~pass next before =
1392
1393 match (next, before) with
1394
1395 (*-------------------------------------------------------------*)
1396 (* typedef inference, parse_typedef_fix3 *)
1397 (*-------------------------------------------------------------*)
1398 (* xx xx *)
1399 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
1400 && ok_typedef s
1401 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
1402 ->
1403 (* parse_typedef_fix3:
1404 * acpi_object acpi_object;
1405 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
1406 * Le parser en interne a deja appelé le prochain token pour pouvoir
1407 * decider des choses.
1408 * => special case in lexer_heuristic, again
1409 *)
1410 if !Flag_parsing_c.debug_typedef
1411 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
1412
1413 LP.disable_typedef();
1414
1415 msg_typedef s; LP.add_typedef_root s;
1416 TypedefIdent (s, i1)
1417
1418 (* xx yy *)
1419 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
1420 && ok_typedef s
1421 ->
1422 (* && not_annot s2 BUT lead to false positive*)
1423
1424 msg_typedef s; LP.add_typedef_root s;
1425 TypedefIdent (s, i1)
1426
1427
1428 (* xx inline *)
1429 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
1430 && ok_typedef s
1431 ->
1432 msg_typedef s; LP.add_typedef_root s;
1433 TypedefIdent (s, i1)
1434
1435
1436 (* [,(] xx [,)] AND param decl *)
1437 | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ )
1438 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1439 && ok_typedef s
1440 ->
1441 msg_typedef s; LP.add_typedef_root s;
1442 TypedefIdent (s, i1)
1443
1444 (* xx* [,)] *)
1445 (* specialcase: [,(] xx* [,)] *)
1446 | (TIdent (s, i1)::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1447 when not_struct_enum before
1448 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1449 && ok_typedef s
1450 ->
1451 msg_typedef s; LP.add_typedef_root s;
1452 TypedefIdent (s, i1)
1453
1454
1455 (* xx** [,)] *)
1456 (* specialcase: [,(] xx** [,)] *)
1457 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
1458 when not_struct_enum before
1459 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1460 && ok_typedef s
1461 ->
1462 msg_typedef s; LP.add_typedef_root s;
1463 TypedefIdent (s, i1)
1464
1465
1466
1467 (* xx const * USELESS because of next rule ? *)
1468 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
1469 when not_struct_enum before
1470 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1471 && ok_typedef s
1472 ->
1473
1474 msg_typedef s; LP.add_typedef_root s;
1475 TypedefIdent (s, i1)
1476
1477 (* xx const *)
1478 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
1479 when not_struct_enum before
1480 && ok_typedef s
1481 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1482 ->
1483
1484 msg_typedef s; LP.add_typedef_root s;
1485 TypedefIdent (s, i1)
1486
1487
1488 (* xx * const *)
1489 | (TIdent (s, i1)::TMul _::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
1490 when not_struct_enum before
1491 && ok_typedef s
1492 ->
1493 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1494
1495 msg_typedef s; LP.add_typedef_root s;
1496 TypedefIdent (s, i1)
1497
1498
1499 (* ( const xx) *)
1500 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
1501 ok_typedef s ->
1502 msg_typedef s; LP.add_typedef_root s;
1503 TypedefIdent (s, i1)
1504
1505
1506
1507 (* ( xx ) [sizeof, ~] *)
1508 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
1509 when not_struct_enum before
1510 && ok_typedef s
1511 ->
1512 msg_typedef s; LP.add_typedef_root s;
1513 TypedefIdent (s, i1)
1514
1515 (* [(,] xx [ AND parameterdeclaration *)
1516 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
1517 when (LP.current_context() =*= LP.InParameter)
1518 && ok_typedef s
1519 ->
1520 msg_typedef s; LP.add_typedef_root s;
1521 TypedefIdent (s, i1)
1522
1523 (*------------------------------------------------------------*)
1524 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
1525 (* but if have a '=', or ',' I think not *)
1526 (*------------------------------------------------------------*)
1527
1528 (* static xx * yy *)
1529 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ ,
1530 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
1531 ok_typedef s
1532 ->
1533 msg_typedef s; LP.add_typedef_root s;
1534 TypedefIdent (s, i1)
1535
1536 (* TODO xx * yy ; AND in start of compound element *)
1537
1538
1539 (* xx * yy, AND in paramdecl *)
1540 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _)
1541 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
1542 && ok_typedef s
1543 ->
1544
1545 msg_typedef s; LP.add_typedef_root s;
1546 TypedefIdent (s, i1)
1547
1548
1549 (* xx * yy ; AND in Toplevel, except when have = before *)
1550 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
1551 TIdent (s, i1)
1552 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , _)
1553 when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ()))
1554 ->
1555 msg_typedef s; LP.add_typedef_root s;
1556 TypedefIdent (s, i1)
1557
1558 (* xx * yy , AND in Toplevel *)
1559 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _)
1560 when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
1561 && ok_typedef s
1562 ->
1563
1564 msg_typedef s; LP.add_typedef_root s;
1565 TypedefIdent (s, i1)
1566
1567 (* xx * yy ( AND in Toplevel *)
1568 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOPar _::_ , _)
1569 when not_struct_enum before
1570 && (LP.is_top_or_struct (LP.current_context ()))
1571 && ok_typedef s
1572 ->
1573 msg_typedef s; LP.add_typedef_root s;
1574 TypedefIdent (s, i1)
1575
1576 (* xx * yy [ *)
1577 (* todo? enough ? cos in struct def we can have some expression ! *)
1578 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOCro _::_ , _)
1579 when not_struct_enum before &&
1580 (LP.is_top_or_struct (LP.current_context ()))
1581 && ok_typedef s
1582 ->
1583 msg_typedef s; LP.add_typedef_root s;
1584 TypedefIdent (s, i1)
1585
1586 (* u16: 10; in struct *)
1587 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
1588 when (LP.is_top_or_struct (LP.current_context ()))
1589 && ok_typedef s
1590 ->
1591 msg_typedef s; LP.add_typedef_root s;
1592 TypedefIdent (s, i1)
1593
1594
1595 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
1596 (* YES cos at toplevel can have some expression !! for instance when *)
1597 (* enter in the dimension of an array *)
1598 (*
1599 | (TIdent s::TMul::TIdent s2::_ , _)
1600 when (take_safe 1 !passed_tok <> [Tstruct] &&
1601 (take_safe 1 !passed_tok <> [Tenum]))
1602 &&
1603 !LP._lexer_hint = Some LP.Toplevel ->
1604 msg_typedef s; LP.add_typedef_root s;
1605 TypedefIdent s
1606 *)
1607
1608 (* xx * yy = *)
1609 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TEq _::_ , _)
1610 when not_struct_enum before
1611 && ok_typedef s
1612 ->
1613 msg_typedef s; LP.add_typedef_root s;
1614 TypedefIdent (s, i1)
1615
1616
1617 (* xx * yy) AND in paramdecl *)
1618 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TCPar _::_ , _)
1619 when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
1620 && ok_typedef s
1621 ->
1622 msg_typedef s; LP.add_typedef_root s;
1623 TypedefIdent (s, i1)
1624
1625
1626 (* xx * yy; *) (* wrong ? *)
1627 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ ,
1628 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
1629 && ok_typedef s
1630 ->
1631 msg_typedef s; LP.add_typedef_root s;
1632 msg_maybe_dangereous_typedef s;
1633 TypedefIdent (s, i1)
1634
1635
1636 (* xx * yy, and ';' before xx *) (* wrong ? *)
1637 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ ,
1638 (TOBrace _| TPtVirg _)::_) when
1639 ok_typedef s
1640 ->
1641 msg_typedef s; LP.add_typedef_root s;
1642 TypedefIdent (s, i1)
1643
1644
1645 (* xx_t * yy *)
1646 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , _)
1647 when s ==~ regexp_typedef && not_struct_enum before
1648 (* struct user_info_t sometimes *)
1649 && ok_typedef s
1650 ->
1651 msg_typedef s; LP.add_typedef_root s;
1652 TypedefIdent (s, i1)
1653
1654 (* xx ** yy *) (* wrong ? *)
1655 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
1656 when not_struct_enum before
1657 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1658 && ok_typedef s
1659 ->
1660 msg_typedef s; LP.add_typedef_root s;
1661 TypedefIdent (s, i1)
1662
1663 (* xx *** yy *)
1664 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
1665 when not_struct_enum before
1666 && ok_typedef s
1667 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1668 ->
1669 msg_typedef s; LP.add_typedef_root s;
1670 TypedefIdent (s, i1)
1671
1672 (* xx ** ) *)
1673 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
1674 when not_struct_enum before
1675 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
1676 && ok_typedef s
1677 ->
1678 msg_typedef s; LP.add_typedef_root s;
1679 TypedefIdent (s, i1)
1680
1681
1682
1683 (* ----------------------------------- *)
1684 (* old: why not do like for other rules and start with TIdent ?
1685 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
1686 * new: prefer now start with TIdent because otherwise the add_typedef_root
1687 * may have no effect if in second pass or if have disable the add_typedef.
1688 *)
1689
1690 (* (xx) yy *)
1691 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
1692 (TOPar info)::x::_)
1693 when not (TH.is_stuff_taking_parenthized x) &&
1694 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3
1695 && ok_typedef s
1696 ->
1697
1698 msg_typedef s; LP.add_typedef_root s;
1699 (*TOPar info*)
1700 TypedefIdent (s, i1)
1701
1702
1703 (* (xx) ( yy)
1704 * but false positif: typedef int (xxx_t)(...), so do specialisation below.
1705 *)
1706 (*
1707 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
1708 when not (TH.is_stuff_taking_parenthized x)
1709 && ok_typedef s
1710 ->
1711 msg_typedef s; LP.add_typedef_root s;
1712 (* TOPar info *)
1713 TypedefIdent (s, i1)
1714 *)
1715 (* special case: = (xx) ( yy) *)
1716 | (TIdent (s, i1)::TCPar _::TOPar _::_ ,
1717 (TOPar info)::(TEq _ |TEqEq _)::_)
1718 when ok_typedef s
1719 ->
1720 msg_typedef s; LP.add_typedef_root s;
1721 (* TOPar info *)
1722 TypedefIdent (s, i1)
1723
1724
1725 (* (xx * ) yy *)
1726 | (TIdent (s, i1)::TMul _::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_) when
1727 ok_typedef s
1728 ->
1729 msg_typedef s; LP.add_typedef_root s;
1730 (*TOPar info*)
1731 TypedefIdent (s,i1)
1732
1733
1734 (* (xx){ ... } constructor *)
1735 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
1736 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
1737 && ok_typedef s
1738 ->
1739 msg_typedef s; LP.add_typedef_root s;
1740 TypedefIdent (s, i1)
1741
1742
1743 (* can have sizeof on expression
1744 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
1745 msg_typedef s; LP.add_typedef_root s;
1746 Tsizeof
1747 *)
1748
1749
1750 (* ----------------------------------- *)
1751 (* x ( *y )(params), function pointer *)
1752 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
1753 when not_struct_enum before
1754 && ok_typedef s
1755 ->
1756 msg_typedef s; LP.add_typedef_root s;
1757 TypedefIdent (s, i1)
1758
1759 (* x* ( *y )(params), function pointer 2 *)
1760 | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
1761 when not_struct_enum before
1762 && ok_typedef s
1763 ->
1764 msg_typedef s; LP.add_typedef_root s;
1765 TypedefIdent (s, i1)
1766
1767
1768 (*-------------------------------------------------------------*)
1769 (* CPP *)
1770 (*-------------------------------------------------------------*)
1771 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
1772 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
1773 as x)
1774 ::_, _
1775 ->
1776 (*
1777 if not !Flag_parsing_c.ifdef_to_if
1778 then TCommentCpp (Ast_c.CppDirective, ii)
1779 else
1780 *)
1781 (* not !LP._lexer_hint.toplevel *)
1782 if !Flag_parsing_c.ifdef_directive_passing
1783 || (pass >= 2)
1784 then begin
1785
1786 if (LP.current_context () =*= LP.InInitializer)
1787 then begin
1788 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
1789 incr Stat.nIfdefInitializer;
1790 end else begin
1791 pr2_cpp("IFDEF: or related inside function. I treat it as comment");
1792 incr Stat.nIfdefPassing;
1793 end;
1794 TCommentCpp (Token_c.CppDirective, ii)
1795 end
1796 else x
1797
1798 | (TUndef (id, ii) as x)::_, _
1799 ->
1800 if (pass >= 2)
1801 then begin
1802 pr2_cpp("UNDEF: I treat it as comment");
1803 TCommentCpp (Token_c.CppDirective, ii)
1804 end
1805 else x
1806
1807 | (TCppDirectiveOther (ii) as x)::_, _
1808 ->
1809 if (pass >= 2)
1810 then begin
1811 pr2_cpp ("OTHER directive: I treat it as comment");
1812 TCommentCpp (Token_c.CppDirective, ii)
1813 end
1814 else x
1815
1816 (* If ident contain a for_each, then certainly a macro. But to be
1817 * sure should look if there is a '{' after the ')', but it requires
1818 * to count the '('. Because this can be expensive, we do that only
1819 * when the token contains "for_each".
1820 *)
1821 | (TIdent (s, i1)::TOPar _::rest, _)
1822 when not (LP.current_context () =*= LP.InTopLevel)
1823 (* otherwise a function such as static void loopback_enable(int i) {
1824 * will be considered as a loop
1825 *)
1826 ->
1827
1828
1829 if s ==~ regexp_foreach &&
1830 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
1831
1832 then begin
1833 msg_foreach s;
1834 TMacroIterator (s, i1)
1835 end
1836 else TIdent (s, i1)
1837
1838
1839
1840 (*-------------------------------------------------------------*)
1841 | v::xs, _ -> v
1842 | _ -> raise Impossible
1843
1844 let lookahead ~pass a b =
1845 Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b)
1846
1847