Release coccinelle-0.1.2
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
1 (* Copyright (C) 2007, 2008 Yoann Padioleau
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
11 *)
12
13 open Common
14
15 module TH = Token_helpers
16 module LP = Lexer_parser
17
18 module Stat = Parsing_stat
19
20 open Parser_c
21
22 (*****************************************************************************)
23 (* Some debugging functions *)
24 (*****************************************************************************)
25
26 let pr2 s =
27 if !Flag_parsing_c.verbose_parsing
28 then Common.pr2 s
29
30 let pr2_cpp s =
31 if !Flag_parsing_c.debug_cpp
32 then Common.pr2_once ("CPP-" ^ s)
33
34
35 let msg_gen cond is_known printer s =
36 if cond
37 then
38 if not (!Flag_parsing_c.filter_msg)
39 then printer s
40 else
41 if not (is_known s)
42 then printer s
43
44
45 (* In the following, there are some harcoded names of types or macros
46 * but they are not used by our heuristics! They are just here to
47 * enable to detect false positive by printing only the typedef/macros
48 * that we don't know yet. If we print everything, then we can easily
49 * get lost with too much verbose tracing information. So those
50 * functions "filter" some messages. So our heuristics are still good,
51 * there is no more (or not that much) hardcoded linux stuff.
52 *)
53
54 let is_known_typdef =
55 (fun s ->
56 (match s with
57 | "u_char" | "u_short" | "u_int" | "u_long"
58 | "u8" | "u16" | "u32" | "u64"
59 | "s8" | "s16" | "s32" | "s64"
60 | "__u8" | "__u16" | "__u32" | "__u64"
61 -> true
62
63 | "acpi_handle"
64 | "acpi_status"
65 -> true
66
67 | "FILE"
68 | "DIR"
69 -> true
70
71 | s when s =~ ".*_t$" -> true
72 | _ -> false
73 )
74 )
75
76 (* note: cant use partial application with let msg_typedef =
77 * because it would compute msg_typedef at compile time when
78 * the flag debug_typedef is always false
79 *)
80 let msg_typedef s =
81 incr Stat.nTypedefInfer;
82 msg_gen (!Flag_parsing_c.debug_typedef)
83 is_known_typdef
84 (fun s ->
85 pr2_cpp ("TYPEDEF: promoting: " ^ s)
86 )
87 s
88
89 let msg_maybe_dangereous_typedef s =
90 if not (is_known_typdef s)
91 then
92 pr2 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
93
94
95
96 let msg_declare_macro s =
97 incr Stat.nMacroDecl;
98 msg_gen (!Flag_parsing_c.debug_cpp)
99 (fun s ->
100 (match s with
101 | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
102 | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
103 | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
104 | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
105 | "SENSOR_DEVICE_ATTR"
106 | "LIST_HEAD"
107 | "DECLARE_WORK" | "DECLARE_TASKLET"
108 | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
109 | "DECLARE_BITMAP"
110
111 -> true
112 (*
113 | s when s =~ "^DECLARE_.*" -> true
114 | s when s =~ ".*_ATTR$" -> true
115 | s when s =~ "^DEFINE_.*" -> true
116 *)
117
118 | _ -> false
119 )
120 )
121 (fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s))
122 s
123
124
125 let msg_foreach s =
126 incr Stat.nIteratorHeuristic;
127 pr2_cpp ("MACRO: found foreach: " ^ s)
128
129
130 (* ??
131 let msg_debug_macro s =
132 pr2_cpp ("MACRO: found debug-macro: " ^ s)
133 *)
134
135
136 let msg_macro_noptvirg s =
137 incr Stat.nMacroStmt;
138 pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)
139
140 let msg_macro_toplevel_noptvirg s =
141 incr Stat.nMacroStmt;
142 pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)
143
144 let msg_macro_noptvirg_single s =
145 incr Stat.nMacroStmt;
146 pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
147
148
149
150
151 let msg_macro_higher_order s =
152 incr Stat.nMacroHigherOrder;
153 msg_gen (!Flag_parsing_c.debug_cpp)
154 (fun s ->
155 (match s with
156 | "DBGINFO"
157 | "DBGPX"
158 | "DFLOW"
159 -> true
160 | _ -> false
161 )
162 )
163 (fun s -> pr2_cpp ("MACRO: found higher ordre macro : " ^ s))
164 s
165
166
167 let msg_stringification s =
168 incr Stat.nMacroString;
169 msg_gen (!Flag_parsing_c.debug_cpp)
170 (fun s ->
171 (match s with
172 | "REVISION"
173 | "UTS_RELEASE"
174 | "SIZE_STR"
175 | "DMA_STR"
176 -> true
177 (* s when s =~ ".*STR.*" -> true *)
178 | _ -> false
179 )
180 )
181 (fun s -> pr2_cpp ("MACRO: found string-macro " ^ s))
182 s
183
184 let msg_stringification_params s =
185 incr Stat.nMacroString;
186 pr2_cpp ("MACRO: string-macro with params : " ^ s)
187
188
189
190 let msg_apply_known_macro s =
191 incr Stat.nMacroExpand;
192 pr2_cpp ("MACRO: found known macro = " ^ s)
193
194 let msg_apply_known_macro_hint s =
195 incr Stat.nMacroHint;
196 pr2_cpp ("MACRO: found known macro hint = " ^ s)
197
198
199
200
201 let msg_ifdef_bool_passing is_ifdef_positif =
202 incr Stat.nIfdefZero; (* of Version ? *)
203 if is_ifdef_positif
204 then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION"
205 else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
206
207
208 let msg_ifdef_mid_something () =
209 incr Stat.nIfdefExprPassing;
210 pr2_cpp "found ifdef-mid-something"
211
212 let msg_ifdef_funheaders () =
213 incr Stat.nIfdefFunheader;
214 ()
215
216 let msg_ifdef_passing () =
217 pr2_cpp("IFDEF: or related outside function. I treat it as comment");
218 incr Stat.nIfdefPassing;
219 ()
220
221 let msg_attribute s =
222 incr Stat.nMacroAttribute;
223 pr2_cpp("ATTR:" ^ s)
224
225
226
227 (*****************************************************************************)
228 (* The regexp and basic view definitions *)
229 (*****************************************************************************)
230
231 (* opti: better to built then once and for all, especially regexp_foreach *)
232
233 let regexp_macro = Str.regexp
234 "^[A-Z_][A-Z_0-9]*$"
235
236 (* linuxext: *)
237 let regexp_annot = Str.regexp
238 "^__.*$"
239
240 (* linuxext: *)
241 let regexp_declare = Str.regexp
242 ".*DECLARE.*"
243
244 (* linuxext: *)
245 let regexp_foreach = Str.regexp_case_fold
246 ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
247
248 let regexp_typedef = Str.regexp
249 ".*_t$"
250
251 let false_typedef = [
252 "printk";
253 ]
254
255
256 let ok_typedef s = not (List.mem s false_typedef)
257
258 let not_annot s =
259 not (s ==~ regexp_annot)
260
261
262 (* ------------------------------------------------------------------------- *)
263 (* cpp part 1 for standard.h *)
264 (* ------------------------------------------------------------------------- *)
265
266 type define_def = string * define_param * define_body
267 and define_param =
268 | NoParam
269 | Params of string list
270 and define_body =
271 | DefineBody of Parser_c.token list
272 | DefineHint of parsinghack_hint
273
274 and parsinghack_hint =
275 | HintIterator
276 | HintDeclarator
277 | HintMacroString
278 | HintMacroStatement
279 | HintAttribute
280
281
282 (* cf also data/test.h *)
283 let assoc_hint_string = [
284 "YACFE_ITERATOR" , HintIterator;
285 "YACFE_DECLARATOR" , HintDeclarator;
286 "YACFE_STRING" , HintMacroString;
287 "YACFE_STATEMENT" , HintMacroStatement;
288 "YACFE_ATTRIBUTE" , HintAttribute;
289 "MACROSTATEMENT" , HintMacroStatement; (* backward compatibility *)
290 ]
291
292
293 let (parsinghack_hint_of_string: string -> parsinghack_hint option) = fun s ->
294 Common.assoc_option s assoc_hint_string
295
296 let (is_parsinghack_hint: string -> bool) = fun s ->
297 parsinghack_hint_of_string s <> None
298
299 let (token_from_parsinghack_hint:
300 (string * Ast_c.info) -> parsinghack_hint -> Parser_c.token) =
301 fun (s,ii) hint ->
302 match hint with
303 | HintIterator ->
304 Parser_c.TMacroIterator (s, ii)
305 | HintDeclarator ->
306 Parser_c.TMacroDecl (s, ii)
307 | HintMacroString ->
308 Parser_c.TMacroString (s, ii)
309 | HintMacroStatement ->
310 Parser_c.TMacroStmt (s, ii)
311 | HintAttribute ->
312 Parser_c.TMacroAttr (s, ii)
313
314
315
316 let (_defs : (string, define_def) Hashtbl.t ref) =
317 ref (Hashtbl.create 101)
318
319
320 (* ------------------------------------------------------------------------- *)
321 (* fuzzy parsing, different "views" over the same program *)
322 (* ------------------------------------------------------------------------- *)
323
324
325 (* Normally I should not use ref/mutable in the token_extended type
326 * and I should have a set of functions taking a list of tokens and
327 * returning a list of tokens. The problem is that to make easier some
328 * functions, it is better to work on better representation, on "views"
329 * over this list of tokens. But then modifying those views and get
330 * back from those views to the original simple list of tokens is
331 * tedious. One way is to maintain next to the view a list of "actions"
332 * (I was using a hash storing the charpos of the token and associating
333 * the action) but it is tedious too. Simpler to use mutable/ref. We
334 * use the same idea that we use when working on the Ast_c. *)
335
336 (* old: when I was using the list of "actions" next to the views, the hash
337 * indexed by the charpos, there could have been some problems:
338 * how my fake_pos interact with the way I tag and adjust token ?
339 * because I base my tagging on the position of the token ! so sometimes
340 * could tag another fakeInfo that should not be tagged ?
341 * fortunately I don't use anymore this technique.
342 *)
343
344 (* update: quite close to the Place_c.Inxxx *)
345 type context =
346 InFunction | InEnum | InStruct | InInitializer | NoContext
347
348 type token_extended = {
349 mutable tok: Parser_c.token;
350 mutable where: context;
351
352 (* less: need also a after ? *)
353 mutable new_tokens_before : Parser_c.token list;
354
355 (* line x col cache, more easily accessible, of the info in the token *)
356 line: int;
357 col : int;
358 }
359
360 let set_as_comment cppkind x =
361 if TH.is_eof x.tok
362 then () (* otherwise parse_c will be lost if don't find a EOF token *)
363 else
364 x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok)
365
366 let mk_token_extended x =
367 let (line, col) = TH.linecol_of_tok x in
368 { tok = x;
369 line = line; col = col;
370 where = NoContext;
371 new_tokens_before = [];
372 }
373
374
375 (* x list list, because x list separated by ',' *)
376 type paren_grouped =
377 | Parenthised of paren_grouped list list * token_extended list
378 | PToken of token_extended
379
380 type brace_grouped =
381 | Braceised of
382 brace_grouped list list * token_extended * token_extended option
383 | BToken of token_extended
384
385 (* Far better data structure than doing hacks in the lexer or parser
386 * because in lexer we don't know to which ifdef a endif is related
387 * and so when we want to comment a ifdef, we don't know which endif
388 * we must also comment. Especially true for the #if 0 which sometimes
389 * have a #else part.
390 *
391 * x list list, because x list separated by #else or #elif
392 *)
393 type ifdef_grouped =
394 | Ifdef of ifdef_grouped list list * token_extended list
395 | Ifdefbool of bool * ifdef_grouped list list * token_extended list
396 | NotIfdefLine of token_extended list
397
398
399 type 'a line_grouped =
400 Line of 'a list
401
402
403 type body_function_grouped =
404 | BodyFunction of token_extended list
405 | NotBodyLine of token_extended list
406
407
408 (* ------------------------------------------------------------------------- *)
409 (* view builders *)
410 (* ------------------------------------------------------------------------- *)
411
412 (* todo: synchro ! use more indentation
413 * if paren not closed and same indentation level, certainly because
414 * part of a mid-ifdef-expression.
415 *)
416 let rec mk_parenthised xs =
417 match xs with
418 | [] -> []
419 | x::xs ->
420 (match x.tok with
421 | TOPar _ | TOParDefine _ ->
422 let body, extras, xs = mk_parameters [x] [] xs in
423 Parenthised (body,extras)::mk_parenthised xs
424 | _ ->
425 PToken x::mk_parenthised xs
426 )
427
428 (* return the body of the parenthised expression and the rest of the tokens *)
429 and mk_parameters extras acc_before_sep xs =
430 match xs with
431 | [] ->
432 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
433 pr2 "PB: not found closing paren in fuzzy parsing";
434 [List.rev acc_before_sep], List.rev extras, []
435 | x::xs ->
436 (match x.tok with
437 (* synchro *)
438 | TOBrace _ when x.col = 0 ->
439 pr2 "PB: found synchro point } in paren";
440 [List.rev acc_before_sep], List.rev (extras), (x::xs)
441
442 | TCPar _ | TCParEOL _ ->
443 [List.rev acc_before_sep], List.rev (x::extras), xs
444 | TOPar _ | TOParDefine _ ->
445 let body, extrasnest, xs = mk_parameters [x] [] xs in
446 mk_parameters extras
447 (Parenthised (body,extrasnest)::acc_before_sep)
448 xs
449 | TComma _ ->
450 let body, extras, xs = mk_parameters (x::extras) [] xs in
451 (List.rev acc_before_sep)::body, extras, xs
452 | _ ->
453 mk_parameters extras (PToken x::acc_before_sep) xs
454 )
455
456
457
458
459 let rec mk_braceised xs =
460 match xs with
461 | [] -> []
462 | x::xs ->
463 (match x.tok with
464 | TOBrace _ ->
465 let body, endbrace, xs = mk_braceised_aux [] xs in
466 Braceised (body, x, endbrace)::mk_braceised xs
467 | TCBrace _ ->
468 pr2 "PB: found closing brace alone in fuzzy parsing";
469 BToken x::mk_braceised xs
470 | _ ->
471 BToken x::mk_braceised xs
472 )
473
474 (* return the body of the parenthised expression and the rest of the tokens *)
475 and mk_braceised_aux acc xs =
476 match xs with
477 | [] ->
478 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
479 pr2 "PB: not found closing brace in fuzzy parsing";
480 [List.rev acc], None, []
481 | x::xs ->
482 (match x.tok with
483 | TCBrace _ -> [List.rev acc], Some x, xs
484 | TOBrace _ ->
485 let body, endbrace, xs = mk_braceised_aux [] xs in
486 mk_braceised_aux (Braceised (body,x, endbrace)::acc) xs
487 | _ ->
488 mk_braceised_aux (BToken x::acc) xs
489 )
490
491
492
493
494 let rec mk_ifdef xs =
495 match xs with
496 | [] -> []
497 | x::xs ->
498 (match x.tok with
499 | TIfdef _ ->
500 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
501 Ifdef (body, extra)::mk_ifdef xs
502 | TIfdefBool (b,_, _) ->
503 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
504
505 (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
506 if !Flag_parsing_c.if0_passing
507 then Ifdefbool (b, body, extra)::mk_ifdef xs
508 else Ifdef(body, extra)::mk_ifdef xs
509
510 | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
511 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
512 Ifdefbool (b, body, extra)::mk_ifdef xs
513
514
515 | _ ->
516 (* todo? can have some Ifdef in the line ? *)
517 let line, xs = Common.span (fun y -> y.line = x.line) (x::xs) in
518 NotIfdefLine line::mk_ifdef xs
519 )
520
521 and mk_ifdef_parameters extras acc_before_sep xs =
522 match xs with
523 | [] ->
524 (* Note that mk_ifdef is assuming that CPP instruction are alone
525 * on their line. Because I do a span (fun x -> is_same_line ...)
526 * I might take with me a #endif if this one is mixed on a line
527 * with some "normal" tokens.
528 *)
529 pr2 "PB: not found closing ifdef in fuzzy parsing";
530 [List.rev acc_before_sep], List.rev extras, []
531 | x::xs ->
532 (match x.tok with
533 | TEndif _ ->
534 [List.rev acc_before_sep], List.rev (x::extras), xs
535 | TIfdef _ ->
536 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
537 mk_ifdef_parameters
538 extras (Ifdef (body, extrasnest)::acc_before_sep) xs
539
540 | TIfdefBool (b,_,_) ->
541 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
542
543 if !Flag_parsing_c.if0_passing
544 then
545 mk_ifdef_parameters
546 extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
547 else
548 mk_ifdef_parameters
549 extras (Ifdef (body, extrasnest)::acc_before_sep) xs
550
551
552 | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
553 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
554 mk_ifdef_parameters
555 extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
556
557 | TIfdefelse _
558 | TIfdefelif _ ->
559 let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in
560 (List.rev acc_before_sep)::body, extras, xs
561 | _ ->
562 let line, xs = Common.span (fun y -> y.line = x.line) (x::xs) in
563 mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs
564 )
565
566 (* --------------------------------------- *)
567
568 let line_of_paren = function
569 | PToken x -> x.line
570 | Parenthised (xxs, info_parens) ->
571 (match info_parens with
572 | [] -> raise Impossible
573 | x::xs -> x.line
574 )
575
576
577 let rec span_line_paren line = function
578 | [] -> [],[]
579 | x::xs ->
580 (match x with
581 | PToken tok when TH.is_eof tok.tok ->
582 [], x::xs
583 | _ ->
584 if line_of_paren x = line
585 then
586 let (l1, l2) = span_line_paren line xs in
587 (x::l1, l2)
588 else ([], x::xs)
589 )
590
591
592 let rec mk_line_parenthised xs =
593 match xs with
594 | [] -> []
595 | x::xs ->
596 let line_no = line_of_paren x in
597 let line, xs = span_line_paren line_no xs in
598 Line (x::line)::mk_line_parenthised xs
599
600
601 (* --------------------------------------- *)
602 let rec mk_body_function_grouped xs =
603 match xs with
604 | [] -> []
605 | x::xs ->
606 (match x with
607 | {tok = TOBrace _; col = 0} ->
608 let is_closing_brace = function
609 | {tok = TCBrace _; col = 0 } -> true
610 | _ -> false
611 in
612 let body, xs = Common.span (fun x -> not (is_closing_brace x)) xs in
613 (match xs with
614 | ({tok = TCBrace _; col = 0 })::xs ->
615 BodyFunction body::mk_body_function_grouped xs
616 | [] ->
617 pr2 "PB:not found closing brace in fuzzy parsing";
618 [NotBodyLine body]
619 | _ -> raise Impossible
620 )
621
622 | _ ->
623 let line, xs = Common.span (fun y -> y.line = x.line) (x::xs) in
624 NotBodyLine line::mk_body_function_grouped xs
625 )
626
627
628 (* ------------------------------------------------------------------------- *)
629 (* view iterators *)
630 (* ------------------------------------------------------------------------- *)
631
632 let rec iter_token_paren f xs =
633 xs +> List.iter (function
634 | PToken tok -> f tok;
635 | Parenthised (xxs, info_parens) ->
636 info_parens +> List.iter f;
637 xxs +> List.iter (fun xs -> iter_token_paren f xs)
638 )
639
640 let rec iter_token_brace f xs =
641 xs +> List.iter (function
642 | BToken tok -> f tok;
643 | Braceised (xxs, tok1, tok2opt) ->
644 f tok1; do_option f tok2opt;
645 xxs +> List.iter (fun xs -> iter_token_brace f xs)
646 )
647
648 let rec iter_token_ifdef f xs =
649 xs +> List.iter (function
650 | NotIfdefLine xs -> xs +> List.iter f;
651 | Ifdefbool (_, xxs, info_ifdef)
652 | Ifdef (xxs, info_ifdef) ->
653 info_ifdef +> List.iter f;
654 xxs +> List.iter (iter_token_ifdef f)
655 )
656
657
658
659
660 let tokens_of_paren xs =
661 let g = ref [] in
662 xs +> iter_token_paren (fun tok -> push2 tok g);
663 List.rev !g
664
665
666 let tokens_of_paren_ordered xs =
667 let g = ref [] in
668
669 let rec aux_tokens_ordered = function
670 | PToken tok -> push2 tok g;
671 | Parenthised (xxs, info_parens) ->
672 let (opar, cpar, commas) =
673 match info_parens with
674 | opar::xs ->
675 (match List.rev xs with
676 | cpar::xs ->
677 opar, cpar, List.rev xs
678 | _ -> raise Impossible
679 )
680 | _ -> raise Impossible
681 in
682 push2 opar g;
683 aux_args (xxs,commas);
684 push2 cpar g;
685
686 and aux_args (xxs, commas) =
687 match xxs, commas with
688 | [], [] -> ()
689 | [xs], [] -> xs +> List.iter aux_tokens_ordered
690 | xs::ys::xxs, comma::commas ->
691 xs +> List.iter aux_tokens_ordered;
692 push2 comma g;
693 aux_args (ys::xxs, commas)
694 | _ -> raise Impossible
695
696 in
697
698 xs +> List.iter aux_tokens_ordered;
699 List.rev !g
700
701
702
703
704 (* ------------------------------------------------------------------------- *)
705 (* set the context info in token *)
706 (* ------------------------------------------------------------------------- *)
707
708
709 let rec set_in_function_tag xs =
710 (* could try: ) { } but it can be the ) of a if or while, so
711 * better to base the heuristic on the position in column zero.
712 * Note that some struct or enum or init put also their { in first column
713 * but set_in_other will overwrite the previous InFunction tag.
714 *)
715 match xs with
716 | [] -> ()
717 (* ) { and the closing } is in column zero, then certainly a function *)
718 | BToken ({tok = TCPar _ })::(Braceised (body, tok1, Some tok2))::xs
719 when tok1.col <> 0 && tok2.col = 0 ->
720 body +> List.iter (iter_token_brace (fun tok ->
721 tok.where <- InFunction
722 ));
723 set_in_function_tag xs
724
725 | (BToken x)::xs -> set_in_function_tag xs
726
727 | (Braceised (body, tok1, Some tok2))::xs
728 when tok1.col = 0 && tok2.col = 0 ->
729 body +> List.iter (iter_token_brace (fun tok ->
730 tok.where <- InFunction
731 ));
732 set_in_function_tag xs
733 | Braceised (body, tok1, tok2)::xs ->
734 set_in_function_tag xs
735
736
737 let rec set_in_other xs =
738 match xs with
739 | [] -> ()
740 (* enum x { } *)
741 | BToken ({tok = Tenum _})::BToken ({tok = TIdent _})
742 ::Braceised(body, tok1, tok2)::xs
743 | BToken ({tok = Tenum _})
744 ::Braceised(body, tok1, tok2)::xs
745 ->
746 body +> List.iter (iter_token_brace (fun tok ->
747 tok.where <- InEnum;
748 ));
749 set_in_other xs
750
751 (* struct x { } *)
752 | BToken ({tok = Tstruct _})::BToken ({tok = TIdent _})
753 ::Braceised(body, tok1, tok2)::xs ->
754 body +> List.iter (iter_token_brace (fun tok ->
755 tok.where <- InStruct;
756 ));
757 set_in_other xs
758 (* = { } *)
759 | BToken ({tok = TEq _})
760 ::Braceised(body, tok1, tok2)::xs ->
761 body +> List.iter (iter_token_brace (fun tok ->
762 tok.where <- InInitializer;
763 ));
764 set_in_other xs
765
766 | BToken _::xs -> set_in_other xs
767
768 | Braceised(body, tok1, tok2)::xs ->
769 body +> List.iter set_in_other;
770 set_in_other xs
771
772
773
774
775 let set_context_tag xs =
776 begin
777 set_in_function_tag xs;
778 set_in_other xs;
779 end
780
781
782 (*****************************************************************************)
783 (* Helpers *)
784 (*****************************************************************************)
785
786 (* To expand the parameter of the macro. The env corresponds to the actual
787 * code that is binded to the parameters of the macro.
788 * TODO? recurse ? fixpoint ? the expansion may also contain macro.
789 * Or to macro expansion in a strict manner, that is process first
790 * the parameters, expands macro in params, and then process enclosing
791 * macro call.
792 *)
793 let rec (cpp_engine: (string , Parser_c.token list) assoc ->
794 Parser_c.token list -> Parser_c.token list) =
795 fun env xs ->
796 xs +> List.map (fun tok ->
797 match tok with
798 | TIdent (s,i1) when List.mem_assoc s env -> Common.assoc s env
799 | x -> [x]
800 )
801 +> List.flatten
802
803
804
805
806 (* ------------------------------------------------------------------------- *)
807 (* the pair is the status of '()' and '{}', ex: (-1,0)
808 * if too much ')' and good '{}'
809 * could do for [] too ?
810 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
811 * then if have ifdef, then certainly can lead to a problem.
812 *)
813 let (count_open_close_stuff_ifdef_clause: ifdef_grouped list -> (int * int)) =
814 fun xs ->
815 let cnt_paren, cnt_brace = ref 0, ref 0 in
816 xs +> iter_token_ifdef (fun x ->
817 (match x.tok with
818 | x when TH.is_opar x -> incr cnt_paren
819 | TOBrace _ -> incr cnt_brace
820 | x when TH.is_cpar x -> decr cnt_paren
821 | TCBrace _ -> decr cnt_brace
822 | _ -> ()
823 )
824 );
825 !cnt_paren, !cnt_brace
826
827
828 (* ------------------------------------------------------------------------- *)
829 let forLOOKAHEAD = 30
830
831
832 (* look if there is a '{' just after the closing ')', and handling the
833 * possibility to have nested expressions inside nested parenthesis
834 *
835 * todo: use indentation instead of premier(statement) ?
836 *)
837 let rec is_really_foreach xs =
838 let rec is_foreach_aux = function
839 | [] -> false, []
840 | TCPar _::TOBrace _::xs -> true, xs
841 (* the following attempts to handle the cases where there is a
842 single statement in the body of the loop. undoubtedly more
843 cases are needed.
844 todo: premier(statement) - suivant(funcall)
845 *)
846 | TCPar _::TIdent _::xs -> true, xs
847 | TCPar _::Tif _::xs -> true, xs
848 | TCPar _::Twhile _::xs -> true, xs
849 | TCPar _::Tfor _::xs -> true, xs
850 | TCPar _::Tswitch _::xs -> true, xs
851 | TCPar _::Treturn _::xs -> true, xs
852
853
854 | TCPar _::xs -> false, xs
855 | TOPar _::xs ->
856 let (_, xs') = is_foreach_aux xs in
857 is_foreach_aux xs'
858 | x::xs -> is_foreach_aux xs
859 in
860 is_foreach_aux xs +> fst
861
862
863 (* ------------------------------------------------------------------------- *)
864 let set_ifdef_token_parenthize_info cnt x =
865 match x with
866 | TIfdef (tag, _)
867 | TIfdefelse (tag, _)
868 | TIfdefelif (tag, _)
869 | TEndif (tag, _)
870
871 | TIfdefBool (_, tag, _)
872 | TIfdefMisc (_, tag, _)
873 | TIfdefVersion (_, tag, _)
874 ->
875 tag := Some cnt;
876
877 | _ -> raise Impossible
878
879
880
881 let ifdef_paren_cnt = ref 0
882
883
884 let rec set_ifdef_parenthize_info xs =
885 xs +> List.iter (function
886 | NotIfdefLine xs -> ()
887 | Ifdefbool (_, xxs, info_ifdef)
888 | Ifdef (xxs, info_ifdef) ->
889
890 incr ifdef_paren_cnt;
891 let total_directives = List.length info_ifdef in
892
893 info_ifdef +> List.iter (fun x ->
894 set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives)
895 x.tok);
896 xxs +> List.iter set_ifdef_parenthize_info
897 )
898
899
900 (*****************************************************************************)
901 (* CPP handling: macros, ifdefs, macros defs *)
902 (*****************************************************************************)
903
904 (* ------------------------------------------------------------------------- *)
905 (* ifdef keeping/passing *)
906 (* ------------------------------------------------------------------------- *)
907
908 (* #if 0, #if 1, #if LINUX_VERSION handling *)
909 let rec find_ifdef_bool xs =
910 xs +> List.iter (function
911 | NotIfdefLine _ -> ()
912 | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) ->
913
914 msg_ifdef_bool_passing is_ifdef_positif;
915
916 (match xxs with
917 | [] -> raise Impossible
918 | firstclause::xxs ->
919 info_ifdef_stmt +> List.iter (set_as_comment Ast_c.CppDirective);
920
921 if is_ifdef_positif
922 then xxs +> List.iter
923 (iter_token_ifdef (set_as_comment Ast_c.CppPassingNormal))
924 else begin
925 firstclause +> iter_token_ifdef (set_as_comment Ast_c.CppPassingNormal);
926 (match List.rev xxs with
927 (* keep only last *)
928 | last::startxs ->
929 startxs +> List.iter
930 (iter_token_ifdef (set_as_comment Ast_c.CppPassingNormal))
931 | [] -> (* not #else *) ()
932 );
933 end
934 );
935
936 | Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool
937 )
938
939
940
941 let thresholdIfdefSizeMid = 6
942
943 (* infer ifdef involving not-closed expressions/statements *)
944 let rec find_ifdef_mid xs =
945 xs +> List.iter (function
946 | NotIfdefLine _ -> ()
947 | Ifdef (xxs, info_ifdef_stmt) ->
948 (match xxs with
949 | [] -> raise Impossible
950 | [first] -> ()
951 | first::second::rest ->
952 (* don't analyse big ifdef *)
953 if xxs +> List.for_all
954 (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
955 (* don't want nested ifdef *)
956 xxs +> List.for_all (fun xs ->
957 xs +> List.for_all
958 (function NotIfdefLine _ -> true | _ -> false)
959 )
960
961 then
962 let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
963 let cnt1, cnt2 = List.hd counts in
964 if cnt1 <> 0 || cnt2 <> 0 &&
965 counts +> List.for_all (fun x -> x = (cnt1, cnt2))
966 (*
967 if counts +> List.exists (fun (cnt1, cnt2) ->
968 cnt1 <> 0 || cnt2 <> 0
969 )
970 *)
971 then begin
972 msg_ifdef_mid_something();
973
974 (* keep only first, treat the rest as comment *)
975 info_ifdef_stmt +> List.iter (set_as_comment Ast_c.CppDirective);
976 (second::rest) +> List.iter
977 (iter_token_ifdef (set_as_comment Ast_c.CppPassingCosWouldGetError));
978 end
979
980 );
981 List.iter find_ifdef_mid xxs
982
983 (* no need complex analysis for ifdefbool *)
984 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
985 List.iter find_ifdef_mid xxs
986
987
988 )
989
990
991 let thresholdFunheaderLimit = 4
992
993 (* ifdef defining alternate function header, type *)
994 let rec find_ifdef_funheaders = function
995 | [] -> ()
996 | NotIfdefLine _::xs -> find_ifdef_funheaders xs
997
998 (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
999 | Ifdef
1000 ([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1;
1001 (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
1002 ], info_ifdef_stmt
1003 )
1004 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line3)
1005 ::xs
1006 when List.length ifdefblock1 <= thresholdFunheaderLimit &&
1007 List.length ifdefblock2 <= thresholdFunheaderLimit
1008 ->
1009 find_ifdef_funheaders xs;
1010
1011 msg_ifdef_funheaders ();
1012 info_ifdef_stmt +> List.iter (set_as_comment Ast_c.CppDirective);
1013 let all_toks = [xline2] @ line2 in
1014 all_toks +> List.iter (set_as_comment Ast_c.CppPassingCosWouldGetError) ;
1015 ifdefblock2 +> iter_token_ifdef (set_as_comment Ast_c.CppPassingCosWouldGetError);
1016
1017 (* ifdef with nested ifdef *)
1018 | Ifdef
1019 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
1020 [Ifdef
1021 ([[NotIfdefLine (({col = 0} as xline2)::line2)];
1022 [NotIfdefLine (({col = 0} as xline3)::line3)];
1023 ], info_ifdef_stmt2
1024 )
1025 ]
1026 ], info_ifdef_stmt
1027 )
1028 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
1029 ::xs
1030 ->
1031 find_ifdef_funheaders xs;
1032
1033 msg_ifdef_funheaders ();
1034 info_ifdef_stmt +> List.iter (set_as_comment Ast_c.CppDirective);
1035 info_ifdef_stmt2 +> List.iter (set_as_comment Ast_c.CppDirective);
1036 let all_toks = [xline2;xline3] @ line2 @ line3 in
1037 all_toks +> List.iter (set_as_comment Ast_c.CppPassingCosWouldGetError);
1038
1039 (* ifdef with elseif *)
1040 | Ifdef
1041 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
1042 [NotIfdefLine (({col = 0} as xline2)::line2)];
1043 [NotIfdefLine (({col = 0} as xline3)::line3)];
1044 ], info_ifdef_stmt
1045 )
1046 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
1047 ::xs
1048 ->
1049 find_ifdef_funheaders xs;
1050
1051 msg_ifdef_funheaders ();
1052 info_ifdef_stmt +> List.iter (set_as_comment Ast_c.CppDirective);
1053 let all_toks = [xline2;xline3] @ line2 @ line3 in
1054 all_toks +> List.iter (set_as_comment Ast_c.CppPassingCosWouldGetError)
1055
1056 (* recurse *)
1057 | Ifdef (xxs,info_ifdef_stmt)::xs
1058 | Ifdefbool (_, xxs,info_ifdef_stmt)::xs ->
1059 List.iter find_ifdef_funheaders xxs;
1060 find_ifdef_funheaders xs
1061
1062
1063
1064 (* ?? *)
1065 let rec adjust_inifdef_include xs =
1066 xs +> List.iter (function
1067 | NotIfdefLine _ -> ()
1068 | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) ->
1069 xxs +> List.iter (iter_token_ifdef (fun tokext ->
1070 match tokext.tok with
1071 | Parser_c.TInclude (s1, s2, inifdef_ref, ii) ->
1072 inifdef_ref := true;
1073 | _ -> ()
1074 ));
1075 )
1076
1077
1078
1079 (* ------------------------------------------------------------------------- *)
1080 (* cpp-builtin part2, macro, using standard.h or other defs *)
1081 (* ------------------------------------------------------------------------- *)
1082
1083 (* Thanks to this function many stuff are not anymore hardcoded in ocaml code
1084 * (but they are now hardcoded in standard.h ...)
1085 *
1086 *
1087 *
1088 * No need to take care to not substitute the macro name itself
1089 * that occurs in the macro definition because the macro name is
1090 * after fix_token_define a TDefineIdent, no more a TIdent.
1091 *)
1092
1093 let rec apply_macro_defs xs =
1094 match xs with
1095 | [] -> ()
1096
1097 (* old: "but could do more, could reuse same original token
1098 * so that have in the Ast a Dbg, not a MACROSTATEMENT"
1099 *
1100 * | PToken ({tok = TIdent (s,i1)} as id)::xs
1101 * when s = "MACROSTATEMENT" ->
1102 *
1103 * msg_macro_statement_hint s;
1104 * id.tok <- TMacroStmt(TH.info_of_tok id.tok);
1105 * find_macro_paren xs
1106 *
1107 * let msg_macro_statement_hint s =
1108 * incr Stat.nMacroHint;
1109 * ()
1110 *
1111 *)
1112
1113 (* recognized macro of standard.h (or other) *)
1114 | PToken ({tok = TIdent (s,i1)} as id)::Parenthised (xxs,info_parens)::xs
1115 when Hashtbl.mem !_defs s ->
1116
1117 msg_apply_known_macro s;
1118 let (s, params, body) = Hashtbl.find !_defs s in
1119
1120 (match params with
1121 | NoParam ->
1122 pr2 ("WIERD: macro without param used before parenthize: " ^ s);
1123 (* ex: PRINTP("NCR53C400 card%s detected\n" ANDP(((struct ... *)
1124
1125 (match body with
1126 | DefineBody bodymacro ->
1127 set_as_comment (Ast_c.CppMacro) id;
1128 id.new_tokens_before <- bodymacro;
1129 | DefineHint hint ->
1130 msg_apply_known_macro_hint s;
1131 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1132 )
1133 | Params params ->
1134 if List.length params != List.length xxs
1135 then begin
1136 pr2 ("WIERD: macro with wrong number of arguments: " ^ s);
1137 (* old: id.new_tokens_before <- bodymacro; *)
1138 ()
1139 end
1140 else
1141 (match body with
1142 | DefineBody bodymacro ->
1143 let xxs' = xxs +> List.map (fun x ->
1144 (tokens_of_paren_ordered x) +> List.map (fun x ->
1145 TH.visitor_info_of_tok Ast_c.make_expanded x.tok
1146 )
1147 ) in
1148 id.new_tokens_before <-
1149 cpp_engine (Common.zip params xxs') bodymacro;
1150
1151 (* important to do that after have apply the macro, otherwise
1152 * will pass as argument to the macro some tokens that
1153 * are all TCommentCpp
1154 *)
1155 [Parenthised (xxs, info_parens)] +>
1156 iter_token_paren (set_as_comment Ast_c.CppMacro);
1157 set_as_comment Ast_c.CppMacro id;
1158
1159 | DefineHint (HintMacroStatement as hint) ->
1160 (* important to do that after have apply the macro, otherwise
1161 * will pass as argument to the macro some tokens that
1162 * are all TCommentCpp
1163 *)
1164 msg_apply_known_macro_hint s;
1165 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1166 [Parenthised (xxs, info_parens)] +>
1167 iter_token_paren (set_as_comment Ast_c.CppMacro);
1168
1169
1170 | DefineHint hint ->
1171 msg_apply_known_macro_hint s;
1172 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1173 )
1174 );
1175 apply_macro_defs xs
1176
1177 | PToken ({tok = TIdent (s,i1)} as id)::xs
1178 when Hashtbl.mem !_defs s ->
1179
1180 msg_apply_known_macro s;
1181 let (_s, params, body) = Hashtbl.find !_defs s in
1182
1183 (match params with
1184 | Params params ->
1185 pr2 ("WIERD: macro with params but no parens found: " ^ s);
1186 (* dont apply the macro, perhaps a redefinition *)
1187 ()
1188 | NoParam ->
1189 (match body with
1190 | DefineBody [newtok] ->
1191 (* special case when 1-1 substitution, we reuse the token *)
1192 id.tok <- (newtok +> TH.visitor_info_of_tok (fun _ ->
1193 TH.info_of_tok id.tok))
1194 | DefineBody bodymacro ->
1195 set_as_comment Ast_c.CppMacro id;
1196 id.new_tokens_before <- bodymacro;
1197 | DefineHint hint ->
1198 msg_apply_known_macro_hint s;
1199 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1200 )
1201 );
1202 apply_macro_defs xs
1203
1204
1205
1206
1207 (* recurse *)
1208 | (PToken x)::xs -> apply_macro_defs xs
1209 | (Parenthised (xxs, info_parens))::xs ->
1210 xxs +> List.iter apply_macro_defs;
1211 apply_macro_defs xs
1212
1213
1214
1215
1216
1217 (* ------------------------------------------------------------------------- *)
1218 (* stringification *)
1219 (* ------------------------------------------------------------------------- *)
1220
1221 let rec find_string_macro_paren xs =
1222 match xs with
1223 | [] -> ()
1224 | Parenthised(xxs, info_parens)::xs ->
1225 xxs +> List.iter (fun xs ->
1226 if xs +> List.exists
1227 (function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) &&
1228 xs +> List.for_all
1229 (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) ->
1230 true | _ -> false)
1231 then
1232 xs +> List.iter (fun tok ->
1233 match tok with
1234 | PToken({tok = TIdent (s,_)} as id) ->
1235 msg_stringification s;
1236 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1237 | _ -> ()
1238 )
1239 else
1240 find_string_macro_paren xs
1241 );
1242 find_string_macro_paren xs
1243 | PToken(tok)::xs ->
1244 find_string_macro_paren xs
1245
1246
1247 (* ------------------------------------------------------------------------- *)
1248 (* macro2 *)
1249 (* ------------------------------------------------------------------------- *)
1250
1251 (* don't forget to recurse in each case *)
1252 let rec find_macro_paren xs =
1253 match xs with
1254 | [] -> ()
1255
1256 (* attribute *)
1257 | PToken ({tok = Tattribute _} as id)
1258 ::Parenthised (xxs,info_parens)
1259 ::xs
1260 ->
1261 pr2_cpp ("MACRO: __attribute detected ");
1262 [Parenthised (xxs, info_parens)] +>
1263 iter_token_paren (set_as_comment Ast_c.CppAttr);
1264 set_as_comment Ast_c.CppAttr id;
1265 find_macro_paren xs
1266
1267 (*
1268 (* attribute cpp, __xxx id() *)
1269 | PToken ({tok = TIdent (s,i1)} as id)
1270 ::PToken ({tok = TIdent (s2, i2)})
1271 ::Parenthised(xxs,info_parens)
1272 ::xs when s ==~ regexp_annot
1273 ->
1274 msg_attribute s;
1275 id.tok <- TMacroAttr (s, i1);
1276 find_macro_paren (Parenthised(xxs,info_parens)::xs)
1277
1278 (* attribute cpp, id __xxx = *)
1279 | PToken ({tok = TIdent (s,i1)})
1280 ::PToken ({tok = TIdent (s2, i2)} as id)
1281 ::xs when s2 ==~ regexp_annot
1282 ->
1283 msg_attribute s2;
1284 id.tok <- TMacroAttr (s2, i2);
1285 find_macro_paren (xs)
1286 *)
1287
1288 (* storage attribute *)
1289 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
1290 ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs
1291 ->
1292 pr2_cpp ("storage attribute: " ^ s);
1293 attr.tok <- TMacroAttrStorage (s,i1);
1294 (* recurse, may have other storage attributes *)
1295 find_macro_paren (PToken (tok1)::xs)
1296
1297
1298 (* stringification
1299 *
1300 * the order of the matching clause is important
1301 *
1302 *)
1303
1304 (* string macro with params, before case *)
1305 | PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1306 ::Parenthised (xxs, info_parens)
1307 ::xs ->
1308
1309 msg_stringification_params s;
1310 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1311 [Parenthised (xxs, info_parens)] +>
1312 iter_token_paren (set_as_comment Ast_c.CppMacro);
1313 find_macro_paren xs
1314
1315 (* after case *)
1316 | PToken ({tok = TIdent (s,_)} as id)
1317 ::Parenthised (xxs, info_parens)
1318 ::PToken ({tok = (TString _ | TMacroString _)})
1319 ::xs ->
1320
1321 msg_stringification_params s;
1322 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1323 [Parenthised (xxs, info_parens)] +>
1324 iter_token_paren (set_as_comment Ast_c.CppMacro);
1325 find_macro_paren xs
1326
1327
1328 (* for the case where the string is not inside a funcall, but
1329 * for instance in an initializer.
1330 *)
1331
1332 (* string macro variable, before case *)
1333 | PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1334 ::xs ->
1335
1336 msg_stringification s;
1337 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1338 find_macro_paren xs
1339
1340 (* after case *)
1341 | PToken ({tok = TIdent (s,_)} as id)
1342 ::PToken ({tok = (TString _ | TMacroString _)})
1343 ::xs ->
1344
1345 msg_stringification s;
1346 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1347 find_macro_paren xs
1348
1349
1350
1351
1352
1353 (* recurse *)
1354 | (PToken x)::xs -> find_macro_paren xs
1355 | (Parenthised (xxs, info_parens))::xs ->
1356 xxs +> List.iter find_macro_paren;
1357 find_macro_paren xs
1358
1359
1360
1361
1362
1363 (* don't forget to recurse in each case *)
1364 let rec find_macro_lineparen xs =
1365 match xs with
1366 | [] -> ()
1367
1368 (* linuxext: ex: static [const] DEVICE_ATTR(); *)
1369 | (Line
1370 (
1371 [PToken ({tok = Tstatic _});
1372 PToken ({tok = TIdent (s,_)} as macro);
1373 Parenthised (xxs,info_parens);
1374 PToken ({tok = TPtVirg _});
1375 ]
1376 ))
1377 ::xs
1378 when (s ==~ regexp_macro) ->
1379
1380 msg_declare_macro s;
1381 let info = TH.info_of_tok macro.tok in
1382 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1383
1384 find_macro_lineparen (xs)
1385
1386 (* the static const case *)
1387 | (Line
1388 (
1389 [PToken ({tok = Tstatic _});
1390 PToken ({tok = Tconst _} as const);
1391 PToken ({tok = TIdent (s,_)} as macro);
1392 Parenthised (xxs,info_parens);
1393 PToken ({tok = TPtVirg _});
1394 ]
1395 (*as line1*)
1396
1397 ))
1398 ::xs
1399 when (s ==~ regexp_macro) ->
1400
1401 msg_declare_macro s;
1402 let info = TH.info_of_tok macro.tok in
1403 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1404
1405 (* need retag this const, otherwise ambiguity in grammar
1406 21: shift/reduce conflict (shift 121, reduce 137) on Tconst
1407 decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
1408 decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
1409 storage_class_spec : Tstatic . (137)
1410 *)
1411 const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
1412
1413 find_macro_lineparen (xs)
1414
1415
1416 (* same but without trailing ';'
1417 *
1418 * I do not put the final ';' because it can be on a multiline and
1419 * because of the way mk_line is coded, we will not have access to
1420 * this ';' on the next line, even if next to the ')' *)
1421 | (Line
1422 ([PToken ({tok = Tstatic _});
1423 PToken ({tok = TIdent (s,_)} as macro);
1424 Parenthised (xxs,info_parens);
1425 ]
1426 ))
1427 ::xs
1428 when s ==~ regexp_macro ->
1429
1430 msg_declare_macro s;
1431 let info = TH.info_of_tok macro.tok in
1432 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1433
1434 find_macro_lineparen (xs)
1435
1436
1437
1438
1439 (* on multiple lines *)
1440 | (Line
1441 (
1442 (PToken ({tok = Tstatic _})::[]
1443 )))
1444 ::(Line
1445 (
1446 [PToken ({tok = TIdent (s,_)} as macro);
1447 Parenthised (xxs,info_parens);
1448 PToken ({tok = TPtVirg _});
1449 ]
1450 )
1451 )
1452 ::xs
1453 when (s ==~ regexp_macro) ->
1454
1455 msg_declare_macro s;
1456 let info = TH.info_of_tok macro.tok in
1457 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1458
1459 find_macro_lineparen (xs)
1460
1461
1462 (* linuxext: ex: DECLARE_BITMAP();
1463 *
1464 * Here I use regexp_declare and not regexp_macro because
1465 * Sometimes it can be a FunCallMacro such as DEBUG(foo());
1466 * Here we don't have the preceding 'static' so only way to
1467 * not have positive is to restrict to .*DECLARE.* macros.
1468 *
1469 * but there is a grammar rule for that, so don't need this case anymore
1470 * unless the parameter of the DECLARE_xxx are wierd and can not be mapped
1471 * on a argument_list
1472 *)
1473
1474 | (Line
1475 ([PToken ({tok = TIdent (s,_)} as macro);
1476 Parenthised (xxs,info_parens);
1477 PToken ({tok = TPtVirg _});
1478 ]
1479 ))
1480 ::xs
1481 when (s ==~ regexp_declare) ->
1482
1483 msg_declare_macro s;
1484 let info = TH.info_of_tok macro.tok in
1485 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1486
1487 find_macro_lineparen (xs)
1488
1489
1490 (* toplevel macros.
1491 * module_init(xxx)
1492 *
1493 * Could also transform the TIdent in a TMacroTop but can have false
1494 * positive, so easier to just change the TCPar and so just solve
1495 * the end-of-stream pb of ocamlyacc
1496 *)
1497 | (Line
1498 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro);
1499 Parenthised (xxs,info_parens);
1500 ] as _line1
1501 ))
1502 ::xs when col1 = 0
1503 ->
1504 let condition =
1505 (* to reduce number of false positive *)
1506 (match xs with
1507 | (Line (PToken ({col = col2 } as other)::restline2))::_ ->
1508 TH.is_eof other.tok || (col2 = 0 &&
1509 (match other.tok with
1510 | TOBrace _ -> false (* otherwise would match funcdecl *)
1511 | TCBrace _ when ctx <> InFunction -> false
1512 | TPtVirg _
1513 | TDotDot _
1514 -> false
1515 | tok when TH.is_binary_operator tok -> false
1516
1517 | _ -> true
1518 )
1519 )
1520 | _ -> false
1521 )
1522 in
1523 if condition
1524 then begin
1525
1526 msg_macro_toplevel_noptvirg s;
1527 (* just to avoid the end-of-stream pb of ocamlyacc *)
1528 let tcpar = Common.last info_parens in
1529 tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok);
1530
1531 (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
1532
1533 end;
1534
1535 find_macro_lineparen (xs)
1536
1537
1538
1539 (* macro with parameters
1540 * ex: DEBUG()
1541 * return x;
1542 *)
1543 | (Line
1544 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1545 Parenthised (xxs,info_parens);
1546 ] as _line1
1547 ))
1548 ::(Line
1549 (PToken ({col = col2 } as other)::restline2
1550 ) as line2)
1551 ::xs
1552 (* when s ==~ regexp_macro *)
1553 ->
1554 let condition =
1555 (col1 = col2 &&
1556 (match other.tok with
1557 | TOBrace _ -> false (* otherwise would match funcdecl *)
1558 | TCBrace _ when ctx <> InFunction -> false
1559 | TPtVirg _
1560 | TDotDot _
1561 -> false
1562 | tok when TH.is_binary_operator tok -> false
1563
1564 | _ -> true
1565 )
1566 )
1567 ||
1568 (col2 <= col1 &&
1569 (match other.tok with
1570 | TCBrace _ when ctx = InFunction -> true
1571 | Treturn _ -> true
1572 | Tif _ -> true
1573 | Telse _ -> true
1574
1575 | _ -> false
1576 )
1577 )
1578
1579 in
1580
1581 if condition
1582 then
1583 if col1 = 0 then ()
1584 else begin
1585 msg_macro_noptvirg s;
1586 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1587 [Parenthised (xxs, info_parens)] +>
1588 iter_token_paren (set_as_comment Ast_c.CppMacro);
1589 end;
1590
1591 find_macro_lineparen (line2::xs)
1592
1593 (* linuxext:? single macro
1594 * ex: LOCK
1595 * foo();
1596 * UNLOCK
1597 *)
1598 | (Line
1599 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1600 ] as _line1
1601 ))
1602 ::(Line
1603 (PToken ({col = col2 } as other)::restline2
1604 ) as line2)
1605 ::xs ->
1606 (* when s ==~ regexp_macro *)
1607
1608 let condition =
1609 (col1 = col2 &&
1610 col1 <> 0 && (* otherwise can match typedef of fundecl*)
1611 (match other.tok with
1612 | TPtVirg _ -> false
1613 | TOr _ -> false
1614 | TCBrace _ when ctx <> InFunction -> false
1615 | tok when TH.is_binary_operator tok -> false
1616
1617 | _ -> true
1618 )) ||
1619 (col2 <= col1 &&
1620 (match other.tok with
1621 | TCBrace _ when ctx = InFunction -> true
1622 | Treturn _ -> true
1623 | Tif _ -> true
1624 | Telse _ -> true
1625 | _ -> false
1626 ))
1627 in
1628
1629 if condition
1630 then begin
1631 msg_macro_noptvirg_single s;
1632 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1633 end;
1634 find_macro_lineparen (line2::xs)
1635
1636 | x::xs ->
1637 find_macro_lineparen xs
1638
1639
1640
1641 (* ------------------------------------------------------------------------- *)
1642 (* define tobrace init *)
1643 (* ------------------------------------------------------------------------- *)
1644
1645 let rec find_define_init_brace_paren xs =
1646 let rec aux xs =
1647 match xs with
1648 | [] -> ()
1649
1650 (* mainly for firefox *)
1651 | (PToken {tok = TDefine _})
1652 ::(PToken {tok = TIdentDefine (s,_)})
1653 ::(PToken ({tok = TOBrace i1} as tokbrace))
1654 ::(PToken tok2)
1655 ::(PToken tok3)
1656 ::xs ->
1657 let is_init =
1658 match tok2.tok, tok3.tok with
1659 | TInt _, TComma _ -> true
1660 | TString _, TComma _ -> true
1661 | TIdent _, TComma _ -> true
1662 | _ -> false
1663
1664 in
1665 if is_init
1666 then begin
1667 pr2_cpp("found define initializer: " ^s);
1668 tokbrace.tok <- TOBraceDefineInit i1;
1669 end;
1670
1671 aux xs
1672
1673 (* mainly for linux, especially in sound/ *)
1674 | (PToken {tok = TDefine _})
1675 ::(PToken {tok = TIdentDefine (s,_)})
1676 ::(Parenthised(xxx, info_parens))
1677 ::(PToken ({tok = TOBrace i1} as tokbrace))
1678 ::(PToken tok2)
1679 ::(PToken tok3)
1680 ::xs ->
1681 let is_init =
1682 match tok2.tok, tok3.tok with
1683 | TInt _, TComma _ -> true
1684 | TDot _, TIdent _ -> true
1685 | TIdent _, TComma _ -> true
1686 | _ -> false
1687
1688 in
1689 if is_init
1690 then begin
1691 pr2_cpp("found define initializer with param: " ^ s);
1692 tokbrace.tok <- TOBraceDefineInit i1;
1693 end;
1694
1695 aux xs
1696
1697
1698
1699 (* recurse *)
1700 | (PToken x)::xs -> aux xs
1701 | (Parenthised (xxs, info_parens))::xs ->
1702 (* not need for tobrace init:
1703 * xxs +> List.iter aux;
1704 *)
1705 aux xs
1706 in
1707 aux xs
1708
1709
1710 (* ------------------------------------------------------------------------- *)
1711 (* action *)
1712 (* ------------------------------------------------------------------------- *)
1713
1714 let rec find_actions = function
1715 | [] -> ()
1716
1717 | PToken ({tok = TIdent (s,ii)})
1718 ::Parenthised (xxs,info_parens)
1719 ::xs ->
1720 find_actions xs;
1721 xxs +> List.iter find_actions;
1722 let modified = find_actions_params xxs in
1723 if modified
1724 then msg_macro_higher_order s
1725
1726 | x::xs ->
1727 find_actions xs
1728
1729 and find_actions_params xxs =
1730 xxs +> List.fold_left (fun acc xs ->
1731 let toks = tokens_of_paren xs in
1732 if toks +> List.exists (fun x -> TH.is_statement x.tok)
1733 then begin
1734 xs +> iter_token_paren (fun x ->
1735 if TH.is_eof x.tok
1736 then
1737 (* certainly because paren detection had a pb because of
1738 * some ifdef-exp
1739 *)
1740 pr2 "PB: wierd, I try to tag an EOF token as action"
1741 else
1742 x.tok <- TAction (TH.info_of_tok x.tok);
1743 );
1744 true (* modified *)
1745 end
1746 else acc
1747 ) false
1748
1749
1750
1751 (* ------------------------------------------------------------------------- *)
1752 (* main fix cpp function *)
1753 (* ------------------------------------------------------------------------- *)
1754
1755 let rebuild_tokens_extented toks_ext =
1756 let _tokens = ref [] in
1757 toks_ext +> List.iter (fun tok ->
1758 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
1759 push2 tok.tok _tokens
1760 );
1761 let tokens = List.rev !_tokens in
1762 (tokens +> acc_map mk_token_extended)
1763
1764 let filter_cpp_stuff xs =
1765 let rec aux xs =
1766 match xs with
1767 | [] -> []
1768 | x::xs ->
1769 (match x.tok with
1770 | tok when TH.is_comment tok -> aux xs
1771 (* don't want drop the define, or if drop, have to drop
1772 * also its body otherwise the line heuristics may be lost
1773 * by not finding the TDefine in column 0 but by finding
1774 * a TDefineIdent in a column > 0
1775 *)
1776 | Parser_c.TDefine _ ->
1777 x::aux xs
1778 | tok when TH.is_cpp_instruction tok -> aux xs
1779 | _ -> x::aux xs
1780 )
1781 in
1782 aux xs
1783
1784 let insert_virtual_positions l =
1785 let strlen x = String.length (Ast_c.str_of_info x) in
1786 let rec loop prev offset = function
1787 [] -> []
1788 | x::xs ->
1789 let ii = TH.info_of_tok x in
1790 let inject pi =
1791 TH.visitor_info_of_tok (function ii -> Ast_c.rewrap_pinfo pi ii) x in
1792 match Ast_c.pinfo_of_info ii with
1793 Ast_c.OriginTok pi ->
1794 let prev = Ast_c.parse_info_of_info ii in
1795 x::(loop prev (strlen ii) xs)
1796 | Ast_c.ExpandedTok (pi,_) ->
1797 inject (Ast_c.ExpandedTok (pi,(prev,offset))) ::
1798 (loop prev (offset + (strlen ii)) xs)
1799 | Ast_c.FakeTok (s,_) ->
1800 inject (Ast_c.FakeTok (s,(prev,offset))) ::
1801 (loop prev (offset + (strlen ii)) xs)
1802 | Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in
1803 let rec skip_fake = function
1804 [] -> []
1805 | x::xs ->
1806 let ii = TH.info_of_tok x in
1807 match Ast_c.pinfo_of_info ii with
1808 Ast_c.OriginTok pi ->
1809 let prev = Ast_c.parse_info_of_info ii in
1810 x::(loop prev (strlen ii) xs)
1811 | _ -> x::skip_fake xs in
1812 skip_fake l
1813
1814 (* ------------------------------------------------------------------------- *)
1815 let fix_tokens_cpp2 tokens =
1816 let tokens2 = ref (tokens +> acc_map mk_token_extended) in
1817
1818 begin
1819 (* the order is important, if you put the action heuristic first,
1820 * then because of ifdef, can have not closed paren
1821 * and so may believe that higher order macro
1822 * and it will eat too much tokens. So important to do
1823 * first the ifdef.
1824 *
1825 * I recompute multiple times cleaner cos the mutable
1826 * can have be changed and so may have more comments
1827 * in the token original list.
1828 *
1829 *)
1830
1831 (* ifdef *)
1832 let cleaner = !tokens2 +> List.filter (fun x ->
1833 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1834 ) in
1835 let ifdef_grouped = mk_ifdef cleaner in
1836 set_ifdef_parenthize_info ifdef_grouped;
1837
1838 find_ifdef_funheaders ifdef_grouped;
1839 find_ifdef_bool ifdef_grouped;
1840 find_ifdef_mid ifdef_grouped;
1841 adjust_inifdef_include ifdef_grouped;
1842
1843
1844 (* macro 1 *)
1845 let cleaner = !tokens2 +> filter_cpp_stuff in
1846
1847 let paren_grouped = mk_parenthised cleaner in
1848 apply_macro_defs paren_grouped;
1849 (* because the before field is used by apply_macro_defs *)
1850 tokens2 := rebuild_tokens_extented !tokens2;
1851
1852 (* tagging contextual info (InFunc, InStruct, etc). Better to do
1853 * that after the "ifdef-simplification" phase.
1854 *)
1855 let cleaner = !tokens2 +> List.filter (fun x ->
1856 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1857 ) in
1858
1859 let brace_grouped = mk_braceised cleaner in
1860 set_context_tag brace_grouped;
1861
1862
1863
1864 (* macro *)
1865 let cleaner = !tokens2 +> filter_cpp_stuff in
1866
1867 let paren_grouped = mk_parenthised cleaner in
1868 let line_paren_grouped = mk_line_parenthised paren_grouped in
1869 find_define_init_brace_paren paren_grouped;
1870 find_string_macro_paren paren_grouped;
1871 find_macro_lineparen line_paren_grouped;
1872 find_macro_paren paren_grouped;
1873
1874
1875 (* actions *)
1876 let cleaner = !tokens2 +> filter_cpp_stuff in
1877 let paren_grouped = mk_parenthised cleaner in
1878 find_actions paren_grouped;
1879
1880
1881 insert_virtual_positions (!tokens2 +> acc_map (fun x -> x.tok))
1882 end
1883
1884 let time_hack1 a =
1885 Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 a)
1886
1887 let fix_tokens_cpp a =
1888 Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 a)
1889
1890
1891
1892
1893 (*****************************************************************************)
1894 (* The #define tricks *)
1895 (*****************************************************************************)
1896
1897 (* ugly hack, a better solution perhaps would be to erase TDefEOL
1898 * from the Ast and list of tokens in parse_c.
1899 *
1900 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
1901 *
1902 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
1903 * is where is the \n TCommentSpace. Normally there is always a last token
1904 * to synchronize on, either EOF or the token of the next toplevel.
1905 * In the case of the #define we got in list of token
1906 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
1907 * not synchronize on it and so we will not print the "\n".
1908 * A solution would be to put the TDefEOL before the "\n".
1909 *
1910 * todo?: could put a ExpandedTok for that ?
1911 *)
1912 let mark_end_define ii =
1913 let ii' =
1914 { Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
1915 Common.str = "";
1916 Common.charpos = Ast_c.pos_of_info ii + 1
1917 };
1918 cocci_tag = ref Ast_c.emptyAnnot;
1919 comments_tag = ref Ast_c.emptyComments;
1920 }
1921 in
1922 TDefEOL (ii')
1923
1924 (* put the TDefEOL at the good place *)
1925 let rec define_line_1 acc xs =
1926 match xs with
1927 | [] -> List.rev acc
1928 | TDefine ii::xs ->
1929 let line = Ast_c.line_of_info ii in
1930 let acc = (TDefine ii) :: acc in
1931 define_line_2 acc line ii xs
1932 | TCppEscapedNewline ii::xs ->
1933 pr2 "WIERD: a \\ outside a #define";
1934 let acc = (TCommentSpace ii) :: acc in
1935 define_line_1 acc xs
1936 | x::xs -> define_line_1 (x::acc) xs
1937
1938 and define_line_2 acc line lastinfo xs =
1939 match xs with
1940 | [] ->
1941 (* should not happened, should meet EOF before *)
1942 pr2 "PB: WIERD";
1943 List.rev (mark_end_define lastinfo::acc)
1944 | x::xs ->
1945 let line' = TH.line_of_tok x in
1946 let info = TH.info_of_tok x in
1947
1948 (match x with
1949 | EOF ii ->
1950 let acc = (mark_end_define lastinfo) :: acc in
1951 let acc = (EOF ii) :: acc in
1952 define_line_1 acc xs
1953 | TCppEscapedNewline ii ->
1954 if (line' <> line) then pr2 "PB: WIERD: not same line number";
1955 let acc = (TCommentSpace ii) :: acc in
1956 define_line_2 acc (line+1) info xs
1957 | x ->
1958 if line' = line
1959 then define_line_2 (x::acc) line info xs
1960 else define_line_1 (mark_end_define lastinfo::acc) (x::xs)
1961 )
1962
1963 let rec define_ident acc xs =
1964 match xs with
1965 | [] -> List.rev acc
1966 | TDefine ii::xs ->
1967 let acc = TDefine ii :: acc in
1968 (match xs with
1969 | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
1970 (* Change also the kind of TIdent to avoid bad interaction
1971 * with other parsing_hack tricks. For instant if keep TIdent then
1972 * the stringication algo can believe the TIdent is a string-macro.
1973 * So simpler to change the kind of the ident too.
1974 *)
1975 (* if TOParDefine sticked to the ident, then
1976 * it's a macro-function. Change token to avoid ambiguity
1977 * between #define foo(x) and #define foo (x)
1978 *)
1979 let acc = (TCommentSpace i1) :: acc in
1980 let acc = (TIdentDefine (s,i2)) :: acc in
1981 let acc = (TOParDefine i3) :: acc in
1982 define_ident acc xs
1983 | TCommentSpace i1::TIdent (s,i2)::xs ->
1984 let acc = (TCommentSpace i1) :: acc in
1985 let acc = (TIdentDefine (s,i2)) :: acc in
1986 define_ident acc xs
1987 | _ ->
1988 pr2 "WIERD: wierd #define body";
1989 define_ident acc xs
1990 )
1991 | x::xs ->
1992 let acc = x :: acc in
1993 define_ident acc xs
1994
1995
1996
1997 let fix_tokens_define2 xs =
1998 define_ident [] (define_line_1 [] xs)
1999
2000 let fix_tokens_define a =
2001 Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a)
2002
2003
2004 (*****************************************************************************)
2005 (* for the cpp-builtin, standard.h, part 0 *)
2006 (*****************************************************************************)
2007
2008 let macro_body_to_maybe_hint body =
2009 match body with
2010 | [] -> DefineBody body
2011 | [TIdent (s,i1)] ->
2012 (match parsinghack_hint_of_string s with
2013 | Some hint -> DefineHint hint
2014 | None -> DefineBody body
2015 )
2016 | xs -> DefineBody body
2017
2018
2019 let rec define_parse xs =
2020 match xs with
2021 | [] -> []
2022 | TDefine i1::TIdentDefine (s,i2)::TOParDefine i3::xs ->
2023 let (tokparams, _, xs) =
2024 xs +> Common.split_when (function TCPar _ -> true | _ -> false) in
2025 let (body, _, xs) =
2026 xs +> Common.split_when (function TDefEOL _ -> true | _ -> false) in
2027 let params =
2028 tokparams +> Common.map_filter (function
2029 | TComma _ -> None
2030 | TIdent (s, _) -> Some s
2031 | x -> error_cant_have x
2032 ) in
2033 let body = body +> List.map
2034 (TH.visitor_info_of_tok Ast_c.make_expanded) in
2035 let def = (s, (s, Params params, macro_body_to_maybe_hint body)) in
2036 def::define_parse xs
2037
2038 | TDefine i1::TIdentDefine (s,i2)::xs ->
2039 let (body, _, xs) =
2040 xs +> Common.split_when (function TDefEOL _ -> true | _ -> false) in
2041 let body = body +> List.map
2042 (TH.visitor_info_of_tok Ast_c.make_expanded) in
2043 let def = (s, (s, NoParam, macro_body_to_maybe_hint body)) in
2044 def::define_parse xs
2045
2046 | TDefine i1::_ ->
2047 pr2_gen i1;
2048 raise Impossible
2049 | x::xs -> define_parse xs
2050
2051
2052 let extract_cpp_define xs =
2053 let cleaner = xs +> List.filter (fun x ->
2054 not (TH.is_comment x)
2055 ) in
2056 define_parse cleaner
2057
2058
2059
2060
2061 (*****************************************************************************)
2062 (* Lexing with lookahead *)
2063 (*****************************************************************************)
2064
2065 (* Why using yet another parsing_hack technique ? The fix_xxx where do
2066 * some pre-processing on the full list of tokens is not enough ?
2067 * No cos sometimes we need more contextual info, and even if
2068 * set_context() tries to give some contextual info, it's not completely
2069 * accurate so the following code give yet another alternative, yet another
2070 * chance to transform some tokens.
2071 *
2072 * todo?: maybe could try to get rid of this technique. Maybe a better
2073 * set_context() would make possible to move this code using a fix_xx
2074 * technique.
2075 *
2076 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
2077 * it is more general to do it via my LALR(k) tech. Because here we can
2078 * transform some token give some context information. So sometimes it
2079 * makes sense to transform a token in one context, sometimes not, and
2080 * lex can not provide us this context information. Note that the order
2081 * in the pattern matching in lookahead is important. Do not cut/paste.
2082 *
2083 * Note that in next there is only "clean" tokens, there is no comment
2084 * or space tokens. This is done by the caller.
2085 *
2086 *)
2087
2088 open Lexer_parser (* for the fields of lexer_hint type *)
2089
2090 let not_struct_enum = function
2091 | (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _)::_ -> false
2092 | _ -> true
2093
2094
2095 let lookahead2 ~pass next before =
2096
2097 match (next, before) with
2098
2099 (*-------------------------------------------------------------*)
2100 (* typedef inference, parse_typedef_fix3 *)
2101 (*-------------------------------------------------------------*)
2102 (* xx xx *)
2103 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s = s2
2104 && ok_typedef s
2105 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
2106 ->
2107 (* parse_typedef_fix3:
2108 * acpi_object acpi_object;
2109 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
2110 * Le parser en interne a deja appelé le prochain token pour pouvoir
2111 * decider des choses.
2112 * => special case in lexer_heuristic, again
2113 *)
2114 if !Flag_parsing_c.debug_typedef
2115 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
2116
2117 LP.disable_typedef();
2118
2119 msg_typedef s; LP.add_typedef_root s;
2120 TypedefIdent (s, i1)
2121
2122 (* xx yy *)
2123 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
2124 && ok_typedef s
2125 ->
2126 (* && not_annot s2 BUT lead to false positive*)
2127
2128 msg_typedef s; LP.add_typedef_root s;
2129 TypedefIdent (s, i1)
2130
2131
2132 (* xx inline *)
2133 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
2134 && ok_typedef s
2135 ->
2136 msg_typedef s; LP.add_typedef_root s;
2137 TypedefIdent (s, i1)
2138
2139
2140 (* [,(] xx [,)] AND param decl *)
2141 | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ )
2142 when not_struct_enum before && (LP.current_context() = LP.InParameter)
2143 && ok_typedef s
2144 ->
2145 msg_typedef s; LP.add_typedef_root s;
2146 TypedefIdent (s, i1)
2147
2148 (* xx* [,)] *)
2149 (* specialcase: [,(] xx* [,)] *)
2150 | (TIdent (s, i1)::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
2151 when not_struct_enum before
2152 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2153 && ok_typedef s
2154 ->
2155 msg_typedef s; LP.add_typedef_root s;
2156 TypedefIdent (s, i1)
2157
2158
2159 (* xx** [,)] *)
2160 (* specialcase: [,(] xx** [,)] *)
2161 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
2162 when not_struct_enum before
2163 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2164 && ok_typedef s
2165 ->
2166 msg_typedef s; LP.add_typedef_root s;
2167 TypedefIdent (s, i1)
2168
2169
2170
2171 (* xx const * USELESS because of next rule ? *)
2172 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
2173 when not_struct_enum before
2174 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2175 && ok_typedef s
2176 ->
2177
2178 msg_typedef s; LP.add_typedef_root s;
2179 TypedefIdent (s, i1)
2180
2181 (* xx const *)
2182 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
2183 when not_struct_enum before
2184 && ok_typedef s
2185 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2186 ->
2187
2188 msg_typedef s; LP.add_typedef_root s;
2189 TypedefIdent (s, i1)
2190
2191
2192 (* xx * const *)
2193 | (TIdent (s, i1)::TMul _::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
2194 when not_struct_enum before
2195 && ok_typedef s
2196 ->
2197 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2198
2199 msg_typedef s; LP.add_typedef_root s;
2200 TypedefIdent (s, i1)
2201
2202
2203 (* ( const xx) *)
2204 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
2205 ok_typedef s ->
2206 msg_typedef s; LP.add_typedef_root s;
2207 TypedefIdent (s, i1)
2208
2209
2210
2211 (* ( xx ) [sizeof, ~] *)
2212 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
2213 when not_struct_enum before
2214 && ok_typedef s
2215 ->
2216 msg_typedef s; LP.add_typedef_root s;
2217 TypedefIdent (s, i1)
2218
2219 (* [(,] xx [ AND parameterdeclaration *)
2220 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
2221 when (LP.current_context() = LP.InParameter)
2222 && ok_typedef s
2223 ->
2224 msg_typedef s; LP.add_typedef_root s;
2225 TypedefIdent (s, i1)
2226
2227 (*------------------------------------------------------------*)
2228 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
2229 (* but if have a '=', or ',' I think not *)
2230 (*------------------------------------------------------------*)
2231
2232 (* static xx * yy *)
2233 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ ,
2234 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
2235 ok_typedef s
2236 ->
2237 msg_typedef s; LP.add_typedef_root s;
2238 TypedefIdent (s, i1)
2239
2240 (* TODO xx * yy ; AND in start of compound element *)
2241
2242
2243 (* xx * yy, AND in paramdecl *)
2244 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _)
2245 when not_struct_enum before && (LP.current_context() = LP.InParameter)
2246 && ok_typedef s
2247 ->
2248
2249 msg_typedef s; LP.add_typedef_root s;
2250 TypedefIdent (s, i1)
2251
2252
2253 (* xx * yy ; AND in Toplevel, except when have = before *)
2254 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
2255 TIdent (s, i1)
2256 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , _)
2257 when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ()))
2258 ->
2259 msg_typedef s; LP.add_typedef_root s;
2260 TypedefIdent (s, i1)
2261
2262 (* xx * yy , AND in Toplevel *)
2263 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _)
2264 when not_struct_enum before && (LP.current_context () = LP.InTopLevel)
2265 && ok_typedef s
2266 ->
2267
2268 msg_typedef s; LP.add_typedef_root s;
2269 TypedefIdent (s, i1)
2270
2271 (* xx * yy ( AND in Toplevel *)
2272 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOPar _::_ , _)
2273 when not_struct_enum before
2274 && (LP.is_top_or_struct (LP.current_context ()))
2275 && ok_typedef s
2276 ->
2277 msg_typedef s; LP.add_typedef_root s;
2278 TypedefIdent (s, i1)
2279
2280 (* xx * yy [ *)
2281 (* todo? enough ? cos in struct def we can have some expression ! *)
2282 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOCro _::_ , _)
2283 when not_struct_enum before &&
2284 (LP.is_top_or_struct (LP.current_context ()))
2285 && ok_typedef s
2286 ->
2287 msg_typedef s; LP.add_typedef_root s;
2288 TypedefIdent (s, i1)
2289
2290 (* u16: 10; in struct *)
2291 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
2292 when (LP.is_top_or_struct (LP.current_context ()))
2293 && ok_typedef s
2294 ->
2295 msg_typedef s; LP.add_typedef_root s;
2296 TypedefIdent (s, i1)
2297
2298
2299 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
2300 (* YES cos at toplevel can have some expression !! for instance when *)
2301 (* enter in the dimension of an array *)
2302 (*
2303 | (TIdent s::TMul::TIdent s2::_ , _)
2304 when (take_safe 1 !passed_tok <> [Tstruct] &&
2305 (take_safe 1 !passed_tok <> [Tenum]))
2306 &&
2307 !LP._lexer_hint = Some LP.Toplevel ->
2308 msg_typedef s;
2309 LP.add_typedef_root s;
2310 TypedefIdent s
2311 *)
2312
2313 (* xx * yy = *)
2314 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TEq _::_ , _)
2315 when not_struct_enum before
2316 && ok_typedef s
2317 ->
2318 msg_typedef s; LP.add_typedef_root s;
2319 TypedefIdent (s, i1)
2320
2321
2322 (* xx * yy) AND in paramdecl *)
2323 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TCPar _::_ , _)
2324 when not_struct_enum before && (LP.current_context () = LP.InParameter)
2325 && ok_typedef s
2326 ->
2327 msg_typedef s; LP.add_typedef_root s;
2328 TypedefIdent (s, i1)
2329
2330
2331 (* xx * yy; *) (* wrong ? *)
2332 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ ,
2333 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
2334 && ok_typedef s
2335 ->
2336 msg_typedef s; LP.add_typedef_root s;
2337 msg_maybe_dangereous_typedef s;
2338 TypedefIdent (s, i1)
2339
2340
2341 (* xx * yy, and ';' before xx *) (* wrong ? *)
2342 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ ,
2343 (TOBrace _| TPtVirg _)::_) when
2344 ok_typedef s
2345 ->
2346 msg_typedef s; LP.add_typedef_root s;
2347 TypedefIdent (s, i1)
2348
2349
2350 (* xx_t * yy *)
2351 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , _)
2352 when s ==~ regexp_typedef && not_struct_enum before
2353 (* struct user_info_t sometimes *)
2354 && ok_typedef s
2355 ->
2356 msg_typedef s; LP.add_typedef_root s;
2357 TypedefIdent (s, i1)
2358
2359 (* xx ** yy *) (* wrong ? *)
2360 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
2361 when not_struct_enum before
2362 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2363 && ok_typedef s
2364 ->
2365 msg_typedef s; LP.add_typedef_root s;
2366 TypedefIdent (s, i1)
2367
2368 (* xx *** yy *)
2369 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
2370 when not_struct_enum before
2371 && ok_typedef s
2372 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2373 ->
2374 msg_typedef s; LP.add_typedef_root s;
2375 TypedefIdent (s, i1)
2376
2377 (* xx ** ) *)
2378 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
2379 when not_struct_enum before
2380 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2381 && ok_typedef s
2382 ->
2383 msg_typedef s; LP.add_typedef_root s;
2384 TypedefIdent (s, i1)
2385
2386
2387
2388 (* ----------------------------------- *)
2389 (* old: why not do like for other rules and start with TIdent ?
2390 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
2391 * new: prefer now start with TIdent because otherwise the add_typedef_root
2392 * may have no effect if in second pass or if have disable the add_typedef.
2393 *)
2394
2395 (* (xx) yy *)
2396 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
2397 (TOPar info)::x::_)
2398 when not (TH.is_stuff_taking_parenthized x) &&
2399 Ast_c.line_of_info i2 = Ast_c.line_of_info i3
2400 && ok_typedef s
2401 ->
2402
2403 msg_typedef s; LP.add_typedef_root s;
2404 (*TOPar info*)
2405 TypedefIdent (s, i1)
2406
2407
2408 (* (xx) ( yy) *)
2409 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
2410 when not (TH.is_stuff_taking_parenthized x)
2411 && ok_typedef s
2412 ->
2413 msg_typedef s; LP.add_typedef_root s;
2414 (* TOPar info *)
2415 TypedefIdent (s, i1)
2416
2417 (* (xx * ) yy *)
2418 | (TIdent (s, i1)::TMul _::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_) when
2419 ok_typedef s
2420 ->
2421 msg_typedef s; LP.add_typedef_root s;
2422 (*TOPar info*)
2423 TypedefIdent (s,i1)
2424
2425
2426 (* (xx){ ... } constructor *)
2427 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
2428 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
2429 && ok_typedef s
2430 ->
2431 msg_typedef s; LP.add_typedef_root s;
2432 TypedefIdent (s, i1)
2433
2434
2435 (* can have sizeof on expression
2436 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
2437 msg_typedef s;
2438 LP.add_typedef_root s;
2439 Tsizeof
2440 *)
2441 (* x ( *y )(params), function pointer *)
2442 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
2443 when not_struct_enum before
2444 && ok_typedef s
2445 ->
2446 msg_typedef s; LP.add_typedef_root s;
2447 TypedefIdent (s, i1)
2448
2449
2450 (*-------------------------------------------------------------*)
2451 (* CPP *)
2452 (*-------------------------------------------------------------*)
2453 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
2454 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
2455 as x)
2456 ::_, _
2457 ->
2458 (*
2459 if not !Flag_parsing_c.ifdef_to_if
2460 then TCommentCpp (Ast_c.CppDirective, ii)
2461 else
2462 *)
2463 (* not !LP._lexer_hint.toplevel *)
2464 if !Flag_parsing_c.ifdef_directive_passing
2465 || (pass = 2)
2466 then begin
2467
2468 if (LP.current_context () = LP.InInitializer)
2469 then begin
2470 pr2 "In Initializer passing"; (* cheat: dont count in stat *)
2471 incr Stat.nIfdefInitializer;
2472
2473 end
2474 else msg_ifdef_passing ()
2475 ;
2476
2477 TCommentCpp (Ast_c.CppDirective, ii)
2478 end
2479 else x
2480
2481 | (TUndef (id, ii) as x)::_, _
2482 ->
2483 if (pass = 2)
2484 then begin
2485 pr2_once ("CPP-UNDEF: I treat it as comment");
2486 TCommentCpp (Ast_c.CppDirective, ii)
2487 end
2488 else x
2489
2490 (* If ident contain a for_each, then certainly a macro. But to be
2491 * sure should look if there is a '{' after the ')', but it requires
2492 * to count the '('. Because this can be expensive, we do that only
2493 * when the token contains "for_each".
2494 *)
2495 | (TIdent (s, i1)::TOPar _::rest, _) when not (LP.current_context () = LP.InTopLevel)
2496 (* otherwise a function such as static void loopback_enable(int i) {
2497 * will be considered as a loop
2498 *)
2499 ->
2500
2501
2502 if s ==~ regexp_foreach &&
2503 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
2504
2505 then begin
2506 msg_foreach s;
2507 TMacroIterator (s, i1)
2508 end
2509 else TIdent (s, i1)
2510
2511
2512
2513 (*-------------------------------------------------------------*)
2514 | v::xs, _ -> v
2515 | _ -> raise Impossible
2516
2517 let lookahead ~pass a b =
2518 Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b)
2519
2520