866463b1500a5f1b9f22c44202f02646ec6732fd
[bpt/coccinelle.git] / parsing_c / parsing_hacks.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14
15 open Common
16
17 module TH = Token_helpers
18 module LP = Lexer_parser
19
20 module Stat = Parsing_stat
21
22 open Parser_c
23
24 (*****************************************************************************)
25 (* Some debugging functions *)
26 (*****************************************************************************)
27
28 let pr2 s =
29 if !Flag_parsing_c.verbose_parsing
30 then Common.pr2 s
31
32 let pr2_once s =
33 if !Flag_parsing_c.verbose_parsing
34 then Common.pr2_once s
35
36 let pr2_cpp s =
37 if !Flag_parsing_c.debug_cpp
38 then Common.pr2_once ("CPP-" ^ s)
39
40
41 let msg_gen cond is_known printer s =
42 if cond
43 then
44 if not (!Flag_parsing_c.filter_msg)
45 then printer s
46 else
47 if not (is_known s)
48 then printer s
49
50
51 (* In the following, there are some harcoded names of types or macros
52 * but they are not used by our heuristics! They are just here to
53 * enable to detect false positive by printing only the typedef/macros
54 * that we don't know yet. If we print everything, then we can easily
55 * get lost with too much verbose tracing information. So those
56 * functions "filter" some messages. So our heuristics are still good,
57 * there is no more (or not that much) hardcoded linux stuff.
58 *)
59
60 let is_known_typdef =
61 (fun s ->
62 (match s with
63 | "u_char" | "u_short" | "u_int" | "u_long"
64 | "u8" | "u16" | "u32" | "u64"
65 | "s8" | "s16" | "s32" | "s64"
66 | "__u8" | "__u16" | "__u32" | "__u64"
67 -> true
68
69 | "acpi_handle"
70 | "acpi_status"
71 -> true
72
73 | "FILE"
74 | "DIR"
75 -> true
76
77 | s when s =~ ".*_t$" -> true
78 | _ -> false
79 )
80 )
81
82 (* note: cant use partial application with let msg_typedef =
83 * because it would compute msg_typedef at compile time when
84 * the flag debug_typedef is always false
85 *)
86 let msg_typedef s =
87 incr Stat.nTypedefInfer;
88 msg_gen (!Flag_parsing_c.debug_typedef)
89 is_known_typdef
90 (fun s ->
91 pr2_cpp ("TYPEDEF: promoting: " ^ s)
92 )
93 s
94
95 let msg_maybe_dangereous_typedef s =
96 if not (is_known_typdef s)
97 then
98 pr2
99 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
100
101
102
103 let msg_declare_macro s =
104 incr Stat.nMacroDecl;
105 msg_gen (!Flag_parsing_c.debug_cpp)
106 (fun s ->
107 (match s with
108 | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
109 | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
110 | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
111 | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
112 | "SENSOR_DEVICE_ATTR"
113 | "LIST_HEAD"
114 | "DECLARE_WORK" | "DECLARE_TASKLET"
115 | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
116 | "DECLARE_BITMAP"
117
118 -> true
119 (*
120 | s when s =~ "^DECLARE_.*" -> true
121 | s when s =~ ".*_ATTR$" -> true
122 | s when s =~ "^DEFINE_.*" -> true
123 *)
124
125 | _ -> false
126 )
127 )
128 (fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s))
129 s
130
131
132 let msg_foreach s =
133 incr Stat.nIteratorHeuristic;
134 pr2_cpp ("MACRO: found foreach: " ^ s)
135
136
137 (* ??
138 let msg_debug_macro s =
139 pr2_cpp ("MACRO: found debug-macro: " ^ s)
140 *)
141
142
143 let msg_macro_noptvirg s =
144 incr Stat.nMacroStmt;
145 pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)
146
147 let msg_macro_toplevel_noptvirg s =
148 incr Stat.nMacroStmt;
149 pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)
150
151 let msg_macro_noptvirg_single s =
152 incr Stat.nMacroStmt;
153 pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
154
155
156
157
158 let msg_macro_higher_order s =
159 incr Stat.nMacroHigherOrder;
160 msg_gen (!Flag_parsing_c.debug_cpp)
161 (fun s ->
162 (match s with
163 | "DBGINFO"
164 | "DBGPX"
165 | "DFLOW"
166 -> true
167 | _ -> false
168 )
169 )
170 (fun s -> pr2_cpp ("MACRO: found higher ordre macro : " ^ s))
171 s
172
173
174 let msg_stringification s =
175 incr Stat.nMacroString;
176 msg_gen (!Flag_parsing_c.debug_cpp)
177 (fun s ->
178 (match s with
179 | "REVISION"
180 | "UTS_RELEASE"
181 | "SIZE_STR"
182 | "DMA_STR"
183 -> true
184 (* s when s =~ ".*STR.*" -> true *)
185 | _ -> false
186 )
187 )
188 (fun s -> pr2_cpp ("MACRO: found string-macro " ^ s))
189 s
190
191 let msg_stringification_params s =
192 incr Stat.nMacroString;
193 pr2_cpp ("MACRO: string-macro with params : " ^ s)
194
195
196
197 let msg_apply_known_macro s =
198 incr Stat.nMacroExpand;
199 pr2_cpp ("MACRO: found known macro = " ^ s)
200
201 let msg_apply_known_macro_hint s =
202 incr Stat.nMacroHint;
203 pr2_cpp ("MACRO: found known macro hint = " ^ s)
204
205
206
207
208 let msg_ifdef_bool_passing is_ifdef_positif =
209 incr Stat.nIfdefZero; (* of Version ? *)
210 if is_ifdef_positif
211 then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION"
212 else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
213
214
215 let msg_ifdef_mid_something () =
216 incr Stat.nIfdefExprPassing;
217 pr2_cpp "found ifdef-mid-something"
218
219 let msg_ifdef_funheaders () =
220 incr Stat.nIfdefFunheader;
221 ()
222
223
224 let msg_attribute s =
225 incr Stat.nMacroAttribute;
226 pr2_cpp("ATTR:" ^ s)
227
228
229
230 (*****************************************************************************)
231 (* The regexp and basic view definitions *)
232 (*****************************************************************************)
233
234 (* opti: better to built then once and for all, especially regexp_foreach *)
235
236 let regexp_macro = Str.regexp
237 "^[A-Z_][A-Z_0-9]*$"
238
239 (* linuxext: *)
240 let regexp_annot = Str.regexp
241 "^__.*$"
242
243 (* linuxext: *)
244 let regexp_declare = Str.regexp
245 ".*DECLARE.*"
246
247 (* linuxext: *)
248 let regexp_foreach = Str.regexp_case_fold
249 ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"
250
251 let regexp_typedef = Str.regexp
252 ".*_t$"
253
254 let false_typedef = [
255 "printk";
256 ]
257
258
259 let ok_typedef s = not (List.mem s false_typedef)
260
261 let not_annot s =
262 not (s ==~ regexp_annot)
263
264
265 (* ------------------------------------------------------------------------- *)
266 (* cpp part 1 for standard.h *)
267 (* ------------------------------------------------------------------------- *)
268
269 type define_def = string * define_param * define_body
270 and define_param =
271 | NoParam
272 | Params of string list
273 and define_body =
274 | DefineBody of Parser_c.token list
275 | DefineHint of parsinghack_hint
276
277 and parsinghack_hint =
278 | HintIterator
279 | HintDeclarator
280 | HintMacroString
281 | HintMacroStatement
282 | HintAttribute
283 | HintMacroIdentBuilder
284
285
286 (* cf also data/test.h *)
287 let assoc_hint_string = [
288 "YACFE_ITERATOR" , HintIterator;
289 "YACFE_DECLARATOR" , HintDeclarator;
290 "YACFE_STRING" , HintMacroString;
291 "YACFE_STATEMENT" , HintMacroStatement;
292 "YACFE_ATTRIBUTE" , HintAttribute;
293 "YACFE_IDENT_BUILDER" , HintMacroIdentBuilder;
294
295 "MACROSTATEMENT" , HintMacroStatement; (* backward compatibility *)
296 ]
297
298
299 let (parsinghack_hint_of_string: string -> parsinghack_hint option) = fun s ->
300 Common.assoc_option s assoc_hint_string
301
302 let (is_parsinghack_hint: string -> bool) = fun s ->
303 parsinghack_hint_of_string s <> None
304
305 let (token_from_parsinghack_hint:
306 (string * Ast_c.info) -> parsinghack_hint -> Parser_c.token) =
307 fun (s,ii) hint ->
308 match hint with
309 | HintIterator ->
310 Parser_c.TMacroIterator (s, ii)
311 | HintDeclarator ->
312 Parser_c.TMacroDecl (s, ii)
313 | HintMacroString ->
314 Parser_c.TMacroString (s, ii)
315 | HintMacroStatement ->
316 Parser_c.TMacroStmt (s, ii)
317 | HintAttribute ->
318 Parser_c.TMacroAttr (s, ii)
319 | HintMacroIdentBuilder ->
320 Parser_c.TMacroIdentBuilder (s, ii)
321
322
323
324 let (_defs : (string, define_def) Hashtbl.t ref) =
325 ref (Hashtbl.create 101)
326
327
328 (* ------------------------------------------------------------------------- *)
329 (* fuzzy parsing, different "views" over the same program *)
330 (* ------------------------------------------------------------------------- *)
331
332
333 (* Normally I should not use ref/mutable in the token_extended type
334 * and I should have a set of functions taking a list of tokens and
335 * returning a list of tokens. The problem is that to make easier some
336 * functions, it is better to work on better representation, on "views"
337 * over this list of tokens. But then modifying those views and get
338 * back from those views to the original simple list of tokens is
339 * tedious. One way is to maintain next to the view a list of "actions"
340 * (I was using a hash storing the charpos of the token and associating
341 * the action) but it is tedious too. Simpler to use mutable/ref. We
342 * use the same idea that we use when working on the Ast_c. *)
343
344 (* old: when I was using the list of "actions" next to the views, the hash
345 * indexed by the charpos, there could have been some problems:
346 * how my fake_pos interact with the way I tag and adjust token ?
347 * because I base my tagging on the position of the token ! so sometimes
348 * could tag another fakeInfo that should not be tagged ?
349 * fortunately I don't use anymore this technique.
350 *)
351
352 (* update: quite close to the Place_c.Inxxx *)
353 type context =
354 InFunction | InEnum | InStruct | InInitializer | NoContext
355
356 type token_extended = {
357 mutable tok: Parser_c.token;
358 mutable where: context;
359
360 (* less: need also a after ? *)
361 mutable new_tokens_before : Parser_c.token list;
362
363 (* line x col cache, more easily accessible, of the info in the token *)
364 line: int;
365 col : int;
366 }
367
368 let set_as_comment cppkind x =
369 if TH.is_eof x.tok
370 then () (* otherwise parse_c will be lost if don't find a EOF token *)
371 else
372 x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok)
373
374 let mk_token_extended x =
375 let (line, col) = TH.linecol_of_tok x in
376 { tok = x;
377 line = line; col = col;
378 where = NoContext;
379 new_tokens_before = [];
380 }
381
382
383 (* x list list, because x list separated by ',' *)
384 type paren_grouped =
385 | Parenthised of paren_grouped list list * token_extended list
386 | PToken of token_extended
387
388 type brace_grouped =
389 | Braceised of
390 brace_grouped list list * token_extended * token_extended option
391 | BToken of token_extended
392
393 (* Far better data structure than doing hacks in the lexer or parser
394 * because in lexer we don't know to which ifdef a endif is related
395 * and so when we want to comment a ifdef, we don't know which endif
396 * we must also comment. Especially true for the #if 0 which sometimes
397 * have a #else part.
398 *
399 * x list list, because x list separated by #else or #elif
400 *)
401 type ifdef_grouped =
402 | Ifdef of ifdef_grouped list list * token_extended list
403 | Ifdefbool of bool * ifdef_grouped list list * token_extended list
404 | NotIfdefLine of token_extended list
405
406
407 type 'a line_grouped =
408 Line of 'a list
409
410
411 type body_function_grouped =
412 | BodyFunction of token_extended list
413 | NotBodyLine of token_extended list
414
415
416 (* ------------------------------------------------------------------------- *)
417 (* view builders *)
418 (* ------------------------------------------------------------------------- *)
419
420 (* todo: synchro ! use more indentation
421 * if paren not closed and same indentation level, certainly because
422 * part of a mid-ifdef-expression.
423 *)
424 let rec mk_parenthised xs =
425 match xs with
426 | [] -> []
427 | x::xs ->
428 (match x.tok with
429 | TOPar _ | TOParDefine _ ->
430 let body, extras, xs = mk_parameters [x] [] xs in
431 Parenthised (body,extras)::mk_parenthised xs
432 | _ ->
433 PToken x::mk_parenthised xs
434 )
435
436 (* return the body of the parenthised expression and the rest of the tokens *)
437 and mk_parameters extras acc_before_sep xs =
438 match xs with
439 | [] ->
440 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
441 pr2 "PB: not found closing paren in fuzzy parsing";
442 [List.rev acc_before_sep], List.rev extras, []
443 | x::xs ->
444 (match x.tok with
445 (* synchro *)
446 | TOBrace _ when x.col =|= 0 ->
447 pr2 "PB: found synchro point } in paren";
448 [List.rev acc_before_sep], List.rev (extras), (x::xs)
449
450 | TCPar _ | TCParEOL _ ->
451 [List.rev acc_before_sep], List.rev (x::extras), xs
452 | TOPar _ | TOParDefine _ ->
453 let body, extrasnest, xs = mk_parameters [x] [] xs in
454 mk_parameters extras
455 (Parenthised (body,extrasnest)::acc_before_sep)
456 xs
457 | TComma _ ->
458 let body, extras, xs = mk_parameters (x::extras) [] xs in
459 (List.rev acc_before_sep)::body, extras, xs
460 | _ ->
461 mk_parameters extras (PToken x::acc_before_sep) xs
462 )
463
464
465
466
467 let rec mk_braceised xs =
468 match xs with
469 | [] -> []
470 | x::xs ->
471 (match x.tok with
472 | TOBrace _ ->
473 let body, endbrace, xs = mk_braceised_aux [] xs in
474 Braceised (body, x, endbrace)::mk_braceised xs
475 | TCBrace _ ->
476 pr2 "PB: found closing brace alone in fuzzy parsing";
477 BToken x::mk_braceised xs
478 | _ ->
479 BToken x::mk_braceised xs
480 )
481
482 (* return the body of the parenthised expression and the rest of the tokens *)
483 and mk_braceised_aux acc xs =
484 match xs with
485 | [] ->
486 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
487 pr2 "PB: not found closing brace in fuzzy parsing";
488 [List.rev acc], None, []
489 | x::xs ->
490 (match x.tok with
491 | TCBrace _ -> [List.rev acc], Some x, xs
492 | TOBrace _ ->
493 let body, endbrace, xs = mk_braceised_aux [] xs in
494 mk_braceised_aux (Braceised (body,x, endbrace)::acc) xs
495 | _ ->
496 mk_braceised_aux (BToken x::acc) xs
497 )
498
499
500
501
502 let rec mk_ifdef xs =
503 match xs with
504 | [] -> []
505 | x::xs ->
506 (match x.tok with
507 | TIfdef _ ->
508 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
509 Ifdef (body, extra)::mk_ifdef xs
510 | TIfdefBool (b,_, _) ->
511 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
512
513 (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
514 if !Flag_parsing_c.if0_passing
515 then Ifdefbool (b, body, extra)::mk_ifdef xs
516 else Ifdef(body, extra)::mk_ifdef xs
517
518 | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
519 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
520 Ifdefbool (b, body, extra)::mk_ifdef xs
521
522
523 | _ ->
524 (* todo? can have some Ifdef in the line ? *)
525 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
526 NotIfdefLine line::mk_ifdef xs
527 )
528
529 and mk_ifdef_parameters extras acc_before_sep xs =
530 match xs with
531 | [] ->
532 (* Note that mk_ifdef is assuming that CPP instruction are alone
533 * on their line. Because I do a span (fun x -> is_same_line ...)
534 * I might take with me a #endif if this one is mixed on a line
535 * with some "normal" tokens.
536 *)
537 pr2 "PB: not found closing ifdef in fuzzy parsing";
538 [List.rev acc_before_sep], List.rev extras, []
539 | x::xs ->
540 (match x.tok with
541 | TEndif _ ->
542 [List.rev acc_before_sep], List.rev (x::extras), xs
543 | TIfdef _ ->
544 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
545 mk_ifdef_parameters
546 extras (Ifdef (body, extrasnest)::acc_before_sep) xs
547
548 | TIfdefBool (b,_,_) ->
549 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
550
551 if !Flag_parsing_c.if0_passing
552 then
553 mk_ifdef_parameters
554 extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
555 else
556 mk_ifdef_parameters
557 extras (Ifdef (body, extrasnest)::acc_before_sep) xs
558
559
560 | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
561 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
562 mk_ifdef_parameters
563 extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
564
565 | TIfdefelse _
566 | TIfdefelif _ ->
567 let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in
568 (List.rev acc_before_sep)::body, extras, xs
569 | _ ->
570 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
571 mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs
572 )
573
574 (* --------------------------------------- *)
575
576 let line_of_paren = function
577 | PToken x -> x.line
578 | Parenthised (xxs, info_parens) ->
579 (match info_parens with
580 | [] -> raise Impossible
581 | x::xs -> x.line
582 )
583
584
585 let rec span_line_paren line = function
586 | [] -> [],[]
587 | x::xs ->
588 (match x with
589 | PToken tok when TH.is_eof tok.tok ->
590 [], x::xs
591 | _ ->
592 if line_of_paren x =|= line
593 then
594 let (l1, l2) = span_line_paren line xs in
595 (x::l1, l2)
596 else ([], x::xs)
597 )
598
599
600 let rec mk_line_parenthised xs =
601 match xs with
602 | [] -> []
603 | x::xs ->
604 let line_no = line_of_paren x in
605 let line, xs = span_line_paren line_no xs in
606 Line (x::line)::mk_line_parenthised xs
607
608
609 (* --------------------------------------- *)
610 let rec mk_body_function_grouped xs =
611 match xs with
612 | [] -> []
613 | x::xs ->
614 (match x with
615 | {tok = TOBrace _; col = 0} ->
616 let is_closing_brace = function
617 | {tok = TCBrace _; col = 0 } -> true
618 | _ -> false
619 in
620 let body, xs = Common.span (fun x -> not (is_closing_brace x)) xs in
621 (match xs with
622 | ({tok = TCBrace _; col = 0 })::xs ->
623 BodyFunction body::mk_body_function_grouped xs
624 | [] ->
625 pr2 "PB:not found closing brace in fuzzy parsing";
626 [NotBodyLine body]
627 | _ -> raise Impossible
628 )
629
630 | _ ->
631 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
632 NotBodyLine line::mk_body_function_grouped xs
633 )
634
635
636 (* ------------------------------------------------------------------------- *)
637 (* view iterators *)
638 (* ------------------------------------------------------------------------- *)
639
640 let rec iter_token_paren f xs =
641 xs +> List.iter (function
642 | PToken tok -> f tok;
643 | Parenthised (xxs, info_parens) ->
644 info_parens +> List.iter f;
645 xxs +> List.iter (fun xs -> iter_token_paren f xs)
646 )
647
648 let rec iter_token_brace f xs =
649 xs +> List.iter (function
650 | BToken tok -> f tok;
651 | Braceised (xxs, tok1, tok2opt) ->
652 f tok1; do_option f tok2opt;
653 xxs +> List.iter (fun xs -> iter_token_brace f xs)
654 )
655
656 let rec iter_token_ifdef f xs =
657 xs +> List.iter (function
658 | NotIfdefLine xs -> xs +> List.iter f;
659 | Ifdefbool (_, xxs, info_ifdef)
660 | Ifdef (xxs, info_ifdef) ->
661 info_ifdef +> List.iter f;
662 xxs +> List.iter (iter_token_ifdef f)
663 )
664
665
666
667
668 let tokens_of_paren xs =
669 let g = ref [] in
670 xs +> iter_token_paren (fun tok -> push2 tok g);
671 List.rev !g
672
673
674 let tokens_of_paren_ordered xs =
675 let g = ref [] in
676
677 let rec aux_tokens_ordered = function
678 | PToken tok -> push2 tok g;
679 | Parenthised (xxs, info_parens) ->
680 let (opar, cpar, commas) =
681 match info_parens with
682 | opar::xs ->
683 (match List.rev xs with
684 | cpar::xs ->
685 opar, cpar, List.rev xs
686 | _ -> raise Impossible
687 )
688 | _ -> raise Impossible
689 in
690 push2 opar g;
691 aux_args (xxs,commas);
692 push2 cpar g;
693
694 and aux_args (xxs, commas) =
695 match xxs, commas with
696 | [], [] -> ()
697 | [xs], [] -> xs +> List.iter aux_tokens_ordered
698 | xs::ys::xxs, comma::commas ->
699 xs +> List.iter aux_tokens_ordered;
700 push2 comma g;
701 aux_args (ys::xxs, commas)
702 | _ -> raise Impossible
703
704 in
705
706 xs +> List.iter aux_tokens_ordered;
707 List.rev !g
708
709
710
711
712 (* ------------------------------------------------------------------------- *)
713 (* set the context info in token *)
714 (* ------------------------------------------------------------------------- *)
715
716
717 let rec set_in_function_tag xs =
718 (* could try: ) { } but it can be the ) of a if or while, so
719 * better to base the heuristic on the position in column zero.
720 * Note that some struct or enum or init put also their { in first column
721 * but set_in_other will overwrite the previous InFunction tag.
722 *)
723 match xs with
724 | [] -> ()
725 (* ) { and the closing } is in column zero, then certainly a function *)
726 | BToken ({tok = TCPar _ })::(Braceised (body, tok1, Some tok2))::xs
727 when tok1.col <> 0 && tok2.col =|= 0 ->
728 body +> List.iter (iter_token_brace (fun tok ->
729 tok.where <- InFunction
730 ));
731 set_in_function_tag xs
732
733 | (BToken x)::xs -> set_in_function_tag xs
734
735 | (Braceised (body, tok1, Some tok2))::xs
736 when tok1.col =|= 0 && tok2.col =|= 0 ->
737 body +> List.iter (iter_token_brace (fun tok ->
738 tok.where <- InFunction
739 ));
740 set_in_function_tag xs
741 | Braceised (body, tok1, tok2)::xs ->
742 set_in_function_tag xs
743
744
745 let rec set_in_other xs =
746 match xs with
747 | [] -> ()
748 (* enum x { } *)
749 | BToken ({tok = Tenum _})::BToken ({tok = TIdent _})
750 ::Braceised(body, tok1, tok2)::xs
751 | BToken ({tok = Tenum _})
752 ::Braceised(body, tok1, tok2)::xs
753 ->
754 body +> List.iter (iter_token_brace (fun tok ->
755 tok.where <- InEnum;
756 ));
757 set_in_other xs
758
759 (* struct x { } *)
760 | BToken ({tok = Tstruct _})::BToken ({tok = TIdent _})
761 ::Braceised(body, tok1, tok2)::xs ->
762 body +> List.iter (iter_token_brace (fun tok ->
763 tok.where <- InStruct;
764 ));
765 set_in_other xs
766 (* = { } *)
767 | BToken ({tok = TEq _})
768 ::Braceised(body, tok1, tok2)::xs ->
769 body +> List.iter (iter_token_brace (fun tok ->
770 tok.where <- InInitializer;
771 ));
772 set_in_other xs
773
774 | BToken _::xs -> set_in_other xs
775
776 | Braceised(body, tok1, tok2)::xs ->
777 body +> List.iter set_in_other;
778 set_in_other xs
779
780
781
782
783 let set_context_tag xs =
784 begin
785 set_in_function_tag xs;
786 set_in_other xs;
787 end
788
789
790 (*****************************************************************************)
791 (* Helpers *)
792 (*****************************************************************************)
793
794 (* To expand the parameter of the macro. The env corresponds to the actual
795 * code that is binded to the parameters of the macro.
796 * TODO? recurse ? fixpoint ? the expansion may also contain macro.
797 * Or to macro expansion in a strict manner, that is process first
798 * the parameters, expands macro in params, and then process enclosing
799 * macro call.
800 *)
801 let rec (cpp_engine: (string , Parser_c.token list) assoc ->
802 Parser_c.token list -> Parser_c.token list) =
803 fun env xs ->
804 xs +> List.map (fun tok ->
805 match tok with
806 | TIdent (s,i1) when List.mem_assoc s env -> Common.assoc s env
807 | x -> [x]
808 )
809 +> List.flatten
810
811
812
813
814 (* ------------------------------------------------------------------------- *)
815 (* the pair is the status of '()' and '{}', ex: (-1,0)
816 * if too much ')' and good '{}'
817 * could do for [] too ?
818 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
819 * then if have ifdef, then certainly can lead to a problem.
820 *)
821 let (count_open_close_stuff_ifdef_clause: ifdef_grouped list -> (int * int)) =
822 fun xs ->
823 let cnt_paren, cnt_brace = ref 0, ref 0 in
824 xs +> iter_token_ifdef (fun x ->
825 (match x.tok with
826 | x when TH.is_opar x -> incr cnt_paren
827 | TOBrace _ -> incr cnt_brace
828 | x when TH.is_cpar x -> decr cnt_paren
829 | TCBrace _ -> decr cnt_brace
830 | _ -> ()
831 )
832 );
833 !cnt_paren, !cnt_brace
834
835
836 (* ------------------------------------------------------------------------- *)
837 let forLOOKAHEAD = 30
838
839
840 (* look if there is a '{' just after the closing ')', and handling the
841 * possibility to have nested expressions inside nested parenthesis
842 *
843 * todo: use indentation instead of premier(statement) ?
844 *)
845 let rec is_really_foreach xs =
846 let rec is_foreach_aux = function
847 | [] -> false, []
848 | TCPar _::TOBrace _::xs -> true, xs
849 (* the following attempts to handle the cases where there is a
850 single statement in the body of the loop. undoubtedly more
851 cases are needed.
852 todo: premier(statement) - suivant(funcall)
853 *)
854 | TCPar _::TIdent _::xs -> true, xs
855 | TCPar _::Tif _::xs -> true, xs
856 | TCPar _::Twhile _::xs -> true, xs
857 | TCPar _::Tfor _::xs -> true, xs
858 | TCPar _::Tswitch _::xs -> true, xs
859 | TCPar _::Treturn _::xs -> true, xs
860
861
862 | TCPar _::xs -> false, xs
863 | TOPar _::xs ->
864 let (_, xs') = is_foreach_aux xs in
865 is_foreach_aux xs'
866 | x::xs -> is_foreach_aux xs
867 in
868 is_foreach_aux xs +> fst
869
870
871 (* ------------------------------------------------------------------------- *)
872 let set_ifdef_token_parenthize_info cnt x =
873 match x with
874 | TIfdef (tag, _)
875 | TIfdefelse (tag, _)
876 | TIfdefelif (tag, _)
877 | TEndif (tag, _)
878
879 | TIfdefBool (_, tag, _)
880 | TIfdefMisc (_, tag, _)
881 | TIfdefVersion (_, tag, _)
882 ->
883 tag := Some cnt;
884
885 | _ -> raise Impossible
886
887
888
889 let ifdef_paren_cnt = ref 0
890
891
892 let rec set_ifdef_parenthize_info xs =
893 xs +> List.iter (function
894 | NotIfdefLine xs -> ()
895 | Ifdefbool (_, xxs, info_ifdef)
896 | Ifdef (xxs, info_ifdef) ->
897
898 incr ifdef_paren_cnt;
899 let total_directives = List.length info_ifdef in
900
901 info_ifdef +> List.iter (fun x ->
902 set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives)
903 x.tok);
904 xxs +> List.iter set_ifdef_parenthize_info
905 )
906
907
908 (*****************************************************************************)
909 (* CPP handling: macros, ifdefs, macros defs *)
910 (*****************************************************************************)
911
912 (* ------------------------------------------------------------------------- *)
913 (* special skip_start skip_end handling *)
914 (* ------------------------------------------------------------------------- *)
915
916 (* note: after this normally the token list should not contain any more the
917 * TCommentSkipTagStart and End tokens.
918 *)
919 let rec commentize_skip_start_to_end xs =
920 match xs with
921 | [] -> ()
922 | x::xs ->
923 (match x with
924 | {tok = TCommentSkipTagStart info} ->
925 (try
926 let (before, x2, after) =
927 xs +> Common.split_when (function
928 | {tok = TCommentSkipTagEnd _ } -> true
929 | _ -> false
930 )
931 in
932 let topass = x::x2::before in
933 topass +> List.iter (fun tok ->
934 set_as_comment Token_c.CppPassingExplicit tok
935 );
936 commentize_skip_start_to_end after
937 with Not_found ->
938 failwith "could not find end of skip_start special comment"
939 )
940 | {tok = TCommentSkipTagEnd info} ->
941 failwith "found skip_end comment but no skip_start"
942 | _ ->
943 commentize_skip_start_to_end xs
944 )
945
946
947
948
949 (* ------------------------------------------------------------------------- *)
950 (* ifdef keeping/passing *)
951 (* ------------------------------------------------------------------------- *)
952
953 (* #if 0, #if 1, #if LINUX_VERSION handling *)
954 let rec find_ifdef_bool xs =
955 xs +> List.iter (function
956 | NotIfdefLine _ -> ()
957 | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) ->
958
959 msg_ifdef_bool_passing is_ifdef_positif;
960
961 (match xxs with
962 | [] -> raise Impossible
963 | firstclause::xxs ->
964 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
965
966 if is_ifdef_positif
967 then xxs +> List.iter
968 (iter_token_ifdef (set_as_comment Token_c.CppPassingNormal))
969 else begin
970 firstclause +> iter_token_ifdef (set_as_comment Token_c.CppPassingNormal);
971 (match List.rev xxs with
972 (* keep only last *)
973 | last::startxs ->
974 startxs +> List.iter
975 (iter_token_ifdef (set_as_comment Token_c.CppPassingNormal))
976 | [] -> (* not #else *) ()
977 );
978 end
979 );
980
981 | Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool
982 )
983
984
985
986 let thresholdIfdefSizeMid = 6
987
988 (* infer ifdef involving not-closed expressions/statements *)
989 let rec find_ifdef_mid xs =
990 xs +> List.iter (function
991 | NotIfdefLine _ -> ()
992 | Ifdef (xxs, info_ifdef_stmt) ->
993 (match xxs with
994 | [] -> raise Impossible
995 | [first] -> ()
996 | first::second::rest ->
997 (* don't analyse big ifdef *)
998 if xxs +> List.for_all
999 (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
1000 (* don't want nested ifdef *)
1001 xxs +> List.for_all (fun xs ->
1002 xs +> List.for_all
1003 (function NotIfdefLine _ -> true | _ -> false)
1004 )
1005
1006 then
1007 let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
1008 let cnt1, cnt2 = List.hd counts in
1009 if cnt1 <> 0 || cnt2 <> 0 &&
1010 counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
1011 (*
1012 if counts +> List.exists (fun (cnt1, cnt2) ->
1013 cnt1 <> 0 || cnt2 <> 0
1014 )
1015 *)
1016 then begin
1017 msg_ifdef_mid_something();
1018
1019 (* keep only first, treat the rest as comment *)
1020 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
1021 (second::rest) +> List.iter
1022 (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError));
1023 end
1024
1025 );
1026 List.iter find_ifdef_mid xxs
1027
1028 (* no need complex analysis for ifdefbool *)
1029 | Ifdefbool (_, xxs, info_ifdef_stmt) ->
1030 List.iter find_ifdef_mid xxs
1031
1032
1033 )
1034
1035
1036 let thresholdFunheaderLimit = 4
1037
1038 (* ifdef defining alternate function header, type *)
1039 let rec find_ifdef_funheaders = function
1040 | [] -> ()
1041 | NotIfdefLine _::xs -> find_ifdef_funheaders xs
1042
1043 (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
1044 | Ifdef
1045 ([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1;
1046 (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
1047 ], info_ifdef_stmt
1048 )
1049 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line3)
1050 ::xs
1051 when List.length ifdefblock1 <= thresholdFunheaderLimit &&
1052 List.length ifdefblock2 <= thresholdFunheaderLimit
1053 ->
1054 find_ifdef_funheaders xs;
1055
1056 msg_ifdef_funheaders ();
1057 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
1058 let all_toks = [xline2] @ line2 in
1059 all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) ;
1060 ifdefblock2 +> iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError);
1061
1062 (* ifdef with nested ifdef *)
1063 | Ifdef
1064 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
1065 [Ifdef
1066 ([[NotIfdefLine (({col = 0} as xline2)::line2)];
1067 [NotIfdefLine (({col = 0} as xline3)::line3)];
1068 ], info_ifdef_stmt2
1069 )
1070 ]
1071 ], info_ifdef_stmt
1072 )
1073 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
1074 ::xs
1075 ->
1076 find_ifdef_funheaders xs;
1077
1078 msg_ifdef_funheaders ();
1079 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
1080 info_ifdef_stmt2 +> List.iter (set_as_comment Token_c.CppDirective);
1081 let all_toks = [xline2;xline3] @ line2 @ line3 in
1082 all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError);
1083
1084 (* ifdef with elseif *)
1085 | Ifdef
1086 ([[NotIfdefLine (({col = 0} as _xline1)::line1)];
1087 [NotIfdefLine (({col = 0} as xline2)::line2)];
1088 [NotIfdefLine (({col = 0} as xline3)::line3)];
1089 ], info_ifdef_stmt
1090 )
1091 ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4)
1092 ::xs
1093 ->
1094 find_ifdef_funheaders xs;
1095
1096 msg_ifdef_funheaders ();
1097 info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
1098 let all_toks = [xline2;xline3] @ line2 @ line3 in
1099 all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError)
1100
1101 (* recurse *)
1102 | Ifdef (xxs,info_ifdef_stmt)::xs
1103 | Ifdefbool (_, xxs,info_ifdef_stmt)::xs ->
1104 List.iter find_ifdef_funheaders xxs;
1105 find_ifdef_funheaders xs
1106
1107
1108
1109 (* ?? *)
1110 let rec adjust_inifdef_include xs =
1111 xs +> List.iter (function
1112 | NotIfdefLine _ -> ()
1113 | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) ->
1114 xxs +> List.iter (iter_token_ifdef (fun tokext ->
1115 match tokext.tok with
1116 | Parser_c.TInclude (s1, s2, inifdef_ref, ii) ->
1117 inifdef_ref := true;
1118 | _ -> ()
1119 ));
1120 )
1121
1122
1123
1124 (* ------------------------------------------------------------------------- *)
1125 (* cpp-builtin part2, macro, using standard.h or other defs *)
1126 (* ------------------------------------------------------------------------- *)
1127
1128 (* Thanks to this function many stuff are not anymore hardcoded in ocaml code
1129 * (but they are now hardcoded in standard.h ...)
1130 *
1131 *
1132 *
1133 * No need to take care to not substitute the macro name itself
1134 * that occurs in the macro definition because the macro name is
1135 * after fix_token_define a TDefineIdent, no more a TIdent.
1136 *)
1137
1138 let rec apply_macro_defs xs =
1139 match xs with
1140 | [] -> ()
1141
1142 (* old: "but could do more, could reuse same original token
1143 * so that have in the Ast a Dbg, not a MACROSTATEMENT"
1144 *
1145 * | PToken ({tok = TIdent (s,i1)} as id)::xs
1146 * when s = "MACROSTATEMENT" ->
1147 *
1148 * msg_macro_statement_hint s;
1149 * id.tok <- TMacroStmt(TH.info_of_tok id.tok);
1150 * find_macro_paren xs
1151 *
1152 * let msg_macro_statement_hint s =
1153 * incr Stat.nMacroHint;
1154 * ()
1155 *
1156 *)
1157
1158 (* recognized macro of standard.h (or other) *)
1159 | PToken ({tok = TIdent (s,i1)} as id)::Parenthised (xxs,info_parens)::xs
1160 when Hashtbl.mem !_defs s ->
1161
1162 msg_apply_known_macro s;
1163 let (s, params, body) = Hashtbl.find !_defs s in
1164
1165 (match params with
1166 | NoParam ->
1167 pr2 ("WEIRD: macro without param used before parenthize: " ^ s);
1168 (* ex: PRINTP("NCR53C400 card%s detected\n" ANDP(((struct ... *)
1169
1170 (match body with
1171 | DefineBody bodymacro ->
1172 set_as_comment (Token_c.CppMacro) id;
1173 id.new_tokens_before <- bodymacro;
1174 | DefineHint hint ->
1175 msg_apply_known_macro_hint s;
1176 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1177 )
1178 | Params params ->
1179 (match body with
1180 | DefineBody bodymacro ->
1181
1182 (* bugfix: better to put this that before the match body,
1183 * cos our macrostatement hint can have variable number of
1184 * arguments and so it's ok if it does not match exactly
1185 * the number of arguments. *)
1186 if List.length params != List.length xxs
1187 then begin
1188 pr2_once ("WEIRD: macro with wrong number of arguments: " ^ s);
1189 (* old: id.new_tokens_before <- bodymacro; *)
1190 ()
1191 end
1192 else
1193
1194 let xxs' = xxs +> List.map (fun x ->
1195 (tokens_of_paren_ordered x) +> List.map (fun x ->
1196 TH.visitor_info_of_tok Ast_c.make_expanded x.tok
1197 )
1198 ) in
1199 id.new_tokens_before <-
1200 cpp_engine (Common.zip params xxs') bodymacro;
1201
1202 (* important to do that after have apply the macro, otherwise
1203 * will pass as argument to the macro some tokens that
1204 * are all TCommentCpp
1205 *)
1206 [Parenthised (xxs, info_parens)] +>
1207 iter_token_paren (set_as_comment Token_c.CppMacro);
1208 set_as_comment Token_c.CppMacro id;
1209
1210 | DefineHint (HintMacroStatement as hint) ->
1211 (* important to do that after have apply the macro, otherwise
1212 * will pass as argument to the macro some tokens that
1213 * are all TCommentCpp
1214 *
1215 * note: such macrostatement can have a variable number of
1216 * arguments but here we don't care, we just pass all the
1217 * parameters.
1218 *)
1219
1220 (match xs with
1221 | PToken ({tok = TPtVirg _} as id2)::_ ->
1222 pr2_once
1223 ("macro stmt with trailing ';', passing also ';' for: "^
1224 s);
1225 (* sometimes still want pass its params ... as in
1226 * DEBUGPOLL(static unsigned int prev_mask = 0);
1227 *)
1228
1229 msg_apply_known_macro_hint s;
1230 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1231 [Parenthised (xxs, info_parens)] +>
1232 iter_token_paren (set_as_comment Token_c.CppMacro);
1233 set_as_comment Token_c.CppMacro id2;
1234
1235 | _ ->
1236 msg_apply_known_macro_hint s;
1237 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1238 [Parenthised (xxs, info_parens)] +>
1239 iter_token_paren (set_as_comment Token_c.CppMacro);
1240 )
1241
1242
1243 | DefineHint hint ->
1244 msg_apply_known_macro_hint s;
1245 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1246 )
1247 );
1248 apply_macro_defs xs
1249
1250 | PToken ({tok = TIdent (s,i1)} as id)::xs
1251 when Hashtbl.mem !_defs s ->
1252
1253 msg_apply_known_macro s;
1254 let (_s, params, body) = Hashtbl.find !_defs s in
1255
1256 (match params with
1257 | Params params ->
1258 pr2 ("WEIRD: macro with params but no parens found: " ^ s);
1259 (* dont apply the macro, perhaps a redefinition *)
1260 ()
1261 | NoParam ->
1262 (match body with
1263 | DefineBody [newtok] ->
1264 (* special case when 1-1 substitution, we reuse the token *)
1265 id.tok <- (newtok +> TH.visitor_info_of_tok (fun _ ->
1266 TH.info_of_tok id.tok))
1267 | DefineBody bodymacro ->
1268 set_as_comment Token_c.CppMacro id;
1269 id.new_tokens_before <- bodymacro;
1270 | DefineHint hint ->
1271 msg_apply_known_macro_hint s;
1272 id.tok <- token_from_parsinghack_hint (s,i1) hint;
1273 )
1274 );
1275 apply_macro_defs xs
1276
1277
1278
1279
1280 (* recurse *)
1281 | (PToken x)::xs -> apply_macro_defs xs
1282 | (Parenthised (xxs, info_parens))::xs ->
1283 xxs +> List.iter apply_macro_defs;
1284 apply_macro_defs xs
1285
1286
1287
1288
1289
1290 (* ------------------------------------------------------------------------- *)
1291 (* stringification *)
1292 (* ------------------------------------------------------------------------- *)
1293
1294 let rec find_string_macro_paren xs =
1295 match xs with
1296 | [] -> ()
1297 | Parenthised(xxs, info_parens)::xs ->
1298 xxs +> List.iter (fun xs ->
1299 if xs +> List.exists
1300 (function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) &&
1301 xs +> List.for_all
1302 (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) ->
1303 true | _ -> false)
1304 then
1305 xs +> List.iter (fun tok ->
1306 match tok with
1307 | PToken({tok = TIdent (s,_)} as id) ->
1308 msg_stringification s;
1309 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1310 | _ -> ()
1311 )
1312 else
1313 find_string_macro_paren xs
1314 );
1315 find_string_macro_paren xs
1316 | PToken(tok)::xs ->
1317 find_string_macro_paren xs
1318
1319
1320 (* ------------------------------------------------------------------------- *)
1321 (* macro2 *)
1322 (* ------------------------------------------------------------------------- *)
1323
1324 (* don't forget to recurse in each case *)
1325 let rec find_macro_paren xs =
1326 match xs with
1327 | [] -> ()
1328
1329 (* attribute *)
1330 | PToken ({tok = Tattribute _} as id)
1331 ::Parenthised (xxs,info_parens)
1332 ::xs
1333 ->
1334 pr2_cpp ("MACRO: __attribute detected ");
1335 [Parenthised (xxs, info_parens)] +>
1336 iter_token_paren (set_as_comment Token_c.CppAttr);
1337 set_as_comment Token_c.CppAttr id;
1338 find_macro_paren xs
1339
1340 (*
1341 (* attribute cpp, __xxx id() *)
1342 | PToken ({tok = TIdent (s,i1)} as id)
1343 ::PToken ({tok = TIdent (s2, i2)})
1344 ::Parenthised(xxs,info_parens)
1345 ::xs when s ==~ regexp_annot
1346 ->
1347 msg_attribute s;
1348 id.tok <- TMacroAttr (s, i1);
1349 find_macro_paren (Parenthised(xxs,info_parens)::xs)
1350
1351 (* attribute cpp, id __xxx = *)
1352 | PToken ({tok = TIdent (s,i1)})
1353 ::PToken ({tok = TIdent (s2, i2)} as id)
1354 ::xs when s2 ==~ regexp_annot
1355 ->
1356 msg_attribute s2;
1357 id.tok <- TMacroAttr (s2, i2);
1358 find_macro_paren (xs)
1359 *)
1360
1361 (* storage attribute *)
1362 | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
1363 ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs
1364 ->
1365 pr2_cpp ("storage attribute: " ^ s);
1366 attr.tok <- TMacroAttrStorage (s,i1);
1367 (* recurse, may have other storage attributes *)
1368 find_macro_paren (PToken (tok1)::xs)
1369
1370
1371 (* stringification
1372 *
1373 * the order of the matching clause is important
1374 *
1375 *)
1376
1377 (* string macro with params, before case *)
1378 | PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1379 ::Parenthised (xxs, info_parens)
1380 ::xs ->
1381
1382 msg_stringification_params s;
1383 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1384 [Parenthised (xxs, info_parens)] +>
1385 iter_token_paren (set_as_comment Token_c.CppMacro);
1386 find_macro_paren xs
1387
1388 (* after case *)
1389 | PToken ({tok = TIdent (s,_)} as id)
1390 ::Parenthised (xxs, info_parens)
1391 ::PToken ({tok = (TString _ | TMacroString _)})
1392 ::xs ->
1393
1394 msg_stringification_params s;
1395 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1396 [Parenthised (xxs, info_parens)] +>
1397 iter_token_paren (set_as_comment Token_c.CppMacro);
1398 find_macro_paren xs
1399
1400
1401 (* for the case where the string is not inside a funcall, but
1402 * for instance in an initializer.
1403 *)
1404
1405 (* string macro variable, before case *)
1406 | PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id)
1407 ::xs ->
1408
1409 msg_stringification s;
1410 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1411 find_macro_paren xs
1412
1413 (* after case *)
1414 | PToken ({tok = TIdent (s,_)} as id)
1415 ::PToken ({tok = (TString _ | TMacroString _)})
1416 ::xs ->
1417
1418 msg_stringification s;
1419 id.tok <- TMacroString (s, TH.info_of_tok id.tok);
1420 find_macro_paren xs
1421
1422
1423
1424
1425
1426 (* recurse *)
1427 | (PToken x)::xs -> find_macro_paren xs
1428 | (Parenthised (xxs, info_parens))::xs ->
1429 xxs +> List.iter find_macro_paren;
1430 find_macro_paren xs
1431
1432
1433
1434
1435
1436 (* don't forget to recurse in each case *)
1437 let rec find_macro_lineparen xs =
1438 match xs with
1439 | [] -> ()
1440
1441 (* linuxext: ex: static [const] DEVICE_ATTR(); *)
1442 | (Line
1443 (
1444 [PToken ({tok = Tstatic _});
1445 PToken ({tok = TIdent (s,_)} as macro);
1446 Parenthised (xxs,info_parens);
1447 PToken ({tok = TPtVirg _});
1448 ]
1449 ))
1450 ::xs
1451 when (s ==~ regexp_macro) ->
1452
1453 msg_declare_macro s;
1454 let info = TH.info_of_tok macro.tok in
1455 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1456
1457 find_macro_lineparen (xs)
1458
1459 (* the static const case *)
1460 | (Line
1461 (
1462 [PToken ({tok = Tstatic _});
1463 PToken ({tok = Tconst _} as const);
1464 PToken ({tok = TIdent (s,_)} as macro);
1465 Parenthised (xxs,info_parens);
1466 PToken ({tok = TPtVirg _});
1467 ]
1468 (*as line1*)
1469
1470 ))
1471 ::xs
1472 when (s ==~ regexp_macro) ->
1473
1474 msg_declare_macro s;
1475 let info = TH.info_of_tok macro.tok in
1476 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1477
1478 (* need retag this const, otherwise ambiguity in grammar
1479 21: shift/reduce conflict (shift 121, reduce 137) on Tconst
1480 decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
1481 decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
1482 storage_class_spec : Tstatic . (137)
1483 *)
1484 const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
1485
1486 find_macro_lineparen (xs)
1487
1488
1489 (* same but without trailing ';'
1490 *
1491 * I do not put the final ';' because it can be on a multiline and
1492 * because of the way mk_line is coded, we will not have access to
1493 * this ';' on the next line, even if next to the ')' *)
1494 | (Line
1495 ([PToken ({tok = Tstatic _});
1496 PToken ({tok = TIdent (s,_)} as macro);
1497 Parenthised (xxs,info_parens);
1498 ]
1499 ))
1500 ::xs
1501 when s ==~ regexp_macro ->
1502
1503 msg_declare_macro s;
1504 let info = TH.info_of_tok macro.tok in
1505 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1506
1507 find_macro_lineparen (xs)
1508
1509
1510
1511
1512 (* on multiple lines *)
1513 | (Line
1514 (
1515 (PToken ({tok = Tstatic _})::[]
1516 )))
1517 ::(Line
1518 (
1519 [PToken ({tok = TIdent (s,_)} as macro);
1520 Parenthised (xxs,info_parens);
1521 PToken ({tok = TPtVirg _});
1522 ]
1523 )
1524 )
1525 ::xs
1526 when (s ==~ regexp_macro) ->
1527
1528 msg_declare_macro s;
1529 let info = TH.info_of_tok macro.tok in
1530 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1531
1532 find_macro_lineparen (xs)
1533
1534
1535 (* linuxext: ex: DECLARE_BITMAP();
1536 *
1537 * Here I use regexp_declare and not regexp_macro because
1538 * Sometimes it can be a FunCallMacro such as DEBUG(foo());
1539 * Here we don't have the preceding 'static' so only way to
1540 * not have positive is to restrict to .*DECLARE.* macros.
1541 *
1542 * but there is a grammar rule for that, so don't need this case anymore
1543 * unless the parameter of the DECLARE_xxx are weird and can not be mapped
1544 * on a argument_list
1545 *)
1546
1547 | (Line
1548 ([PToken ({tok = TIdent (s,_)} as macro);
1549 Parenthised (xxs,info_parens);
1550 PToken ({tok = TPtVirg _});
1551 ]
1552 ))
1553 ::xs
1554 when (s ==~ regexp_declare) ->
1555
1556 msg_declare_macro s;
1557 let info = TH.info_of_tok macro.tok in
1558 macro.tok <- TMacroDecl (Ast_c.str_of_info info, info);
1559
1560 find_macro_lineparen (xs)
1561
1562
1563 (* toplevel macros.
1564 * module_init(xxx)
1565 *
1566 * Could also transform the TIdent in a TMacroTop but can have false
1567 * positive, so easier to just change the TCPar and so just solve
1568 * the end-of-stream pb of ocamlyacc
1569 *)
1570 | (Line
1571 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro);
1572 Parenthised (xxs,info_parens);
1573 ] as _line1
1574 ))
1575 ::xs when col1 =|= 0
1576 ->
1577 let condition =
1578 (* to reduce number of false positive *)
1579 (match xs with
1580 | (Line (PToken ({col = col2 } as other)::restline2))::_ ->
1581 TH.is_eof other.tok || (col2 =|= 0 &&
1582 (match other.tok with
1583 | TOBrace _ -> false (* otherwise would match funcdecl *)
1584 | TCBrace _ when ctx <> InFunction -> false
1585 | TPtVirg _
1586 | TDotDot _
1587 -> false
1588 | tok when TH.is_binary_operator tok -> false
1589
1590 | _ -> true
1591 )
1592 )
1593 | _ -> false
1594 )
1595 in
1596 if condition
1597 then begin
1598
1599 msg_macro_toplevel_noptvirg s;
1600 (* just to avoid the end-of-stream pb of ocamlyacc *)
1601 let tcpar = Common.last info_parens in
1602 tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok);
1603
1604 (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
1605
1606 end;
1607
1608 find_macro_lineparen (xs)
1609
1610
1611
1612 (* macro with parameters
1613 * ex: DEBUG()
1614 * return x;
1615 *)
1616 | (Line
1617 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1618 Parenthised (xxs,info_parens);
1619 ] as _line1
1620 ))
1621 ::(Line
1622 (PToken ({col = col2 } as other)::restline2
1623 ) as line2)
1624 ::xs
1625 (* when s ==~ regexp_macro *)
1626 ->
1627 let condition =
1628 (col1 =|= col2 &&
1629 (match other.tok with
1630 | TOBrace _ -> false (* otherwise would match funcdecl *)
1631 | TCBrace _ when ctx <> InFunction -> false
1632 | TPtVirg _
1633 | TDotDot _
1634 -> false
1635 | tok when TH.is_binary_operator tok -> false
1636
1637 | _ -> true
1638 )
1639 )
1640 ||
1641 (col2 <= col1 &&
1642 (match other.tok, restline2 with
1643 | TCBrace _, _ when ctx =*= InFunction -> true
1644 | Treturn _, _ -> true
1645 | Tif _, _ -> true
1646 | Telse _, _ -> true
1647
1648 (* case of label, usually put in first line *)
1649 | TIdent _, (PToken ({tok = TDotDot _}))::_ ->
1650 true
1651
1652
1653 | _ -> false
1654 )
1655 )
1656
1657 in
1658
1659 if condition
1660 then
1661 if col1 =|= 0 then ()
1662 else begin
1663 msg_macro_noptvirg s;
1664 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1665 [Parenthised (xxs, info_parens)] +>
1666 iter_token_paren (set_as_comment Token_c.CppMacro);
1667 end;
1668
1669 find_macro_lineparen (line2::xs)
1670
1671 (* linuxext:? single macro
1672 * ex: LOCK
1673 * foo();
1674 * UNLOCK
1675 *
1676 * todo: factorize code with previous rule ?
1677 *)
1678 | (Line
1679 ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro);
1680 ] as _line1
1681 ))
1682 ::(Line
1683 (PToken ({col = col2 } as other)::restline2
1684 ) as line2)
1685 ::xs ->
1686 (* when s ==~ regexp_macro *)
1687
1688 let condition =
1689 (col1 =|= col2 &&
1690 col1 <> 0 && (* otherwise can match typedef of fundecl*)
1691 (match other.tok with
1692 | TPtVirg _ -> false
1693 | TOr _ -> false
1694 | TCBrace _ when ctx <> InFunction -> false
1695 | tok when TH.is_binary_operator tok -> false
1696
1697 | _ -> true
1698 )) ||
1699 (col2 <= col1 &&
1700 (match other.tok with
1701 | TCBrace _ when ctx =*= InFunction -> true
1702 | Treturn _ -> true
1703 | Tif _ -> true
1704 | Telse _ -> true
1705 | _ -> false
1706 ))
1707 in
1708
1709 if condition
1710 then begin
1711 msg_macro_noptvirg_single s;
1712 macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
1713 end;
1714 find_macro_lineparen (line2::xs)
1715
1716 | x::xs ->
1717 find_macro_lineparen xs
1718
1719
1720
1721 (* ------------------------------------------------------------------------- *)
1722 (* define tobrace init *)
1723 (* ------------------------------------------------------------------------- *)
1724
1725 let rec find_define_init_brace_paren xs =
1726 let rec aux xs =
1727 match xs with
1728 | [] -> ()
1729
1730 (* mainly for firefox *)
1731 | (PToken {tok = TDefine _})
1732 ::(PToken {tok = TIdentDefine (s,_)})
1733 ::(PToken ({tok = TOBrace i1} as tokbrace))
1734 ::(PToken tok2)
1735 ::(PToken tok3)
1736 ::xs ->
1737 let is_init =
1738 match tok2.tok, tok3.tok with
1739 | TInt _, TComma _ -> true
1740 | TString _, TComma _ -> true
1741 | TIdent _, TComma _ -> true
1742 | _ -> false
1743
1744 in
1745 if is_init
1746 then begin
1747 pr2_cpp("found define initializer: " ^s);
1748 tokbrace.tok <- TOBraceDefineInit i1;
1749 end;
1750
1751 aux xs
1752
1753 (* mainly for linux, especially in sound/ *)
1754 | (PToken {tok = TDefine _})
1755 ::(PToken {tok = TIdentDefine (s,_)})
1756 ::(Parenthised(xxx, info_parens))
1757 ::(PToken ({tok = TOBrace i1} as tokbrace))
1758 ::(PToken tok2)
1759 ::(PToken tok3)
1760 ::xs ->
1761 let is_init =
1762 match tok2.tok, tok3.tok with
1763 | TInt _, TComma _ -> true
1764 | TDot _, TIdent _ -> true
1765 | TIdent _, TComma _ -> true
1766 | _ -> false
1767
1768 in
1769 if is_init
1770 then begin
1771 pr2_cpp("found define initializer with param: " ^ s);
1772 tokbrace.tok <- TOBraceDefineInit i1;
1773 end;
1774
1775 aux xs
1776
1777
1778
1779 (* recurse *)
1780 | (PToken x)::xs -> aux xs
1781 | (Parenthised (xxs, info_parens))::xs ->
1782 (* not need for tobrace init:
1783 * xxs +> List.iter aux;
1784 *)
1785 aux xs
1786 in
1787 aux xs
1788
1789
1790 (* ------------------------------------------------------------------------- *)
1791 (* action *)
1792 (* ------------------------------------------------------------------------- *)
1793
1794 let rec find_actions = function
1795 | [] -> ()
1796
1797 | PToken ({tok = TIdent (s,ii)})
1798 ::Parenthised (xxs,info_parens)
1799 ::xs ->
1800 find_actions xs;
1801 xxs +> List.iter find_actions;
1802 let modified = find_actions_params xxs in
1803 if modified
1804 then msg_macro_higher_order s
1805
1806 | x::xs ->
1807 find_actions xs
1808
1809 and find_actions_params xxs =
1810 xxs +> List.fold_left (fun acc xs ->
1811 let toks = tokens_of_paren xs in
1812 if toks +> List.exists (fun x -> TH.is_statement x.tok)
1813 then begin
1814 xs +> iter_token_paren (fun x ->
1815 if TH.is_eof x.tok
1816 then
1817 (* certainly because paren detection had a pb because of
1818 * some ifdef-exp
1819 *)
1820 pr2 "PB: weird, I try to tag an EOF token as action"
1821 else
1822 x.tok <- TAction (TH.info_of_tok x.tok);
1823 );
1824 true (* modified *)
1825 end
1826 else acc
1827 ) false
1828
1829
1830
1831 (* ------------------------------------------------------------------------- *)
1832 (* main fix cpp function *)
1833 (* ------------------------------------------------------------------------- *)
1834
1835 let rebuild_tokens_extented toks_ext =
1836 let _tokens = ref [] in
1837 toks_ext +> List.iter (fun tok ->
1838 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
1839 push2 tok.tok _tokens
1840 );
1841 let tokens = List.rev !_tokens in
1842 (tokens +> acc_map mk_token_extended)
1843
1844 let filter_cpp_stuff xs =
1845 let rec aux xs =
1846 match xs with
1847 | [] -> []
1848 | x::xs ->
1849 (match x.tok with
1850 | tok when TH.is_comment tok -> aux xs
1851 (* don't want drop the define, or if drop, have to drop
1852 * also its body otherwise the line heuristics may be lost
1853 * by not finding the TDefine in column 0 but by finding
1854 * a TDefineIdent in a column > 0
1855 *)
1856 | Parser_c.TDefine _ ->
1857 x::aux xs
1858 | tok when TH.is_cpp_instruction tok -> aux xs
1859 | _ -> x::aux xs
1860 )
1861 in
1862 aux xs
1863
1864 let insert_virtual_positions l =
1865 let strlen x = String.length (Ast_c.str_of_info x) in
1866 let rec loop prev offset = function
1867 [] -> []
1868 | x::xs ->
1869 let ii = TH.info_of_tok x in
1870 let inject pi =
1871 TH.visitor_info_of_tok (function ii -> Ast_c.rewrap_pinfo pi ii) x in
1872 match Ast_c.pinfo_of_info ii with
1873 Ast_c.OriginTok pi ->
1874 let prev = Ast_c.parse_info_of_info ii in
1875 x::(loop prev (strlen ii) xs)
1876 | Ast_c.ExpandedTok (pi,_) ->
1877 inject (Ast_c.ExpandedTok (pi,(prev,offset))) ::
1878 (loop prev (offset + (strlen ii)) xs)
1879 | Ast_c.FakeTok (s,_) ->
1880 inject (Ast_c.FakeTok (s,(prev,offset))) ::
1881 (loop prev (offset + (strlen ii)) xs)
1882 | Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in
1883 let rec skip_fake = function
1884 [] -> []
1885 | x::xs ->
1886 let ii = TH.info_of_tok x in
1887 match Ast_c.pinfo_of_info ii with
1888 Ast_c.OriginTok pi ->
1889 let prev = Ast_c.parse_info_of_info ii in
1890 x::(loop prev (strlen ii) xs)
1891 | _ -> x::skip_fake xs in
1892 skip_fake l
1893
1894 (* ------------------------------------------------------------------------- *)
1895 let fix_tokens_cpp2 tokens =
1896 let tokens2 = ref (tokens +> acc_map mk_token_extended) in
1897
1898 begin
1899 (* the order is important, if you put the action heuristic first,
1900 * then because of ifdef, can have not closed paren
1901 * and so may believe that higher order macro
1902 * and it will eat too much tokens. So important to do
1903 * first the ifdef.
1904 *
1905 * I recompute multiple times cleaner cos the mutable
1906 * can have be changed and so may have more comments
1907 * in the token original list.
1908 *
1909 *)
1910
1911 commentize_skip_start_to_end !tokens2;
1912
1913 (* ifdef *)
1914 let cleaner = !tokens2 +> List.filter (fun x ->
1915 (* is_comment will also filter the TCommentCpp created in
1916 * commentize_skip_start_to_end *)
1917 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1918 ) in
1919 let ifdef_grouped = mk_ifdef cleaner in
1920 set_ifdef_parenthize_info ifdef_grouped;
1921
1922 find_ifdef_funheaders ifdef_grouped;
1923 find_ifdef_bool ifdef_grouped;
1924 find_ifdef_mid ifdef_grouped;
1925 adjust_inifdef_include ifdef_grouped;
1926
1927
1928 (* macro 1 *)
1929 let cleaner = !tokens2 +> filter_cpp_stuff in
1930
1931 let paren_grouped = mk_parenthised cleaner in
1932 apply_macro_defs paren_grouped;
1933 (* because the before field is used by apply_macro_defs *)
1934 tokens2 := rebuild_tokens_extented !tokens2;
1935
1936 (* tagging contextual info (InFunc, InStruct, etc). Better to do
1937 * that after the "ifdef-simplification" phase.
1938 *)
1939 let cleaner = !tokens2 +> List.filter (fun x ->
1940 not (TH.is_comment x.tok) (* could filter also #define/#include *)
1941 ) in
1942
1943 let brace_grouped = mk_braceised cleaner in
1944 set_context_tag brace_grouped;
1945
1946
1947
1948 (* macro *)
1949 let cleaner = !tokens2 +> filter_cpp_stuff in
1950
1951 let paren_grouped = mk_parenthised cleaner in
1952 let line_paren_grouped = mk_line_parenthised paren_grouped in
1953 find_define_init_brace_paren paren_grouped;
1954 find_string_macro_paren paren_grouped;
1955 find_macro_lineparen line_paren_grouped;
1956 find_macro_paren paren_grouped;
1957
1958
1959 (* actions *)
1960 let cleaner = !tokens2 +> filter_cpp_stuff in
1961 let paren_grouped = mk_parenthised cleaner in
1962 find_actions paren_grouped;
1963
1964
1965 insert_virtual_positions (!tokens2 +> acc_map (fun x -> x.tok))
1966 end
1967
1968 let time_hack1 a =
1969 Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 a)
1970
1971 let fix_tokens_cpp a =
1972 Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 a)
1973
1974
1975
1976
1977 (*****************************************************************************)
1978 (* The #define tricks *)
1979 (*****************************************************************************)
1980
1981 (* ugly hack, a better solution perhaps would be to erase TDefEOL
1982 * from the Ast and list of tokens in parse_c.
1983 *
1984 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
1985 *
1986 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
1987 * is where is the \n TCommentSpace. Normally there is always a last token
1988 * to synchronize on, either EOF or the token of the next toplevel.
1989 * In the case of the #define we got in list of token
1990 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
1991 * not synchronize on it and so we will not print the "\n".
1992 * A solution would be to put the TDefEOL before the "\n".
1993 *
1994 * todo?: could put a ExpandedTok for that ?
1995 *)
1996 let mark_end_define ii =
1997 let ii' =
1998 { Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with
1999 Common.str = "";
2000 Common.charpos = Ast_c.pos_of_info ii + 1
2001 };
2002 cocci_tag = ref Ast_c.emptyAnnot;
2003 comments_tag = ref Ast_c.emptyComments;
2004 }
2005 in
2006 TDefEOL (ii')
2007
2008 (* put the TDefEOL at the good place *)
2009 let rec define_line_1 acc xs =
2010 match xs with
2011 | [] -> List.rev acc
2012 | TDefine ii::xs ->
2013 let line = Ast_c.line_of_info ii in
2014 let acc = (TDefine ii) :: acc in
2015 define_line_2 acc line ii xs
2016 | TCppEscapedNewline ii::xs ->
2017 pr2 "WEIRD: a \\ outside a #define";
2018 let acc = (TCommentSpace ii) :: acc in
2019 define_line_1 acc xs
2020 | x::xs -> define_line_1 (x::acc) xs
2021
2022 and define_line_2 acc line lastinfo xs =
2023 match xs with
2024 | [] ->
2025 (* should not happened, should meet EOF before *)
2026 pr2 "PB: WEIRD";
2027 List.rev (mark_end_define lastinfo::acc)
2028 | x::xs ->
2029 let line' = TH.line_of_tok x in
2030 let info = TH.info_of_tok x in
2031
2032 (match x with
2033 | EOF ii ->
2034 let acc = (mark_end_define lastinfo) :: acc in
2035 let acc = (EOF ii) :: acc in
2036 define_line_1 acc xs
2037 | TCppEscapedNewline ii ->
2038 if (line' <> line) then pr2 "PB: WEIRD: not same line number";
2039 let acc = (TCommentSpace ii) :: acc in
2040 define_line_2 acc (line+1) info xs
2041 | x ->
2042 if line' =|= line
2043 then define_line_2 (x::acc) line info xs
2044 else define_line_1 (mark_end_define lastinfo::acc) (x::xs)
2045 )
2046
2047 let rec define_ident acc xs =
2048 match xs with
2049 | [] -> List.rev acc
2050 | TDefine ii::xs ->
2051 let acc = TDefine ii :: acc in
2052 (match xs with
2053 | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
2054 (* Change also the kind of TIdent to avoid bad interaction
2055 * with other parsing_hack tricks. For instant if keep TIdent then
2056 * the stringication algo can believe the TIdent is a string-macro.
2057 * So simpler to change the kind of the ident too.
2058 *)
2059 (* if TOParDefine sticked to the ident, then
2060 * it's a macro-function. Change token to avoid ambiguity
2061 * between #define foo(x) and #define foo (x)
2062 *)
2063 let acc = (TCommentSpace i1) :: acc in
2064 let acc = (TIdentDefine (s,i2)) :: acc in
2065 let acc = (TOParDefine i3) :: acc in
2066 define_ident acc xs
2067 | TCommentSpace i1::TIdent (s,i2)::xs ->
2068 let acc = (TCommentSpace i1) :: acc in
2069 let acc = (TIdentDefine (s,i2)) :: acc in
2070 define_ident acc xs
2071 | _ ->
2072 pr2 "WEIRD: weird #define body";
2073 define_ident acc xs
2074 )
2075 | x::xs ->
2076 let acc = x :: acc in
2077 define_ident acc xs
2078
2079
2080
2081 let fix_tokens_define2 xs =
2082 define_ident [] (define_line_1 [] xs)
2083
2084 let fix_tokens_define a =
2085 Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a)
2086
2087
2088 (*****************************************************************************)
2089 (* for the cpp-builtin, standard.h, part 0 *)
2090 (*****************************************************************************)
2091
2092 let macro_body_to_maybe_hint body =
2093 match body with
2094 | [] -> DefineBody body
2095 | [TIdent (s,i1)] ->
2096 (match parsinghack_hint_of_string s with
2097 | Some hint -> DefineHint hint
2098 | None -> DefineBody body
2099 )
2100 | xs -> DefineBody body
2101
2102
2103 let rec define_parse xs =
2104 match xs with
2105 | [] -> []
2106 | TDefine i1::TIdentDefine (s,i2)::TOParDefine i3::xs ->
2107 let (tokparams, _, xs) =
2108 xs +> Common.split_when (function TCPar _ -> true | _ -> false) in
2109 let (body, _, xs) =
2110 xs +> Common.split_when (function TDefEOL _ -> true | _ -> false) in
2111 let params =
2112 tokparams +> Common.map_filter (function
2113 | TComma _ -> None
2114 | TIdent (s, _) -> Some s
2115 | x -> error_cant_have x
2116 ) in
2117 let body = body +> List.map
2118 (TH.visitor_info_of_tok Ast_c.make_expanded) in
2119 let def = (s, (s, Params params, macro_body_to_maybe_hint body)) in
2120 def::define_parse xs
2121
2122 | TDefine i1::TIdentDefine (s,i2)::xs ->
2123 let (body, _, xs) =
2124 xs +> Common.split_when (function TDefEOL _ -> true | _ -> false) in
2125 let body = body +> List.map
2126 (TH.visitor_info_of_tok Ast_c.make_expanded) in
2127 let def = (s, (s, NoParam, macro_body_to_maybe_hint body)) in
2128 def::define_parse xs
2129
2130 | TDefine i1::_ ->
2131 pr2_gen i1;
2132 raise Impossible
2133 | x::xs -> define_parse xs
2134
2135
2136 let extract_cpp_define xs =
2137 let cleaner = xs +> List.filter (fun x ->
2138 not (TH.is_comment x)
2139 ) in
2140 define_parse cleaner
2141
2142
2143
2144
2145 (*****************************************************************************)
2146 (* Lexing with lookahead *)
2147 (*****************************************************************************)
2148
2149 (* Why using yet another parsing_hack technique ? The fix_xxx where do
2150 * some pre-processing on the full list of tokens is not enough ?
2151 * No cos sometimes we need more contextual info, and even if
2152 * set_context() tries to give some contextual info, it's not completely
2153 * accurate so the following code give yet another alternative, yet another
2154 * chance to transform some tokens.
2155 *
2156 * todo?: maybe could try to get rid of this technique. Maybe a better
2157 * set_context() would make possible to move this code using a fix_xx
2158 * technique.
2159 *
2160 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
2161 * it is more general to do it via my LALR(k) tech. Because here we can
2162 * transform some token give some context information. So sometimes it
2163 * makes sense to transform a token in one context, sometimes not, and
2164 * lex can not provide us this context information. Note that the order
2165 * in the pattern matching in lookahead is important. Do not cut/paste.
2166 *
2167 * Note that in next there is only "clean" tokens, there is no comment
2168 * or space tokens. This is done by the caller.
2169 *
2170 *)
2171
2172 open Lexer_parser (* for the fields of lexer_hint type *)
2173
2174 let not_struct_enum = function
2175 | (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _)::_ -> false
2176 | _ -> true
2177
2178
2179 let lookahead2 ~pass next before =
2180
2181 match (next, before) with
2182
2183 (*-------------------------------------------------------------*)
2184 (* typedef inference, parse_typedef_fix3 *)
2185 (*-------------------------------------------------------------*)
2186 (* xx xx *)
2187 | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
2188 && ok_typedef s
2189 (* (take_safe 1 !passed_tok <> [TOPar]) -> *)
2190 ->
2191 (* parse_typedef_fix3:
2192 * acpi_object acpi_object;
2193 * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec.
2194 * Le parser en interne a deja appelé le prochain token pour pouvoir
2195 * decider des choses.
2196 * => special case in lexer_heuristic, again
2197 *)
2198 if !Flag_parsing_c.debug_typedef
2199 then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);
2200
2201 LP.disable_typedef();
2202
2203 msg_typedef s; LP.add_typedef_root s;
2204 TypedefIdent (s, i1)
2205
2206 (* xx yy *)
2207 | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before
2208 && ok_typedef s
2209 ->
2210 (* && not_annot s2 BUT lead to false positive*)
2211
2212 msg_typedef s; LP.add_typedef_root s;
2213 TypedefIdent (s, i1)
2214
2215
2216 (* xx inline *)
2217 | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before
2218 && ok_typedef s
2219 ->
2220 msg_typedef s; LP.add_typedef_root s;
2221 TypedefIdent (s, i1)
2222
2223
2224 (* [,(] xx [,)] AND param decl *)
2225 | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ )
2226 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
2227 && ok_typedef s
2228 ->
2229 msg_typedef s; LP.add_typedef_root s;
2230 TypedefIdent (s, i1)
2231
2232 (* xx* [,)] *)
2233 (* specialcase: [,(] xx* [,)] *)
2234 | (TIdent (s, i1)::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
2235 when not_struct_enum before
2236 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2237 && ok_typedef s
2238 ->
2239 msg_typedef s; LP.add_typedef_root s;
2240 TypedefIdent (s, i1)
2241
2242
2243 (* xx** [,)] *)
2244 (* specialcase: [,(] xx** [,)] *)
2245 | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
2246 when not_struct_enum before
2247 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2248 && ok_typedef s
2249 ->
2250 msg_typedef s; LP.add_typedef_root s;
2251 TypedefIdent (s, i1)
2252
2253
2254
2255 (* xx const * USELESS because of next rule ? *)
2256 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
2257 when not_struct_enum before
2258 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2259 && ok_typedef s
2260 ->
2261
2262 msg_typedef s; LP.add_typedef_root s;
2263 TypedefIdent (s, i1)
2264
2265 (* xx const *)
2266 | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
2267 when not_struct_enum before
2268 && ok_typedef s
2269 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2270 ->
2271
2272 msg_typedef s; LP.add_typedef_root s;
2273 TypedefIdent (s, i1)
2274
2275
2276 (* xx * const *)
2277 | (TIdent (s, i1)::TMul _::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
2278 when not_struct_enum before
2279 && ok_typedef s
2280 ->
2281 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2282
2283 msg_typedef s; LP.add_typedef_root s;
2284 TypedefIdent (s, i1)
2285
2286
2287 (* ( const xx) *)
2288 | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
2289 ok_typedef s ->
2290 msg_typedef s; LP.add_typedef_root s;
2291 TypedefIdent (s, i1)
2292
2293
2294
2295 (* ( xx ) [sizeof, ~] *)
2296 | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
2297 when not_struct_enum before
2298 && ok_typedef s
2299 ->
2300 msg_typedef s; LP.add_typedef_root s;
2301 TypedefIdent (s, i1)
2302
2303 (* [(,] xx [ AND parameterdeclaration *)
2304 | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
2305 when (LP.current_context() =*= LP.InParameter)
2306 && ok_typedef s
2307 ->
2308 msg_typedef s; LP.add_typedef_root s;
2309 TypedefIdent (s, i1)
2310
2311 (*------------------------------------------------------------*)
2312 (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
2313 (* but if have a '=', or ',' I think not *)
2314 (*------------------------------------------------------------*)
2315
2316 (* static xx * yy *)
2317 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ ,
2318 (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when
2319 ok_typedef s
2320 ->
2321 msg_typedef s; LP.add_typedef_root s;
2322 TypedefIdent (s, i1)
2323
2324 (* TODO xx * yy ; AND in start of compound element *)
2325
2326
2327 (* xx * yy, AND in paramdecl *)
2328 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _)
2329 when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
2330 && ok_typedef s
2331 ->
2332
2333 msg_typedef s; LP.add_typedef_root s;
2334 TypedefIdent (s, i1)
2335
2336
2337 (* xx * yy ; AND in Toplevel, except when have = before *)
2338 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
2339 TIdent (s, i1)
2340 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , _)
2341 when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ()))
2342 ->
2343 msg_typedef s; LP.add_typedef_root s;
2344 TypedefIdent (s, i1)
2345
2346 (* xx * yy , AND in Toplevel *)
2347 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _)
2348 when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
2349 && ok_typedef s
2350 ->
2351
2352 msg_typedef s; LP.add_typedef_root s;
2353 TypedefIdent (s, i1)
2354
2355 (* xx * yy ( AND in Toplevel *)
2356 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOPar _::_ , _)
2357 when not_struct_enum before
2358 && (LP.is_top_or_struct (LP.current_context ()))
2359 && ok_typedef s
2360 ->
2361 msg_typedef s; LP.add_typedef_root s;
2362 TypedefIdent (s, i1)
2363
2364 (* xx * yy [ *)
2365 (* todo? enough ? cos in struct def we can have some expression ! *)
2366 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOCro _::_ , _)
2367 when not_struct_enum before &&
2368 (LP.is_top_or_struct (LP.current_context ()))
2369 && ok_typedef s
2370 ->
2371 msg_typedef s; LP.add_typedef_root s;
2372 TypedefIdent (s, i1)
2373
2374 (* u16: 10; in struct *)
2375 | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
2376 when (LP.is_top_or_struct (LP.current_context ()))
2377 && ok_typedef s
2378 ->
2379 msg_typedef s; LP.add_typedef_root s;
2380 TypedefIdent (s, i1)
2381
2382
2383 (* why need TOPar condition as stated in preceding rule ? really needed ? *)
2384 (* YES cos at toplevel can have some expression !! for instance when *)
2385 (* enter in the dimension of an array *)
2386 (*
2387 | (TIdent s::TMul::TIdent s2::_ , _)
2388 when (take_safe 1 !passed_tok <> [Tstruct] &&
2389 (take_safe 1 !passed_tok <> [Tenum]))
2390 &&
2391 !LP._lexer_hint = Some LP.Toplevel ->
2392 msg_typedef s;
2393 LP.add_typedef_root s;
2394 TypedefIdent s
2395 *)
2396
2397 (* xx * yy = *)
2398 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TEq _::_ , _)
2399 when not_struct_enum before
2400 && ok_typedef s
2401 ->
2402 msg_typedef s; LP.add_typedef_root s;
2403 TypedefIdent (s, i1)
2404
2405
2406 (* xx * yy) AND in paramdecl *)
2407 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TCPar _::_ , _)
2408 when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
2409 && ok_typedef s
2410 ->
2411 msg_typedef s; LP.add_typedef_root s;
2412 TypedefIdent (s, i1)
2413
2414
2415 (* xx * yy; *) (* wrong ? *)
2416 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ ,
2417 (TOBrace _| TPtVirg _)::_) when not_struct_enum before
2418 && ok_typedef s
2419 ->
2420 msg_typedef s; LP.add_typedef_root s;
2421 msg_maybe_dangereous_typedef s;
2422 TypedefIdent (s, i1)
2423
2424
2425 (* xx * yy, and ';' before xx *) (* wrong ? *)
2426 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ ,
2427 (TOBrace _| TPtVirg _)::_) when
2428 ok_typedef s
2429 ->
2430 msg_typedef s; LP.add_typedef_root s;
2431 TypedefIdent (s, i1)
2432
2433
2434 (* xx_t * yy *)
2435 | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , _)
2436 when s ==~ regexp_typedef && not_struct_enum before
2437 (* struct user_info_t sometimes *)
2438 && ok_typedef s
2439 ->
2440 msg_typedef s; LP.add_typedef_root s;
2441 TypedefIdent (s, i1)
2442
2443 (* xx ** yy *) (* wrong ? *)
2444 | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
2445 when not_struct_enum before
2446 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2447 && ok_typedef s
2448 ->
2449 msg_typedef s; LP.add_typedef_root s;
2450 TypedefIdent (s, i1)
2451
2452 (* xx *** yy *)
2453 | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
2454 when not_struct_enum before
2455 && ok_typedef s
2456 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2457 ->
2458 msg_typedef s; LP.add_typedef_root s;
2459 TypedefIdent (s, i1)
2460
2461 (* xx ** ) *)
2462 | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
2463 when not_struct_enum before
2464 (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
2465 && ok_typedef s
2466 ->
2467 msg_typedef s; LP.add_typedef_root s;
2468 TypedefIdent (s, i1)
2469
2470
2471
2472 (* ----------------------------------- *)
2473 (* old: why not do like for other rules and start with TIdent ?
2474 * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ?
2475 * new: prefer now start with TIdent because otherwise the add_typedef_root
2476 * may have no effect if in second pass or if have disable the add_typedef.
2477 *)
2478
2479 (* (xx) yy *)
2480 | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
2481 (TOPar info)::x::_)
2482 when not (TH.is_stuff_taking_parenthized x) &&
2483 Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3
2484 && ok_typedef s
2485 ->
2486
2487 msg_typedef s; LP.add_typedef_root s;
2488 (*TOPar info*)
2489 TypedefIdent (s, i1)
2490
2491
2492 (* (xx) ( yy)
2493 * but false positif: typedef int (xxx_t)(...), so do specialisation below.
2494 *)
2495 (*
2496 | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
2497 when not (TH.is_stuff_taking_parenthized x)
2498 && ok_typedef s
2499 ->
2500 msg_typedef s; LP.add_typedef_root s;
2501 (* TOPar info *)
2502 TypedefIdent (s, i1)
2503 *)
2504 (* special case: = (xx) ( yy) *)
2505 | (TIdent (s, i1)::TCPar _::TOPar _::_ ,
2506 (TOPar info)::(TEq _ |TEqEq _)::_)
2507 when ok_typedef s
2508 ->
2509 msg_typedef s; LP.add_typedef_root s;
2510 (* TOPar info *)
2511 TypedefIdent (s, i1)
2512
2513
2514 (* (xx * ) yy *)
2515 | (TIdent (s, i1)::TMul _::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_) when
2516 ok_typedef s
2517 ->
2518 msg_typedef s; LP.add_typedef_root s;
2519 (*TOPar info*)
2520 TypedefIdent (s,i1)
2521
2522
2523 (* (xx){ ... } constructor *)
2524 | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
2525 when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
2526 && ok_typedef s
2527 ->
2528 msg_typedef s; LP.add_typedef_root s;
2529 TypedefIdent (s, i1)
2530
2531
2532 (* can have sizeof on expression
2533 | (Tsizeof::TOPar::TIdent s::TCPar::_, _) ->
2534 msg_typedef s;
2535 LP.add_typedef_root s;
2536 Tsizeof
2537 *)
2538
2539
2540 (* ----------------------------------- *)
2541 (* x ( *y )(params), function pointer *)
2542 | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
2543 when not_struct_enum before
2544 && ok_typedef s
2545 ->
2546 msg_typedef s; LP.add_typedef_root s;
2547 TypedefIdent (s, i1)
2548
2549 (* x* ( *y )(params), function pointer 2 *)
2550 | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _)
2551 when not_struct_enum before
2552 && ok_typedef s
2553 ->
2554 msg_typedef s; LP.add_typedef_root s;
2555 TypedefIdent (s, i1)
2556
2557
2558 (*-------------------------------------------------------------*)
2559 (* CPP *)
2560 (*-------------------------------------------------------------*)
2561 | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
2562 TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
2563 as x)
2564 ::_, _
2565 ->
2566 (*
2567 if not !Flag_parsing_c.ifdef_to_if
2568 then TCommentCpp (Ast_c.CppDirective, ii)
2569 else
2570 *)
2571 (* not !LP._lexer_hint.toplevel *)
2572 if !Flag_parsing_c.ifdef_directive_passing
2573 || (pass =|= 2)
2574 then begin
2575
2576 if (LP.current_context () =*= LP.InInitializer)
2577 then begin
2578 pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
2579 incr Stat.nIfdefInitializer;
2580 end else begin
2581 pr2_cpp("IFDEF: or related insde function. I treat it as comment");
2582 incr Stat.nIfdefPassing;
2583 end;
2584 TCommentCpp (Token_c.CppDirective, ii)
2585 end
2586 else x
2587
2588 | (TUndef (id, ii) as x)::_, _
2589 ->
2590 if (pass =|= 2)
2591 then begin
2592 pr2_cpp("UNDEF: I treat it as comment");
2593 TCommentCpp (Token_c.CppDirective, ii)
2594 end
2595 else x
2596
2597 | (TCppDirectiveOther (ii) as x)::_, _
2598 ->
2599 if (pass =|= 2)
2600 then begin
2601 pr2_cpp ("OTHER directive: I treat it as comment");
2602 TCommentCpp (Token_c.CppDirective, ii)
2603 end
2604 else x
2605
2606 (* If ident contain a for_each, then certainly a macro. But to be
2607 * sure should look if there is a '{' after the ')', but it requires
2608 * to count the '('. Because this can be expensive, we do that only
2609 * when the token contains "for_each".
2610 *)
2611 | (TIdent (s, i1)::TOPar _::rest, _)
2612 when not (LP.current_context () =*= LP.InTopLevel)
2613 (* otherwise a function such as static void loopback_enable(int i) {
2614 * will be considered as a loop
2615 *)
2616 ->
2617
2618
2619 if s ==~ regexp_foreach &&
2620 is_really_foreach (Common.take_safe forLOOKAHEAD rest)
2621
2622 then begin
2623 msg_foreach s;
2624 TMacroIterator (s, i1)
2625 end
2626 else TIdent (s, i1)
2627
2628
2629
2630 (*-------------------------------------------------------------*)
2631 | v::xs, _ -> v
2632 | _ -> raise Impossible
2633
2634 let lookahead ~pass a b =
2635 Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b)
2636
2637