Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 | 4 | * Copyright (C) 2007, 2008 Ecole des Mines de Nantes |
34e49164 C |
5 | * |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
34e49164 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
15 | ||
16 | open Common | |
17 | ||
ae4735db | 18 | module TH = Token_helpers |
708f4980 | 19 | module TV = Token_views_c |
34e49164 C |
20 | module LP = Lexer_parser |
21 | ||
485bce71 | 22 | module Stat = Parsing_stat |
34e49164 | 23 | |
ae4735db | 24 | open Parser_c |
34e49164 | 25 | |
ae4735db | 26 | open TV |
708f4980 | 27 | |
34e49164 C |
28 | (*****************************************************************************) |
29 | (* Some debugging functions *) | |
30 | (*****************************************************************************) | |
31 | ||
(* Pair of printers obtained from Common.mk_pr2_wrappers, parameterised by
 * the Flag_parsing_c.verbose_parsing flag. *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
113803cf | 33 | |
(* Emit [s], prefixed with CPP-, through Common.pr2_once; does nothing
 * unless Flag_parsing_c.debug_cpp is set. *)
let pr2_cpp s =
  match !Flag_parsing_c.debug_cpp with
  | true -> Common.pr2_once ("CPP-" ^ s)
  | false -> ()
37 | ||
38 | ||
(* Generic message gate.
 *
 * [printer s] is called when [cond] holds and either message filtering
 * (Flag_parsing_c.filter_msg) is off, or [is_known s] is false.
 * [is_known] is only consulted when filtering is on. *)
let msg_gen cond is_known printer s =
  if cond && (not !Flag_parsing_c.filter_msg || not (is_known s))
  then printer s
ae4735db | 47 | |
34e49164 | 48 | |
485bce71 C |
49 | (* In the following, there are some hardcoded names of types or macros |
50 | * but they are not used by our heuristics! They are just here to | |
51 | * enable to detect false positive by printing only the typedef/macros | |
52 | * that we don't know yet. If we print everything, then we can easily | |
53 | * get lost with too much verbose tracing information. So those | |
54 | * functions "filter" some messages. So our heuristics are still good, | |
55 | * there is no more (or not that much) hardcoded linux stuff. | |
34e49164 | 56 | *) |
485bce71 | 57 | |
ae4735db C |
(* Tell whether [s] is a typedef name we already know about: a fixed list
 * of names, or any name matching the regexp .*_t$ (via Common's =~).
 * Used only to filter messages, see msg_gen. *)
let is_known_typdef = function
  | "u_char" | "u_short" | "u_int" | "u_long"
  | "u8" | "u16" | "u32" | "u64"
  | "s8" | "s16" | "s32" | "s64"
  | "__u8" | "__u16" | "__u32" | "__u64"
  | "acpi_handle"
  | "acpi_status"
  | "FILE"
  | "DIR" ->
      true
  | name ->
      name =~ ".*_t$"
485bce71 | 79 | |
ae4735db C |
80 | (* note: cant use partial application with let msg_typedef = |
81 | * because it would compute msg_typedef at compile time when | |
485bce71 C |
82 | * the flag debug_typedef is always false |
83 | *) | |
(* Count one typedef inference in Stat.nTypedefInfer and, subject to
 * msg_gen's filtering, report the promotion of [s].
 *
 * Kept as a function of [s] (no partial application) so that the
 * debug_typedef flag is read at each call rather than once at module
 * initialisation. *)
let msg_typedef s =
  incr Stat.nTypedefInfer;
  let report name = pr2_cpp ("TYPEDEF: promoting: " ^ name) in
  msg_gen (!Flag_parsing_c.debug_typedef) is_known_typdef report s
92 | ||
485bce71 C |
(* Warn through pr2 that [s] was inferred to be a typedef, unless [s] is
 * accepted by is_known_typdef. *)
let msg_maybe_dangereous_typedef s =
  if is_known_typdef s
  then ()
  else
    pr2
      ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
34e49164 C |
98 | |
99 | ||
100 | ||
(* Count one declare-macro in Stat.nMacroDecl and report [s] through
 * msg_gen, gated on Flag_parsing_c.debug_cpp. The names listed below
 * are treated as already known by msg_gen's filter. *)
let msg_declare_macro s =
  let known_names = [
    "DECLARE_MUTEX"; "DECLARE_COMPLETION"; "DECLARE_RWSEM";
    "DECLARE_WAITQUEUE"; "DECLARE_WAIT_QUEUE_HEAD";
    "DEFINE_SPINLOCK"; "DEFINE_TIMER";
    "DEVICE_ATTR"; "CLASS_DEVICE_ATTR"; "DRIVER_ATTR";
    "SENSOR_DEVICE_ATTR";
    "LIST_HEAD";
    "DECLARE_WORK"; "DECLARE_TASKLET";
    "PORT_ATTR_RO"; "PORT_PMA_ATTR";
    "DECLARE_BITMAP";
  ]
  in
  (* disabled alternatives: names matching ^DECLARE_.*, .*_ATTR$ or
   * ^DEFINE_.* could also be considered known *)
  let already_known name = List.mem name known_names in
  let report name = pr2_cpp ("MACRO: found declare-macro: " ^ name) in
  incr Stat.nMacroDecl;
  msg_gen (!Flag_parsing_c.debug_cpp) already_known report s
34e49164 | 128 | |
ae4735db C |
129 | |
(* Count one hit in Stat.nIteratorHeuristic and trace the foreach-like
 * macro [s] through pr2_cpp. *)
let msg_foreach s =
  let () = incr Stat.nIteratorHeuristic in
  pr2_cpp ("MACRO: found foreach: " ^ s)
133 | ||
134 | ||
ae4735db C |
135 | (* ?? |
136 | let msg_debug_macro s = | |
34e49164 | 137 | pr2_cpp ("MACRO: found debug-macro: " ^ s) |
485bce71 | 138 | *) |
34e49164 C |
139 | |
140 | ||
(* The three functions below each bump Stat.nMacroStmt and trace, through
 * pr2_cpp, a macro used without a trailing semicolon (noptvirg). They
 * differ only in the message text. *)

(* macro with parameters *)
let msg_macro_noptvirg s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)

(* macro at toplevel *)
let msg_macro_toplevel_noptvirg s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)

(* single macro *)
let msg_macro_noptvirg_single s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
152 | ||
153 | ||
485bce71 C |
154 | |
155 | ||
(* Count one hit in Stat.nMacroHigherOrder and report [s] through msg_gen,
 * gated on Flag_parsing_c.debug_cpp. A few names are treated as already
 * known by msg_gen's filter. *)
let msg_macro_higher_order s =
  let already_known name = List.mem name ["DBGINFO"; "DBGPX"; "DFLOW"] in
  let report name = pr2_cpp ("MACRO: found higher ordre macro : " ^ name) in
  incr Stat.nMacroHigherOrder;
  msg_gen (!Flag_parsing_c.debug_cpp) already_known report s
170 | ||
171 | ||
(* Count one hit in Stat.nMacroString and report the string-macro [s]
 * through msg_gen, gated on Flag_parsing_c.debug_cpp. A few names are
 * treated as already known by msg_gen's filter. *)
let msg_stringification s =
  (* disabled alternative: any name matching .*STR.* *)
  let already_known name =
    List.mem name ["REVISION"; "UTS_RELEASE"; "SIZE_STR"; "DMA_STR"]
  in
  let report name = pr2_cpp ("MACRO: found string-macro " ^ name) in
  incr Stat.nMacroString;
  msg_gen (!Flag_parsing_c.debug_cpp) already_known report s
188 | ||
485bce71 C |
(* Count one hit in Stat.nMacroString and trace the string-macro with
 * parameters [s] through pr2_cpp. *)
let msg_stringification_params s =
  let () = incr Stat.nMacroString in
  pr2_cpp ("MACRO: string-macro with params : " ^ s)
192 | ||
193 | ||
194 | ||
(* Count one hit in Stat.nMacroExpand and trace the known macro [s]. *)
let msg_apply_known_macro s =
  let () = incr Stat.nMacroExpand in
  pr2_cpp ("MACRO: found known macro = " ^ s)

(* Count one hit in Stat.nMacroHint and trace the known macro hint [s]. *)
let msg_apply_known_macro_hint s =
  let () = incr Stat.nMacroHint in
  pr2_cpp ("MACRO: found known macro hint = " ^ s)
202 | ||
203 | ||
34e49164 | 204 | |
ae4735db C |
205 | |
(* Count one hit in Stat.nIfdefZero (of Version ?) and trace which kind
 * of boolean #if is being commented out, depending on
 * [is_ifdef_positif]. *)
let msg_ifdef_bool_passing is_ifdef_positif =
  incr Stat.nIfdefZero;
  pr2_cpp
    (if is_ifdef_positif
     then "commenting parts of a #if 1 or #if LINUX_VERSION"
     else "commenting a #if 0 or #if LINUX_VERSION or __cplusplus")
211 | ||
212 | ||
(* Count one hit in Stat.nIfdefExprPassing and trace it. *)
let msg_ifdef_mid_something () =
  let () = incr Stat.nIfdefExprPassing in
  pr2_cpp "found ifdef-mid-something"

(* Count one hit in Stat.nIfdefFunheader; prints nothing. *)
let msg_ifdef_funheaders () =
  incr Stat.nIfdefFunheader

(* Count one hit in Stat.nIfdefPassing and trace it. *)
let msg_ifdef_cparen_else () =
  let () = incr Stat.nIfdefPassing in
  pr2_cpp "found ifdef-cparen-else"
224 | ||
485bce71 | 225 | |
(* Count one hit in Stat.nMacroAttribute and trace the attribute [s]. *)
let msg_attribute s =
  let () = incr Stat.nMacroAttribute in
  pr2_cpp ("ATTR:" ^ s)
ae4735db | 229 | |
485bce71 C |
230 | |
231 | ||
34e49164 | 232 | (*****************************************************************************) |
485bce71 | 233 | (* The regexp and basic view definitions *) |
34e49164 C |
234 | (*****************************************************************************) |
235 | ||
236 | (* opti: better to build them once and for all, especially regexp_foreach *) | |
237 | ||
(* Precompiled regexps, built once at module initialisation. *)

(* upper-case letters, digits and underscores only, not starting with a
 * digit *)
let regexp_macro = Str.regexp "^[A-Z_][A-Z_0-9]*$"

(* linuxext: names starting with a double underscore *)
let regexp_annot = Str.regexp "^__.*$"

(* linuxext: names containing DECLARE *)
let regexp_declare = Str.regexp ".*DECLARE.*"

(* linuxext: case-insensitive; names containing an iteration-like word *)
let regexp_foreach =
  Str.regexp_case_fold
    ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"

(* names ending in _t *)
let regexp_typedef = Str.regexp ".*_t$"
255 | ||
34e49164 C |
(* Names that must never be accepted as typedefs. *)
let false_typedef = ["printk"]

(* [ok_typedef s] is false exactly when [s] is listed in false_typedef. *)
let ok_typedef s =
  if List.mem s false_typedef then false else true
262 | ||
(* [not_annot s] is the negation of [s ==~ regexp_annot]. *)
let not_annot s =
  if s ==~ regexp_annot then false else true
265 | ||
266 | ||
34e49164 | 267 | |
34e49164 | 268 | |
485bce71 C |
269 | (*****************************************************************************) |
270 | (* Helpers *) | |
271 | (*****************************************************************************) | |
272 | ||
485bce71 | 273 | (* ------------------------------------------------------------------------- *) |
ae4735db C |
274 | (* the pair is the status of '()' and '{}', ex: (-1,0) |
275 | * if too much ')' and good '{}' | |
276 | * could do for [] too ? | |
485bce71 C |
277 | * could do for ',' if encounter ',' at "toplevel", not inside () or {} |
278 | * then if have ifdef, then certainly can lead to a problem. | |
279 | *) | |
(* Walk every token of an ifdef clause (via TV.iter_token_ifdef) and
 * return the pair (paren balance, brace balance): each opening token
 * adds one, each closing token subtracts one. *)
let count_open_close_stuff_ifdef_clause
    : TV.ifdef_grouped list -> (int * int) =
 fun clause ->
  let parens = ref 0 in
  let braces = ref 0 in
  let account t =
    (* same test order as a first-match pattern list: opar, '{', cpar, '}' *)
    if TH.is_opar t then incr parens
    else
      match t with
      | TOBrace _ -> incr braces
      | _ when TH.is_cpar t -> decr parens
      | TCBrace _ -> decr braces
      | _ -> ()
  in
  TV.iter_token_ifdef (fun tokext -> account tokext.tok) clause;
  (!parens, !braces)
293 | ||
294 | ||
295 | (* ------------------------------------------------------------------------- *) | |
(* NOTE(review): presumably the number of tokens of lookahead used by the
 * foreach heuristics; not used in the visible part of the file. *)
let forLOOKAHEAD : int = 30
297 | ||
ae4735db | 298 | |
485bce71 | 299 | (* look if there is a '{' just after the closing ')', and handling the |
ae4735db C |
300 | * possibility to have nested expressions inside nested parenthesis |
301 | * | |
485bce71 C |
302 | * todo: use indentation instead of premier(statement) ? |
303 | *) | |
(* Scan the tokens following a candidate foreach macro and decide whether
 * the closing parenthesis is followed by what looks like a loop body:
 * a '{', or the first token of a single statement (identifier, if,
 * while, for, switch, return). Nested parentheses are skipped by
 * recursing on each opening parenthesis.
 *
 * todo: premier(statement) - suivant(funcall); undoubtedly more
 * single-statement cases are needed. *)
let is_really_foreach xs =
  (* returns (verdict, tokens left after the point where we stopped) *)
  let rec scan toks =
    match toks with
    | [] -> (false, [])
    | TCPar _ :: TOBrace _ :: rest -> (true, rest)
    | TCPar _
      :: (TIdent _ | Tif _ | Twhile _ | Tfor _ | Tswitch _ | Treturn _)
      :: rest -> (true, rest)
    | TCPar _ :: rest -> (false, rest)
    | TOPar _ :: rest ->
        (* skip the nested parenthesised group, then keep scanning *)
        let (_, after_group) = scan rest in
        scan after_group
    | _ :: rest -> scan rest
  in
  fst (scan xs)
328 | ||
329 | ||
330 | (* ------------------------------------------------------------------------- *) | |
(* Store [Some cnt] in the tag reference carried by the ifdef-related
 * token [x]. Raises Impossible for any other kind of token. *)
let set_ifdef_token_parenthize_info cnt x =
  let tag =
    match x with
    | TIfdef (t, _) | TIfdefelse (t, _) | TIfdefelif (t, _) | TEndif (t, _)
      -> t
    | TIfdefBool (_, t, _) | TIfdefMisc (_, t, _) | TIfdefVersion (_, t, _)
      -> t
    | _ -> raise Impossible
  in
  tag := Some cnt
485bce71 C |
345 | |
346 | ||
485bce71 | 347 | |
(* Global counter, bumped once per Ifdef/Ifdefbool group visited by
 * set_ifdef_parenthize_info. *)
let ifdef_paren_cnt = ref 0

(* For each Ifdef/Ifdefbool group, tag all its directive tokens with the
 * pair (current value of ifdef_paren_cnt, number of directives in the
 * group), then recurse into the clauses of the group. *)
let rec set_ifdef_parenthize_info xs =
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdefbool (_, clauses, directives)
    | Ifdef (clauses, directives) ->
        incr ifdef_paren_cnt;
        (* read the counter before recursing: nested groups bump it *)
        let tag = (!ifdef_paren_cnt, List.length directives) in
        List.iter
          (fun d -> set_ifdef_token_parenthize_info tag d.tok)
          directives;
        List.iter set_ifdef_parenthize_info clauses
  in
  List.iter visit xs
365 | ||
366 | ||
978fd7e5 C |
367 | (*****************************************************************************) |
368 | (* The parsing hack for #define *) | |
369 | (*****************************************************************************) | |
370 | ||
ae4735db | 371 | (* To parse macro definitions I need to do some tricks |
978fd7e5 C |
372 | * as some information can be get only at the lexing level. For instance |
373 | * the space after the name of the macro in '#define foo (x)' is meaningful | |
374 | * but the grammar can not get this information. So define_ident below | |
375 | * look at such space and generate a special TOpardefine. In a similar | |
376 | * way macro definitions can contain some antislash and newlines | |
ae4735db C |
377 | * and the grammar need to know where the macro ends (which is |
378 | * a line-level and so low token-level information). Hence the | |
978fd7e5 | 379 | * function 'define_line' below and the TDefEol. |
ae4735db C |
380 | * |
381 | * update: TDefEol is handled in a special way at different places, | |
978fd7e5 C |
382 | * a little bit like EOF, especially for error recovery, so this |
383 | * is an important token that should not be retagged! | |
ae4735db C |
384 | * |
385 | * | |
386 | * ugly hack, a better solution perhaps would be to erase TDefEOL | |
387 | * from the Ast and list of tokens in parse_c. | |
388 | * | |
978fd7e5 | 389 | * note: I do a +1 somewhere, it's for the unparsing to correctly sync. |
ae4735db | 390 | * |
978fd7e5 C |
391 | * note: can't replace mark_end_define by simply a fakeInfo(). The reason |
392 | * is where is the \n TCommentSpace. Normally there is always a last token | |
393 | * to synchronize on, either EOF or the token of the next toplevel. | |
ae4735db | 394 | * In the case of the #define we got in list of token |
978fd7e5 C |
395 | * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will |
396 | * not synchronize on it and so we will not print the "\n". | |
397 | * A solution would be to put the TDefEOL before the "\n". | |
c491d8ee | 398 | * (jll: tried to do this, see the comment "Put end of line..." below) |
ae4735db C |
399 | * |
400 | * todo?: could put a ExpandedTok for that ? | |
978fd7e5 | 401 | *) |
ae4735db C |
(* Build a TDefEOL token from [ii]: same parse info, but with an empty
 * string and a character position one past that of [ii] (the +1 is for
 * the unparser to synchronise). Fresh annotation and comment
 * references are allocated. *)
let mark_end_define ii =
  let parse_info =
    { (Ast_c.parse_info_of_info ii) with
      Common.str = "";
      Common.charpos = Ast_c.pos_of_info ii + 1;
    }
  in
  TDefEOL
    { Ast_c.pinfo = Ast_c.OriginTok parse_info;
      cocci_tag = ref Ast_c.emptyAnnot;
      comments_tag = ref Ast_c.emptyComments;
    }
413 | ||
414 | (* put the TDefEOL at the good place *) | |
(* Put the TDefEOL at the right place.
 *
 * define_line_1 copies tokens to [acc] (kept in reverse order) until it
 * meets a #define or #undef, then hands over to define_line_2. An
 * escaped newline met outside of a directive is reported and turned
 * into a TCommentSpace. *)
let rec define_line_1 acc xs =
  match xs with
  | [] -> List.rev acc
  | (TDefine ii as directive) :: rest
  | (TUndef ii as directive) :: rest ->
      define_line_2 (directive :: acc) (Ast_c.line_of_info ii) ii rest
  | TCppEscapedNewline ii :: rest ->
      pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
      pr2 (Ast_c.strloc_of_info ii);
      define_line_1 (TCommentSpace ii :: acc) rest
  | tok :: rest ->
      define_line_1 (tok :: acc) rest

(* define_line_2 consumes the tokens of the directive started on [line].
 * [lastinfo] is the info of the last token consumed so far; it is used
 * to position the TDefEOL. An escaped newline extends the directive to
 * the next line. The directive ends at EOF or at the first token found
 * on another line. *)
and define_line_2 acc line lastinfo xs =
  match xs with
  | [] ->
      (* should not happen, should meet EOF before *)
      pr2 "PB: WEIRD";
      List.rev (mark_end_define lastinfo :: acc)
  | tok :: rest ->
      let tok_line = TH.line_of_tok tok in
      let tok_info = TH.info_of_tok tok in
      (match tok with
      | EOF ii ->
          let acc = mark_end_define lastinfo :: acc in
          define_line_1 (EOF ii :: acc) rest
      | TCppEscapedNewline ii ->
          if (tok_line <> line) then pr2 "PB: WEIRD: not same line number";
          define_line_2 (TCommentSpace ii :: acc) (line + 1) tok_info rest
      | _ ->
          if tok_line =|= line
          then define_line_2 (tok :: acc) line tok_info rest
          else
            (* Put end of line token before the newline. A newline at
               least must be there because the line changed and because
               we saw a #define previously to get to this function at
               all *)
            define_line_1
              ((List.hd acc) :: (mark_end_define lastinfo :: (List.tl acc)))
              (tok :: rest)
      )
463 | ||
(* Retag the name of each macro in #define / #undef directives.
 *
 * [acc] accumulates the output tokens in reverse order; the result is
 * [List.rev acc] once [xs] is exhausted.
 *
 * - after a #undef, the identifier becomes a TIdentDefine;
 * - after a #define, the identifier becomes a TIdentDefine, and an
 *   opening parenthesis stuck to it becomes a TOParDefine;
 * - after a #define, a non-identifier token whose string matches
 *   Common.regexp_alpha (as in #define inline) is remapped to a
 *   TIdentDefine;
 * - anything else is reported as weird.
 *
 * Fix: the diagnostic of the #undef branch used to mention #define. *)
let rec define_ident acc xs =
  match xs with
  | [] -> List.rev acc
  | TUndef ii::xs ->
      let acc = TUndef ii :: acc in
      (match xs with
      | TCommentSpace i1::TIdent (s,i2)::xs ->
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          define_ident acc xs
      | _ ->
          pr2 "WEIRD: weird #undef body";
          define_ident acc xs
      )
  | TDefine ii::xs ->
      let acc = TDefine ii :: acc in
      (match xs with
      | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
          (* Change also the kind of TIdent to avoid bad interaction
           * with other parsing_hack tricks. For instance if we keep
           * TIdent then the stringification algo can believe the TIdent
           * is a string-macro. So simpler to change the kind of the
           * ident too.
           *)
          (* if TOParDefine sticked to the ident, then
           * it's a macro-function. Change token to avoid ambiguity
           * between #define foo(x)  and   #define foo   (x)
           *)
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          let acc = (TOParDefine i3) :: acc in
          define_ident acc xs

      | TCommentSpace i1::TIdent (s,i2)::xs ->
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          define_ident acc xs

      (* bugfix: ident of macro (as well as params, cf below) can be
       * tricky. note, do we need to subst in the body of the define ?
       * no cos here the issue is the name of the macro, as in
       * #define inline, so obviously the name of this macro will not
       * be used in its body (it would be a recursive macro, which is
       * forbidden).
       *)
      | TCommentSpace i1::t::xs ->
          let s = TH.str_of_tok t in
          let ii = TH.info_of_tok t in
          if s ==~ Common.regexp_alpha
          then begin
            pr2 (spf "remapping: %s to an ident in macro name" s);
            let acc = (TCommentSpace i1) :: acc in
            let acc = (TIdentDefine (s,ii)) :: acc in
            define_ident acc xs
          end
          else begin
            (* note: the space and [t] are not pushed on [acc] here *)
            pr2 "WEIRD: weird #define body";
            define_ident acc xs
          end

      | _ ->
          pr2 "WEIRD: weird #define body";
          define_ident acc xs
      )
  | x::xs ->
      let acc = x :: acc in
      define_ident acc xs
978fd7e5 C |
531 | |
532 | ||
ae4735db C |
533 | |
(* Run the two #define passes in sequence: first define_line_1 (insert
 * the TDefEOL tokens), then define_ident (retag the macro names). *)
let fix_tokens_define2 xs =
  xs +> define_line_1 [] +> define_ident []

(* Same as fix_tokens_define2, wrapped in Common.profile_code. *)
let fix_tokens_define a =
  let run () = fix_tokens_define2 a in
  Common.profile_code "C parsing.fix_define" run
ae4735db | 539 | |
978fd7e5 C |
540 | |
541 | ||
542 | ||
543 | ||
544 | (* ------------------------------------------------------------------------- *) | |
545 | (* Other parsing hacks related to cpp, Include/Define hacks *) | |
546 | (* ------------------------------------------------------------------------- *) | |
547 | ||
548 | (* Sometimes I prefer to generate a single token for a list of things in the | |
549 | * lexer so that if I have to passed them, like for passing TInclude then | |
ae4735db C |
550 | * it's easy. Also if I don't do a single token, then I need to |
551 | * parse the rest which may not need special stuff, like detecting | |
978fd7e5 C |
552 | * end of line which the parser is not really ready for. So for instance |
553 | * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just | |
ae4735db | 554 | * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ? |
978fd7e5 C |
555 | * but this kind of token is valid only after a #include and the |
556 | * lexing and parsing rules are different for such tokens so not that | |
557 | * easy to parse such things in parser_c.mly. Hence the following hacks. | |
ae4735db | 558 | * |
978fd7e5 C |
559 | * less?: maybe could get rid of this like I get rid of some of fix_define. |
560 | *) | |
561 | ||
562 | (* helpers *) | |
563 | ||
564 | (* used to generate new token from existing one *) | |
(* Derive a new token info from the existing [ii]: the string is
 * replaced by [str], and both the character position and the column are
 * shifted by [posadd]. *)
let new_info posadd str ii =
  let shifted =
    { (Ast_c.parse_info_of_info ii) with
      charpos = Ast_c.pos_of_info ii + posadd;
      str;
      column = Ast_c.col_of_info ii + posadd;
    }
  in
  { Ast_c.pinfo = Ast_c.OriginTok shifted;
    (* must generate a new ref each time, otherwise share *)
    cocci_tag = ref Ast_c.emptyAnnot;
    comments_tag = ref Ast_c.emptyComments;
  }
576 | ||
577 | ||
(* Turn every token up to and including the next TDefEOL into a
 * TCommentCpp; tokens after the TDefEOL are returned untouched.
 * Real comments are kept as they are. Fails when no TDefEOL is found
 * (job not done in Cpp_token_c.define_parse ?). *)
let rec comment_until_defeol xs =
  match xs with
  | [] ->
      failwith "cant find end of define token TDefEOL"
  | (Parser_c.TDefEOL _ as eol) :: rest ->
      Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok eol)
      :: rest
  | tok :: rest ->
      let tok' =
        (* bugfix: otherwise may lose a TComment token *)
        if TH.is_real_comment tok
        then tok
        else
          Parser_c.TCommentCpp
            (Token_c.CppPassingNormal (*good?*), TH.info_of_tok tok)
      in
      tok' :: comment_until_defeol rest
597 | ||
ae4735db C |
598 | let drop_until_defeol xs = |
599 | List.tl | |
978fd7e5 C |
600 | (Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs) |
601 | ||
602 | ||
603 | ||
604 | (* ------------------------------------------------------------------------- *) | |
605 | (* returns a pair (replaced token, list of next tokens) *) | |
606 | (* ------------------------------------------------------------------------- *) | |
607 | ||
ae4735db C |
608 | let tokens_include (info, includes, filename, inifdef) = |
609 | Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef), | |
610 | [Parser_c.TIncludeFilename | |
978fd7e5 C |
611 | (filename, (new_info (String.length includes) filename info)) |
612 | ] | |
613 | ||
614 | ||
615 | ||
616 | ||
485bce71 C |
617 | (*****************************************************************************) |
618 | (* CPP handling: macros, ifdefs, macros defs *) | |
619 | (*****************************************************************************) | |
620 | ||
0708f913 C |
621 | (* ------------------------------------------------------------------------- *) |
622 | (* special skip_start skip_end handling *) | |
623 | (* ------------------------------------------------------------------------- *) | |
624 | ||
625 | (* note: after this normally the token list should not contain any more the | |
626 | * TCommentSkipTagStart and End tokens. | |
627 | *) | |
(* Turn every region delimited by a TCommentSkipTagStart and the next
 * TCommentSkipTagEnd (both included) into comments of kind
 * CppPassingExplicit. Fails on a start tag without an end tag, and on
 * an end tag met without a start tag. *)
let rec commentize_skip_start_to_end xs =
  let is_skip_end = function
    | {tok = TCommentSkipTagEnd _} -> true
    | _ -> false
  in
  match xs with
  | [] -> ()
  | ({tok = TCommentSkipTagStart _} as start) :: rest ->
      (try
        let (inside, stop, after) = Common.split_when is_skip_end rest in
        (start :: stop :: inside) +> List.iter (fun t ->
          set_as_comment Token_c.CppPassingExplicit t
        );
        commentize_skip_start_to_end after
      with Not_found ->
        failwith "could not find end of skip_start special comment"
      )
  | {tok = TCommentSkipTagEnd _} :: _ ->
      failwith "found skip_end comment but no skip_start"
  | _ :: rest ->
      commentize_skip_start_to_end rest
ae4735db C |
654 | |
655 | ||
0708f913 C |
656 | |
657 | ||
34e49164 C |
658 | (* ------------------------------------------------------------------------- *) |
659 | (* ifdef keeping/passing *) | |
660 | (* ------------------------------------------------------------------------- *) | |
661 | ||
662 | (* #if 0, #if 1, #if LINUX_VERSION handling *) | |
ae4735db C |
(* #if 0, #if 1, #if LINUX_VERSION handling.
 *
 * For each Ifdefbool group the directives are turned into comments and:
 * - if the condition is positive, every clause but the first one is
 *   commented out;
 * - otherwise the first clause is commented out, as well as every
 *   following clause except the last one.
 * Plain Ifdef groups are only traversed. *)
let rec find_ifdef_bool xs =
  let pass_clause clause =
    iter_token_ifdef (set_as_comment Token_c.CppPassingNormal) clause
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) ->
        List.iter find_ifdef_bool clauses
    | Ifdefbool (is_ifdef_positif, clauses, directives) ->
        msg_ifdef_bool_passing is_ifdef_positif;
        (match clauses with
        | [] -> raise Impossible
        | firstclause :: others ->
            List.iter (set_as_comment Token_c.CppDirective) directives;
            if is_ifdef_positif
            then List.iter pass_clause others
            else begin
              pass_clause firstclause;
              (match List.rev others with
              (* keep only last *)
              | _last :: before_last -> List.iter pass_clause before_last
              | [] -> (* not #else *) ()
              )
            end
        )
  in
  List.iter visit xs
692 | ||
693 | ||
694 | ||
34e49164 C |
(* Clauses longer than this (in number of grouped lines) are not
 * analysed by find_ifdef_mid. *)
let thresholdIfdefSizeMid = 6

(* Infer ifdefs involving not-closed expressions/statements.
 *
 * An Ifdef group with at least two clauses is handled when every clause
 * is small (see thresholdIfdefSizeMid), contains no nested ifdef, and
 * all the clauses have the same, non-zero, (paren, brace) balance as
 * computed by count_open_close_stuff_ifdef_clause. In that case only
 * the first clause is kept; the directives and the other clauses are
 * turned into comments.
 *
 * Fix: the test used to read  cnt1 <> 0 || cnt2 <> 0 && for_all ...
 * which, && binding tighter than ||, skipped the all-clauses-agree
 * check whenever the paren balance was non-zero. The disjunction is
 * now parenthesised. *)
let rec find_ifdef_mid xs =
  xs +> List.iter (function
  | NotIfdefLine _ -> ()
  | Ifdef (xxs, info_ifdef_stmt) ->
      (match xxs with
      | [] -> raise Impossible
      | [_first] -> ()
      | _first::second::rest ->
          (* don't analyse big ifdef *)
          if xxs +> List.for_all
                (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
             (* don't want nested ifdef *)
             xxs +> List.for_all (fun xs ->
               xs +> List.for_all
                 (function NotIfdefLine _ -> true | _ -> false)
             )
          then
            let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
            let cnt1, cnt2 = List.hd counts in
            if (cnt1 <> 0 || cnt2 <> 0) &&
               counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
            then begin
              msg_ifdef_mid_something();

              (* keep only first, treat the rest as comment *)
              info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
              (second::rest) +> List.iter
                (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError));
            end
      );
      List.iter find_ifdef_mid xxs

  (* no need complex analysis for ifdefbool *)
  | Ifdefbool (_, xxs, _info_ifdef_stmt) ->
      List.iter find_ifdef_mid xxs
  )
743 | ||
744 | ||
(* Maximum number of extra grouped lines allowed in each clause of a
 * two-clause function-header ifdef. *)
let thresholdFunheaderLimit = 4

(* ifdef defining alternate function header, type.
 *
 * Recognises an Ifdef group whose clauses start at column 0 and which
 * is immediately followed by a line starting with '{' at column 0.
 * Three shapes are handled: two clauses, a clause plus a nested
 * two-clause ifdef, and three single-line clauses. In each case the
 * directives and the alternative headers (all but the first) are
 * turned into comments. Other groups are only traversed. *)
let rec find_ifdef_funheaders xs =
  let pass_directives toks =
    List.iter (set_as_comment Token_c.CppDirective) toks
  in
  let pass_tokens toks =
    List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) toks
  in
  match xs with
  | [] -> ()
  | NotIfdefLine _ :: rest -> find_ifdef_funheaders rest

  (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
  | Ifdef
      ([(NotIfdefLine (({col = 0})::_))::ifdefblock1;
        (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
      ], info_ifdef_stmt
      )
    ::NotIfdefLine (({tok = TOBrace _; col = 0})::_)
    ::rest
    when List.length ifdefblock1 <= thresholdFunheaderLimit &&
         List.length ifdefblock2 <= thresholdFunheaderLimit
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      pass_directives info_ifdef_stmt;
      pass_tokens (xline2 :: line2);
      iter_token_ifdef
        (set_as_comment Token_c.CppPassingCosWouldGetError)
        ifdefblock2

  (* ifdef with nested ifdef *)
  | Ifdef
      ([[NotIfdefLine (({col = 0})::_)];
        [Ifdef
            ([[NotIfdefLine (({col = 0} as xline2)::line2)];
              [NotIfdefLine (({col = 0} as xline3)::line3)];
            ], info_ifdef_stmt2
            )
        ]
      ], info_ifdef_stmt
      )
    ::NotIfdefLine (({tok = TOBrace _; col = 0})::_)
    ::rest
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      pass_directives info_ifdef_stmt;
      pass_directives info_ifdef_stmt2;
      pass_tokens (xline2 :: xline3 :: (line2 @ line3))

  (* ifdef with elseif *)
  | Ifdef
      ([[NotIfdefLine (({col = 0})::_)];
        [NotIfdefLine (({col = 0} as xline2)::line2)];
        [NotIfdefLine (({col = 0} as xline3)::line3)];
      ], info_ifdef_stmt
      )
    ::NotIfdefLine (({tok = TOBrace _; col = 0})::_)
    ::rest
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      pass_directives info_ifdef_stmt;
      pass_tokens (xline2 :: xline3 :: (line2 @ line3))

  (* recurse *)
  | Ifdef (clauses, _) :: rest
  | Ifdefbool (_, clauses, _) :: rest ->
      List.iter find_ifdef_funheaders clauses;
      find_ifdef_funheaders rest
ae4735db | 815 | |
34e49164 C |
816 | |
817 | ||
485bce71 | 818 | (* ?? *) |
ae4735db C |
(* Flag the include directives that sit inside a top-level ifdef group.
 *
 * For every Ifdef / Ifdefbool element of [xs], each branch is traversed
 * with iter_token_ifdef and the boolean reference carried by every
 * TInclude token met is set to true. NotIfdefLine elements are left
 * untouched.
 *)
let adjust_inifdef_include xs =
  let flag_include tokext =
    match tokext.tok with
    | Parser_c.TInclude (_, _, inifdef_ref, _) -> inifdef_ref := true
    | _ -> ()
  in
  let visit_group = function
    | NotIfdefLine _ -> ()
    | Ifdef (branches, _) | Ifdefbool (_, branches, _) ->
        List.iter (iter_token_ifdef flag_include) branches
  in
  List.iter visit_group xs
830 | ||
831 | ||
832 | ||
34e49164 | 833 | |
485bce71 | 834 | |
34e49164 C |
835 | |
836 | ||
ae4735db C |
(* Heuristic for an ifdef whose first branch ends with a closing paren.
 *
 * For each Ifdef having at least two branches: when the last element of
 * the first branch is a NotIfdefLine whose last token satisfies
 * TH.is_cpar, only the first branch is kept. The ifdef directives are
 * commented out as CppDirective and every token of the remaining
 * branches as CppPassingCosWouldGetError. All branches are then visited
 * recursively, whether or not the heuristic fired.
 *
 * Ifdefbool groups get no analysis of their own, only the recursion.
 * An Ifdef with no branch at all raises Impossible.
 *)
let find_ifdef_cparen_else xs =
  (* true when the branch is non-empty and its last line ends with a
   * closing-paren token *)
  let ends_with_cpar branch =
    match branch with
    | [] -> false
    | _ ->
        (match Common.last branch with
        | NotIfdefLine [] -> false
        | NotIfdefLine toks -> TH.is_cpar (Common.last toks).tok
        | Ifdef _ | Ifdefbool _ -> false)
  in
  let rec walk groups = List.iter visit groups
  and visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (branches, directives) ->
        (match branches with
        | [] -> raise Impossible
        | [_] -> ()
        | first :: others ->
            if ends_with_cpar first then begin
              msg_ifdef_cparen_else ();
              (* keep only the first branch, treat the rest as comment *)
              List.iter (set_as_comment Token_c.CppDirective) directives;
              List.iter
                (iter_token_ifdef
                   (set_as_comment Token_c.CppPassingCosWouldGetError))
                others
            end);
        List.iter walk branches
    | Ifdefbool (_, branches, _) ->
        List.iter walk branches
  in
  walk xs
34e49164 C |
881 | |
882 | ||
708f4980 C |
883 | (* ------------------------------------------------------------------------- *) |
884 | (* cpp-builtin part2, macro, using standard.h or other defs *) | |
885 | (* ------------------------------------------------------------------------- *) | |
34e49164 | 886 | |
ae4735db | 887 | (* now in cpp_token_c.ml *) |
34e49164 C |
888 | |
889 | (* ------------------------------------------------------------------------- *) | |
890 | (* stringification *) | |
891 | (* ------------------------------------------------------------------------- *) | |
892 | ||
(* Stringification inside parenthesised groups.
 *
 * Each argument of a Parenthised group is inspected: when it contains at
 * least one string token (TString or TMacroString) and is made only of
 * string tokens and identifiers, every identifier in it is retagged as
 * TMacroString. Otherwise the argument itself is searched recursively.
 * Tokens outside of parentheses are skipped.
 *)
let rec find_string_macro_paren xs =
  let is_string = function
    | PToken {tok = (TString _ | TMacroString _)} -> true
    | _ -> false
  in
  let is_string_or_ident = function
    | PToken {tok = (TString _ | TMacroString _ | TIdent _)} -> true
    | _ -> false
  in
  let retag_ident = function
    | PToken ({tok = TIdent (s, _)} as id) ->
        msg_stringification s;
        id.tok <- TMacroString (s, TH.info_of_tok id.tok)
    | _ -> ()
  in
  let handle_arg arg =
    if List.exists is_string arg && List.for_all is_string_or_ident arg
    then List.iter retag_ident arg
    else find_string_macro_paren arg
  in
  match xs with
  | [] -> ()
  | PToken _ :: rest ->
      find_string_macro_paren rest
  | Parenthised (args, _) :: rest ->
      List.iter handle_arg args;
      find_string_macro_paren rest
ae4735db | 917 | |
34e49164 C |
918 | |
919 | (* ------------------------------------------------------------------------- *) | |
920 | (* macro2 *) | |
921 | (* ------------------------------------------------------------------------- *) | |
922 | ||
923 | (* don't forget to recurse in each case *) | |
(* Macro heuristics on the parenthesised view of the token stream.
 *
 * The clauses are tried in order and that order matters:
 *  - attribute tokens, with or without an argument list, are commented
 *    out as CppAttr;
 *  - a TMacroAttr following static/extern becomes a TMacroAttrStorage;
 *  - an identifier adjacent to a string token is retagged TMacroString,
 *    and its argument list, if any, is commented out as CppMacro;
 *  - anything else is skipped, recursing inside parenthesised groups.
 *)
let rec find_macro_paren xs =
  (* comment out every token of one parenthesised group *)
  let hide_group kind group =
    iter_token_paren (set_as_comment kind) [group]
  in
  (* retag an identifier token as a string macro, keeping its info *)
  let to_macro_string name id =
    id.tok <- TMacroString (name, TH.info_of_tok id.tok)
  in
  match xs with
  | [] -> ()

  (* attribute with arguments *)
  | PToken ({tok = Tattribute _} as attr) :: (Parenthised _ as args) :: rest ->
      pr2_cpp ("MACRO: __attribute detected ");
      hide_group Token_c.CppAttr args;
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

  (* attribute without arguments *)
  | PToken ({tok = TattributeNoarg _} as attr) :: rest ->
      pr2_cpp ("MACRO: __attributenoarg detected ");
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

  (* Disabled clauses, kept for reference:

  (* attribute cpp, __xxx id *)
  | PToken ({tok = TIdent (s,i1)} as id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s ==~ regexp_annot
     ->
      msg_attribute s;
      id.tok <- TMacroAttr (s, i1);
      find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)

  (* attribute cpp, id __xxx *)
  | PToken ({tok = TIdent (s,i1)} as _id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
     ->
      msg_attribute s2;
      id2.tok <- TMacroAttr (s2, i2);
      find_macro_paren xs

  | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
    ::PToken ({tok = TIdent (s,i1)} as attr)
    ::xs when s ==~ regexp_annot
     ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s,i1);
      (* recurse, may have other storage attributes *)
      find_macro_paren (PToken (tok1)::xs)
  *)

  (* storage attribute *)
  | (PToken {tok = (Tstatic _ | Textern _)} as storage)
    :: PToken ({tok = TMacroAttr (s, i1)} as attr)
    :: rest ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s, i1);
      (* the storage token is pushed back: other storage attributes may
       * follow it *)
      find_macro_paren (storage :: rest)

  (* Stringification. The order of the four clauses below is important:
   * the variants with an argument list must come first.
   *)

  (* string macro with params, string before the macro *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: (Parenthised _ as args)
    :: rest ->
      msg_stringification_params s;
      to_macro_string s id;
      hide_group Token_c.CppMacro args;
      find_macro_paren rest

  (* string macro with params, string after the macro *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: (Parenthised _ as args)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification_params s;
      to_macro_string s id;
      hide_group Token_c.CppMacro args;
      find_macro_paren rest

  (* The two clauses below handle strings that are not inside a funcall,
   * for instance in an initializer.
   *)

  (* string macro variable, string before the identifier *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: rest ->
      msg_stringification s;
      to_macro_string s id;
      find_macro_paren rest

  (* string macro variable, string after the identifier *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification s;
      to_macro_string s id;
      find_macro_paren rest

  (* nothing recognised: move on, descending into groups *)
  | PToken _ :: rest ->
      find_macro_paren rest
  | Parenthised (args, _) :: rest ->
      List.iter find_macro_paren args;
      find_macro_paren rest
1047 | ||
1048 | ||
1049 | ||
1050 | ||
1051 | ||
1052 | (* don't forget to recurse in each case *) | |
(* Macro heuristics on the line + parenthesised view of the token stream.
 *
 * The clauses are tried in order; each one ends with a recursive call so
 * that the rest of the lines is still visited.
 *)
let rec find_macro_lineparen xs =
  (* report the identifier token [macro] and retag it as TMacroDecl; the
   * name stored in the new token is the string of its info *)
  let mark_as_decl name macro =
    msg_declare_macro name;
    let info = TH.info_of_tok macro.tok in
    macro.tok <- TMacroDecl (Ast_c.str_of_info info, info)
  in
  (* Can [tok], the first token of the next line, follow a macro call
   * written without a trailing semicolon? Used to reduce the number of
   * false positives. *)
  let may_follow_macro_call ctx = function
    | TOBrace _ -> false (* otherwise would match funcdecl *)
    | TCBrace _ when ctx <> InFunction -> false
    | TPtVirg _ | TDotDot _ -> false
    | t when TH.is_binary_operator t -> false
    | _ -> true
  in
  match xs with
  | [] -> ()

  (* linuxext: ex: static [const] DEVICE_ATTR(); *)
  | Line [ PToken {tok = Tstatic _};
           PToken ({tok = TIdent (s, _)} as macro);
           Parenthised _;
           PToken {tok = TPtVirg _} ]
    :: rest
    when s ==~ regexp_macro ->
      mark_as_decl s macro;
      find_macro_lineparen rest

  (* the static const case *)
  | Line [ PToken {tok = Tstatic _};
           PToken ({tok = Tconst _} as const_tok);
           PToken ({tok = TIdent (s, _)} as macro);
           Parenthised _;
           PToken {tok = TPtVirg _} ]
    :: rest
    when s ==~ regexp_macro ->
      mark_as_decl s macro;
      (* The const must be retagged too, otherwise the grammar is
       * ambiguous:
       *   21: shift/reduce conflict (shift 121, reduce 137) on Tconst
       *   decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
       *   decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
       *   storage_class_spec : Tstatic .  (137)
       *)
      const_tok.tok <- TMacroDeclConst (TH.info_of_tok const_tok.tok);
      find_macro_lineparen rest

  (* Same as the first clause but without the trailing semicolon. The
   * semicolon is not required because it can be on another line and,
   * because of the way mk_line is coded, it is then not reachable from
   * here even when it directly follows the closing paren.
   *)
  | Line [ PToken {tok = Tstatic _};
           PToken ({tok = TIdent (s, _)} as macro);
           Parenthised _ ]
    :: rest
    when s ==~ regexp_macro ->
      mark_as_decl s macro;
      find_macro_lineparen rest

  (* static alone on its line, macro declaration on the next one *)
  | Line [ PToken {tok = Tstatic _} ]
    :: Line [ PToken ({tok = TIdent (s, _)} as macro);
              Parenthised _;
              PToken {tok = TPtVirg _} ]
    :: rest
    when s ==~ regexp_macro ->
      mark_as_decl s macro;
      find_macro_lineparen rest

  (* linuxext: ex: DECLARE_BITMAP();
   *
   * regexp_declare is used here instead of regexp_macro because such a
   * line can also be a FunCallMacro such as DEBUG(foo()); without a
   * preceding static, restricting to .*DECLARE.* names is the only way
   * to avoid false positives.
   *
   * There is a grammar rule for this, so this clause is only needed
   * when the parameters of the DECLARE_xxx are weird and can not be
   * mapped on an argument_list.
   *)
  | Line [ PToken ({tok = TIdent (s, _)} as macro);
           Parenthised _;
           PToken {tok = TPtVirg _} ]
    :: rest
    when s ==~ regexp_declare ->
      mark_as_decl s macro;
      find_macro_lineparen rest

  (* toplevel macros, ex: module_init(xxx)
   *
   * The TIdent could be turned into a TMacroTop but that can give false
   * positives; it is easier to change only the closing paren, which is
   * enough to solve the end-of-stream pb of ocamlyacc.
   *)
  | Line [ PToken {tok = TIdent (s, _); col = col1; where = ctx};
           Parenthised (_, info_parens) ]
    :: rest
    when col1 =|= 0 ->
      let next_line_ok =
        match rest with
        | Line (PToken ({col = col2} as other) :: _) :: _ ->
            TH.is_eof other.tok
            || (col2 =|= 0 && may_follow_macro_call ctx other.tok)
        | _ -> false
      in
      if next_line_ok then begin
        msg_macro_toplevel_noptvirg s;
        (* just to avoid the end-of-stream pb of ocamlyacc *)
        let tcpar = Common.last info_parens in
        tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok)
        (* macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok); *)
      end;
      find_macro_lineparen rest

  (* macro with parameters
   * ex: DEBUG()
   *     return x;
   *)
  | Line [ PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro);
           (Parenthised _ as args) ]
    :: (Line (PToken ({col = col2} as other) :: restline2) as line2)
    :: rest
    (* when s ==~ regexp_macro *) ->
      let condition =
        (col1 =|= col2 && may_follow_macro_call ctx other.tok)
        ||
        (col2 <= col1 &&
         (match other.tok, restline2 with
          | TCBrace _, _ when ctx =*= InFunction -> true
          | Treturn _, _ -> true
          | Tif _, _ -> true
          | Telse _, _ -> true
          (* case of label, usually put in first line *)
          | TIdent _, PToken {tok = TDotDot _} :: _ -> true
          | _ -> false))
      in
      (* Nothing is done for a macro in column 0. The previous clause
       * already takes every such line, so the column test is only a
       * safeguard here. *)
      if condition && col1 <> 0 then begin
        msg_macro_noptvirg s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
        iter_token_paren (set_as_comment Token_c.CppMacro) [args]
      end;
      find_macro_lineparen (line2 :: rest)

  (* linuxext:? single macro
   * ex: LOCK
   *     foo();
   *     UNLOCK
   *
   * todo: factorize code with previous rule ?
   *)
  | Line [ PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro) ]
    :: (Line (PToken ({col = col2} as other) :: _) as line2)
    :: rest
    (* when s ==~ regexp_macro *) ->
      let condition =
        (col1 =|= col2 &&
         col1 <> 0 && (* otherwise can match typedef of fundecl *)
         (match other.tok with
          | TPtVirg _ -> false
          | TOr _ -> false
          | TCBrace _ when ctx <> InFunction -> false
          | t when TH.is_binary_operator t -> false
          | _ -> true))
        ||
        (col2 <= col1 &&
         (match other.tok with
          | TCBrace _ when ctx =*= InFunction -> true
          | Treturn _ | Tif _ | Telse _ -> true
          | _ -> false))
      in
      if condition then begin
        msg_macro_noptvirg_single s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok)
      end;
      find_macro_lineparen (line2 :: rest)

  (* nothing recognised on this line *)
  | _ :: rest ->
      find_macro_lineparen rest
1334 | ||
1335 | ||
485bce71 C |
1336 | |
1337 | (* ------------------------------------------------------------------------- *) | |
1338 | (* define tobrace init *) | |
1339 | (* ------------------------------------------------------------------------- *) | |
1340 | ||
ae4735db C |
(* Detect a #define whose body starts with a brace initializer.
 *
 * Two shapes are recognised, with and without a parameter list after the
 * defined name. In both, the two tokens following the opening brace
 * decide whether the brace is retagged as TOBraceDefineInit. Scanning
 * then resumes after those two tokens. The content of parenthesised
 * groups is not visited: it is not needed for this heuristic.
 *)
let find_define_init_brace_paren xs =
  let mark_brace label name brace info =
    pr2_cpp (label ^ name);
    brace.tok <- TOBraceDefineInit info
  in
  let rec scan = function
    | [] -> ()

    (* define without parameters, mainly for firefox *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (name, _)}
      :: PToken ({tok = TOBrace i1} as brace)
      :: PToken t2
      :: PToken t3
      :: rest ->
        (match t2.tok, t3.tok with
         | (TInt _ | TString _ | TIdent _), TComma _ ->
             mark_brace "found define initializer: " name brace i1
         | _ -> ());
        scan rest

    (* define with parameters, mainly for linux, especially in sound/ *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (name, _)}
      :: Parenthised _
      :: PToken ({tok = TOBrace i1} as brace)
      :: PToken t2
      :: PToken t3
      :: rest ->
        (match t2.tok, t3.tok with
         | (TInt _ | TIdent _), TComma _
         | TDot _, TIdent _ ->
             mark_brace "found define initializer with param: " name brace i1
         | _ -> ());
        scan rest

    | (PToken _ | Parenthised _) :: rest ->
        scan rest
  in
  scan xs
1404 | ||
1405 | ||
34e49164 C |
1406 | (* ------------------------------------------------------------------------- *) |
1407 | (* action *) | |
1408 | (* ------------------------------------------------------------------------- *) | |
1409 | ||
708f4980 | 1410 | (* obsolete now with macro expansion ? get some regression if comment. |
ae4735db | 1411 | * todo: if do bad decision here, then it can influence other phases |
708f4980 | 1412 | * and make it hard to parse. So maybe when have a parse error, should |
ae4735db | 1413 | * undo some of the guess those heuristics have done, and restore |
708f4980 C |
1414 | * the original token value. |
1415 | *) | |
1416 | ||
34e49164 C |
(* Higher-order macro detection.
 *
 * For each identifier directly followed by a parenthesised group, the
 * rest of the stream is visited first, then each argument, and finally
 * the arguments are handed to find_actions_params. When that function
 * reports a change, the identifier is signalled through
 * msg_macro_higher_order.
 *)
let rec find_actions xs =
  match xs with
  | [] -> ()
  | PToken {tok = TIdent (name, _)} :: Parenthised (args, _) :: rest ->
      find_actions rest;
      List.iter find_actions args;
      if find_actions_params args
      then msg_macro_higher_order name
  | _ :: rest ->
      find_actions rest

(* Retag as TAction every token of each argument that contains at least
 * one token satisfying TH.is_statement. Returns true when at least one
 * argument was handled that way.
 *
 * A former extra condition, List.length toks > 1, was removed: good for
 * sparse, not good for linux.
 *)
and find_actions_params xxs =
  let tag_as_action t =
    if TH.is_eof t.tok
    then
      (* certainly because paren detection had a pb because of some
       * ifdef-exp. Similar to the additional checking done in
       * set_as_comment.
       *)
      pr2 "PB: weird, I try to tag an EOF token as an action"
    else if TH.is_eom t.tok
    then
      (* cf tests-bis/no_cpar_macro.c *)
      pr2 "PB: weird, I try to tag an EOM token as an action"
    else
      t.tok <- TAction (TH.info_of_tok t.tok)
  in
  let process_arg modified arg =
    let has_statement =
      List.exists (fun t -> TH.is_statement t.tok) (tokens_of_paren arg)
    in
    if has_statement then begin
      iter_token_paren tag_as_action arg;
      true
    end
    else modified
  in
  List.fold_left process_arg false xxs
1460 | ||
1461 | ||
1462 | ||
1463 | (* ------------------------------------------------------------------------- *) | |
1464 | (* main fix cpp function *) | |
1465 | (* ------------------------------------------------------------------------- *) | |
1466 | ||
(* Drop the comment tokens and the cpp instruction tokens from [xs].
 *
 * TDefine is kept on purpose. If it were dropped, its body would have to
 * be dropped too, otherwise the line heuristics may be lost by not
 * finding the TDefine in column 0 but a TDefineIdent in a column > 0.
 *)
let filter_cpp_stuff xs =
  let keep x =
    let t = x.tok in
    if TH.is_comment t then false
    else
      match t with
      | Parser_c.TDefine _ -> true
      | _ -> not (TH.is_cpp_instruction t)
  in
  List.filter keep xs
34e49164 C |
1482 | |
(* Give a virtual position to the expanded and fake tokens of [l].
 *
 * Tokens placed before the first OriginTok are returned unchanged. From
 * the first OriginTok on, each ExpandedTok / FakeTok has its position
 * rewritten as a pair (anchor, offset): the anchor is the parse info of
 * the closest preceding OriginTok, and the offset starts at the string
 * length of that OriginTok and grows by the string length of every
 * expanded or fake token seen since. Meeting an AbstractLineTok in that
 * second part fails with "abstract not expected".
 *)
let insert_virtual_positions l =
  let width info = String.length (Ast_c.str_of_info info) in
  let with_pinfo pinfo tok =
    TH.visitor_info_of_tok (fun info -> Ast_c.rewrap_pinfo pinfo info) tok
  in
  (* main pass, once an anchor is known *)
  let rec attach anchor offset done_rev toks =
    match toks with
    | [] -> List.rev done_rev
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             attach (Ast_c.parse_info_of_info info) (width info)
               (tok :: done_rev) rest
         | Ast_c.ExpandedTok (pi, _) ->
             let tok' =
               with_pinfo (Ast_c.ExpandedTok (pi, (anchor, offset))) tok in
             attach anchor (offset + width info) (tok' :: done_rev) rest
         | Ast_c.FakeTok (s, _) ->
             let tok' =
               with_pinfo (Ast_c.FakeTok (s, (anchor, offset))) tok in
             attach anchor (offset + width info) (tok' :: done_rev) rest
         | Ast_c.AbstractLineTok _ -> failwith "abstract not expected")
  in
  (* copy tokens as they are until the first OriginTok shows up *)
  let rec leading = function
    | [] -> []
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             tok :: attach (Ast_c.parse_info_of_info info) (width info) [] rest
         | _ -> tok :: leading rest)
  in
  leading l
708f4980 | 1513 | |
34e49164 | 1514 | |
485bce71 | 1515 | (* ------------------------------------------------------------------------- *) |
(* Main entry of the cpp token fixing heuristics.
 *
 * The tokens are wrapped with TV.mk_token_extended, a series of
 * heuristics mutates them in place, and the resulting tokens are
 * returned after a pass of insert_virtual_positions.
 *
 * The order of the phases is important. If the action heuristic ran
 * first then, because of ifdefs, some parentheses could be left
 * unclosed; a higher-order macro could then be wrongly assumed and eat
 * too many tokens. So the ifdef phase has to come first.
 *
 * The filtered view of the tokens is recomputed before each phase
 * because the mutable tokens may have been changed by the previous one,
 * so more of them may now be comments.
 *)
let fix_tokens_cpp2 ~macro_defs tokens =
  let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in

  (* Current tokens minus the comments. is_comment also filters the
   * TCommentCpp created in commentize_skip_start_to_end. Could also
   * filter #define/#include. *)
  let without_comments () =
    List.filter (fun x -> not (TH.is_comment x.tok)) !tokens2 in
  (* current tokens minus comments and cpp instructions *)
  let without_cpp () = filter_cpp_stuff !tokens2 in

  commentize_skip_start_to_end !tokens2;

  (* ifdef *)
  let ifdef_grouped = TV.mk_ifdef (without_comments ()) in
  set_ifdef_parenthize_info ifdef_grouped;

  find_ifdef_funheaders ifdef_grouped;
  find_ifdef_bool ifdef_grouped;
  find_ifdef_mid ifdef_grouped;
  (* change order ? maybe the cparen_else heuristic makes some of the
   * funheaders heuristics irrelevant ? *)
  find_ifdef_cparen_else ifdef_grouped;
  adjust_inifdef_include ifdef_grouped;

  (* macro 1 *)
  let paren_grouped = TV.mk_parenthised (without_cpp ()) in
  Cpp_token_c.apply_macro_defs
    ~msg_apply_known_macro
    ~msg_apply_known_macro_hint
    macro_defs paren_grouped;
  (* because the before field is used by apply_macro_defs *)
  tokens2 := TV.rebuild_tokens_extented !tokens2;

  (* Tagging contextual info (InFunc, InStruct, etc). Better to do that
   * after the ifdef-simplification phase. *)
  let brace_grouped = TV.mk_braceised (without_comments ()) in
  set_context_tag brace_grouped;

  (* macro *)
  let paren_grouped = TV.mk_parenthised (without_cpp ()) in
  let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
  find_define_init_brace_paren paren_grouped;
  find_string_macro_paren paren_grouped;
  find_macro_lineparen line_paren_grouped;
  find_macro_paren paren_grouped;

  (* obsolete: actions ? not yet *)
  let paren_grouped = TV.mk_parenthised (without_cpp ()) in
  find_actions paren_grouped;

  insert_virtual_positions (!tokens2 +> Common.acc_map (fun x -> x.tok))
1594 | ||
(* Run fix_tokens_cpp2 under the exclusive profiling label "HACK". *)
let time_hack1 ~macro_defs a =
  let run () = fix_tokens_cpp2 ~macro_defs a in
  Common.profile_code_exclusif "HACK" run
34e49164 | 1597 | |
(* Profiled entry point: runs time_hack1 under the label
 * "C parsing.fix_cpp". *)
let fix_tokens_cpp ~macro_defs a =
  let run () = time_hack1 ~macro_defs a in
  Common.profile_code "C parsing.fix_cpp" run
34e49164 | 1600 | |
34e49164 | 1601 | |
34e49164 | 1602 | |
34e49164 C |
1603 | |
1604 | (*****************************************************************************) | |
1605 | (* Lexing with lookahead *) | |
1606 | (*****************************************************************************) | |
1607 | ||
(* Why use yet another parsing_hack technique? Are the fix_xxx functions,
 * which do some pre-processing on the full list of tokens, not enough?
 * No, because sometimes we need more contextual info, and even if
 * set_context() tries to give some contextual info, it is not completely
 * accurate. So the following code gives yet another alternative, yet
 * another chance to transform some tokens.
 *
 * todo?: maybe we could try to get rid of this technique. Maybe a better
 * set_context() would make it possible to move this code to a fix_xx
 * technique.
 *
 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
 * it is more general to do it via my LALR(k) tech, because here we can
 * transform a token given some context information. Sometimes it
 * makes sense to transform a token in one context and sometimes not, and
 * lex can not provide us with this context information. Note that the
 * order of the clauses in the pattern matching in lookahead is important.
 * Do not cut/paste.
 *
 * Note that in next there are only "clean" tokens: there are no comment
 * or space tokens. This is done by the caller.
 *
 *)
1630 | ||
485bce71 C |
1631 | open Lexer_parser (* for the fields of lexer_hint type *) |
1632 | ||
(* Returns false when the first token of the list is a struct, union or
 * enum keyword, and true otherwise (including for the empty list).
 *)
let not_struct_enum toks =
  match toks with
  | [] -> true
  | first :: _ ->
      (match first with
       | Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _ -> false
       | _ -> true)
34e49164 | 1636 | |
485bce71 | 1637 | |
(* Contextual re-tagging of the head token of [next].
 *
 * Arguments:
 *  - pass:   number of the current parsing pass; cpp directives are turned
 *            into comment tokens when pass >= 2.
 *  - next:   the upcoming tokens; the head is the token being decided.
 *  - before: the tokens already passed (the patterns below inspect its
 *            head as the token immediately preceding the head of next).
 *
 * Returns the head of next, possibly replaced by a TypedefIdent, a
 * TCommentCpp or a TMacroIterator token. Typedef rules also register the
 * identifier through LP.add_typedef_root. Raises Impossible when next is
 * empty.
 *
 * The order of the rules matters: the first matching rule wins.
 *)
let lookahead2 ~pass next before =

  (* Common tail of every typedef-inference rule: report the inferred
   * typedef, register it, and return the re-tagged token.
   *)
  let typedef_ident s info =
    msg_typedef s; LP.add_typedef_root s;
    TypedefIdent (s, info)
  in

  match (next, before) with

  (*-------------------------------------------------------------*)
  (* typedef inference, parse_typedef_fix3 *)
  (*-------------------------------------------------------------*)
  (* xx xx *)
  | (TIdent (s, i1)::TIdent (s2, _)::_ , _)
    when not_struct_enum before && s =$= s2
      && ok_typedef s
      (* (take_safe 1 !passed_tok <> [TOPar]) ->  *)
    ->
      (* parse_typedef_fix3:
       *    acpi_object acpi_object;
       * was misparsed, because there is no time to call dt() in the
       * type_spec. The parser has internally already requested the next
       * token in order to be able to make decisions.
       * => special case in lexer_heuristic, again
       *)
      if !Flag_parsing_c.debug_typedef
      then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);

      LP.disable_typedef();

      typedef_ident s i1

  (* xx yy *)
  | (TIdent (s, i1)::TIdent (_, _)::_ , _)
    when not_struct_enum before
      && ok_typedef s
    ->
      (* && not_annot s2 BUT lead to false positive*)
      typedef_ident s i1

  (* xx inline *)
  | (TIdent (s, i1)::Tinline _::_ , _)
    when not_struct_enum before
      && ok_typedef s
    ->
      typedef_ident s i1

  (* [,(] xx [,)] AND param decl *)
  | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ )
    when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
      && ok_typedef s
    ->
      typedef_ident s i1

  (* xx* [,)] *)
  (* specialcase:  [,(] xx* [,)] *)
  | (TIdent (s, i1)::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
    when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      typedef_ident s i1

  (* xx** [,)] *)
  (* specialcase:  [,(] xx** [,)] *)
  | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
    when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      typedef_ident s i1

  (* xx const *   USELESS because of next rule ? *)
  | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
    when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      typedef_ident s i1

  (* xx const *)
  | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
    when not_struct_enum before
      && ok_typedef s
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
    ->
      typedef_ident s i1

  (* xx * const *)
  | (TIdent (s, i1)::TMul _::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
    when not_struct_enum before
      && ok_typedef s
    ->
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      typedef_ident s i1

  (* ( const xx)  *)
  | (TIdent (s, i1)::TCPar _::_,
     (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_)
    when ok_typedef s
    ->
      typedef_ident s i1

  (* ( xx ) [sizeof, ~] *)
  | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ )
    when not_struct_enum before
      && ok_typedef s
    ->
      typedef_ident s i1

  (* [(,] xx [   AND parameterdeclaration *)
  | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
    when (LP.current_context() =*= LP.InParameter)
      && ok_typedef s
    ->
      typedef_ident s i1

  (*------------------------------------------------------------*)
  (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
  (* but if have a '=', or ','   I think not *)
  (*------------------------------------------------------------*)

  (* static xx * yy  *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::_ ,
     (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_)
    when ok_typedef s
    ->
      typedef_ident s i1

  (*  TODO  xx * yy ; AND in start of compound element  *)

  (*  xx * yy,      AND  in paramdecl *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TComma _::_ , _)
    when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
      && ok_typedef s
    ->
      typedef_ident s i1

  (*  xx * yy ;     AND in Toplevel, except when have = before  *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TPtVirg _::_ , TEq _::_) ->
      TIdent (s, i1)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TPtVirg _::_ , _)
    when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ()))
      (* ok_typedef guard added for consistency with every other typedef
       * rule, so that rejected names are never registered as typedefs *)
      && ok_typedef s
    ->
      typedef_ident s i1

  (*  xx * yy ,     AND in Toplevel  *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TComma _::_ , _)
    when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
      && ok_typedef s
    ->
      typedef_ident s i1

  (*  xx * yy (     AND in Toplevel  *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TOPar _::_ , _)
    when not_struct_enum before
      && (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s
    ->
      typedef_ident s i1

  (* xx * yy [ *)
  (* todo? enough ? cos in struct def we can have some expression ! *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TOCro _::_ , _)
    when not_struct_enum before &&
      (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s
    ->
      typedef_ident s i1

  (* u16: 10; in struct *)
  | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
    when (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s
    ->
      typedef_ident s i1

  (*  why need TOPar condition as stated in preceding rule ? really needed ? *)
  (*   YES cos at toplevel can have some expression !! for instance when *)
  (*   enter in the dimension of an array *)
  (*
    | (TIdent s::TMul::TIdent s2::_ , _)
    when (take_safe 1 !passed_tok <> [Tstruct] &&
    (take_safe 1 !passed_tok <> [Tenum]))
    &&
    !LP._lexer_hint = Some LP.Toplevel ->
    msg_typedef s; LP.add_typedef_root s;
    TypedefIdent s
  *)

  (*  xx * yy =  *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TEq _::_ , _)
    when not_struct_enum before
      && ok_typedef s
    ->
      typedef_ident s i1

  (*  xx * yy)      AND in paramdecl *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TCPar _::_ , _)
    when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
      && ok_typedef s
    ->
      typedef_ident s i1

  (*  xx * yy; *) (* wrong ? *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TPtVirg _::_ ,
     (TOBrace _| TPtVirg _)::_)
    when not_struct_enum before
      && ok_typedef s
    ->
      (* kept inline: the extra warning must be emitted between the
       * registration and the return, as before *)
      msg_typedef s; LP.add_typedef_root s;
      msg_maybe_dangereous_typedef s;
      TypedefIdent (s, i1)

  (*  xx * yy,  and ';' before xx *) (* wrong ? *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::TComma _::_ ,
     (TOBrace _| TPtVirg _)::_)
    when ok_typedef s
    ->
      typedef_ident s i1

  (* xx_t * yy *)
  | (TIdent (s, i1)::TMul _::TIdent (_, _)::_ , _)
    when s ==~ regexp_typedef && not_struct_enum before
      (* struct user_info_t sometimes *)
      && ok_typedef s
    ->
      typedef_ident s i1

  (*  xx ** yy *)  (* wrong ? *)
  | (TIdent (s, i1)::TMul _::TMul _::TIdent (_, _)::_ , _)
    when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      typedef_ident s i1

  (* xx *** yy *)
  | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (_, _)::_ , _)
    when not_struct_enum before
      && ok_typedef s
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
    ->
      typedef_ident s i1

  (*  xx ** ) *)
  | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
    when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      typedef_ident s i1

  (* ----------------------------------- *)
  (* old: why not do like for other rules and start with TIdent ?
   * why do TOPar :: TIdent :: ..., _  and not TIdent :: ...,  TOPAr::_ ?
   * new: prefer now start with TIdent because otherwise the add_typedef_root
   * may have no effect if in second pass or if have disable the add_typedef.
   *)

  (*  (xx) yy *)
  | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
     (TOPar _)::x::_)
    when not (TH.is_stuff_taking_parenthized x) &&
      Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3
      && ok_typedef s
    ->
      (*TOPar info*)
      typedef_ident s i1

  (*  (xx) (    yy)
   * but false positif: typedef int (xxx_t)(...), so do specialisation below.
   *)
  (*
  | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
    when not (TH.is_stuff_taking_parenthized x)
      && ok_typedef s
        ->
      msg_typedef s; LP.add_typedef_root s;
      (* TOPar info *)
      TypedefIdent (s, i1)
  *)
  (* special case:  = (xx) (    yy) *)
  | (TIdent (s, i1)::TCPar _::TOPar _::_ ,
     (TOPar _)::(TEq _ |TEqEq _)::_)
    when ok_typedef s
    ->
      (* TOPar info *)
      typedef_ident s i1

  (*  (xx * ) yy *)
  | (TIdent (s, i1)::TMul _::TCPar _::TIdent (_, _)::_ , (TOPar _)::_)
    when ok_typedef s
    ->
      (*TOPar info*)
      typedef_ident s i1

  (* (xx){ ... }  constructor *)
  | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
    when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
      && ok_typedef s
    ->
      typedef_ident s i1

  (* can have sizeof on expression
     | (Tsizeof::TOPar::TIdent s::TCPar::_,   _) ->
     msg_typedef s; LP.add_typedef_root s;
     Tsizeof
  *)

  (* ----------------------------------- *)
  (* x ( *y )(params),  function pointer *)
  | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_,  _)
    when not_struct_enum before
      && ok_typedef s
    ->
      typedef_ident s i1

  (* x* ( *y )(params),  function pointer 2 *)
  | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_,  _)
    when not_struct_enum before
      && ok_typedef s
    ->
      typedef_ident s i1

  (*-------------------------------------------------------------*)
  (* CPP *)
  (*-------------------------------------------------------------*)
  | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
      TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
        as x)
    ::_, _
    ->
      (*
      if not !Flag_parsing_c.ifdef_to_if
      then TCommentCpp (Ast_c.CppDirective, ii)
      else
      *)
      (* not !LP._lexer_hint.toplevel *)
      if !Flag_parsing_c.ifdef_directive_passing
        || (pass >= 2)
      then begin

        if (LP.current_context () =*= LP.InInitializer)
        then begin
          pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
          incr Stat.nIfdefInitializer;
        end else begin
          pr2_cpp("IFDEF: or related inside function. I treat it as comment");
          incr Stat.nIfdefPassing;
        end;
        TCommentCpp (Token_c.CppDirective, ii)
      end
      else x

  | (TUndef (ii) as x)::_, _
    ->
      if (pass >= 2)
      then begin
        pr2_cpp("UNDEF: I treat it as comment");
        TCommentCpp (Token_c.CppDirective, ii)
      end
      else x

  | (TCppDirectiveOther (ii) as x)::_, _
    ->
      if (pass >= 2)
      then begin
        pr2_cpp ("OTHER directive: I treat it as comment");
        TCommentCpp (Token_c.CppDirective, ii)
      end
      else x

  (* If ident contain a for_each, then certainly a macro. But to be
   * sure should look if there is a '{' after the ')', but it requires
   * to count the '('. Because this can be expensive, we do that only
   * when the token contains "for_each".
   *)
  | (TIdent (s, i1)::TOPar _::rest, _)
    when not (LP.current_context () =*= LP.InTopLevel)
      (* otherwise a function such as static void loopback_enable(int i) {
       * will be considered as a loop
       *)
    ->
      if s ==~ regexp_foreach &&
        is_really_foreach (Common.take_safe forLOOKAHEAD rest)
      then begin
        msg_foreach s;
        TMacroIterator (s, i1)
      end
      else TIdent (s, i1)

  (*-------------------------------------------------------------*)
  (* default: leave the token untouched *)
  | v::_, _ -> v
  (* only reachable when next is empty *)
  | _ -> raise Impossible
2089 | ||
(* Profiled entry point for lookahead2: forwards [pass], [a] and [b]
 * unchanged and accounts the time under "C parsing.lookahead".
 *)
let lookahead ~pass a b =
  let decide () = lookahead2 ~pass a b in
  Common.profile_code "C parsing.lookahead" decide
34e49164 C |
2092 | |
2093 |