Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 | 4 | * Copyright (C) 2007, 2008 Ecole des Mines de Nantes |
34e49164 C |
5 | * |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
34e49164 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
15 | ||
16 | open Common | |
17 | ||
ae4735db | 18 | module TH = Token_helpers |
708f4980 | 19 | module TV = Token_views_c |
34e49164 C |
20 | module LP = Lexer_parser |
21 | ||
485bce71 | 22 | module Stat = Parsing_stat |
34e49164 | 23 | |
ae4735db | 24 | open Parser_c |
34e49164 | 25 | |
ae4735db | 26 | open TV |
708f4980 | 27 | |
34e49164 C |
28 | (*****************************************************************************) |
29 | (* Some debugging functions *) | |
30 | (*****************************************************************************) | |
31 | ||
708f4980 | 32 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing |
113803cf | 33 | |
ae4735db | 34 | let pr2_cpp s = |
34e49164 C |
35 | if !Flag_parsing_c.debug_cpp |
36 | then Common.pr2_once ("CPP-" ^ s) | |
37 | ||
38 | ||
ae4735db | 39 | let msg_gen cond is_known printer s = |
34e49164 C |
40 | if cond |
41 | then | |
42 | if not (!Flag_parsing_c.filter_msg) | |
43 | then printer s | |
44 | else | |
45 | if not (is_known s) | |
46 | then printer s | |
ae4735db | 47 | |
34e49164 | 48 | |
485bce71 C |
49 | (* In the following, there are some harcoded names of types or macros |
50 | * but they are not used by our heuristics! They are just here to | |
51 | * enable to detect false positive by printing only the typedef/macros | |
52 | * that we don't know yet. If we print everything, then we can easily | |
53 | * get lost with too much verbose tracing information. So those | |
54 | * functions "filter" some messages. So our heuristics are still good, | |
55 | * there is no more (or not that much) hardcoded linux stuff. | |
34e49164 | 56 | *) |
485bce71 | 57 | |
ae4735db C |
58 | let is_known_typdef = |
59 | (fun s -> | |
34e49164 C |
60 | (match s with |
61 | | "u_char" | "u_short" | "u_int" | "u_long" | |
ae4735db C |
62 | | "u8" | "u16" | "u32" | "u64" |
63 | | "s8" | "s16" | "s32" | "s64" | |
64 | | "__u8" | "__u16" | "__u32" | "__u64" | |
34e49164 | 65 | -> true |
ae4735db C |
66 | |
67 | | "acpi_handle" | |
68 | | "acpi_status" | |
34e49164 C |
69 | -> true |
70 | ||
ae4735db C |
71 | | "FILE" |
72 | | "DIR" | |
34e49164 | 73 | -> true |
ae4735db | 74 | |
34e49164 | 75 | | s when s =~ ".*_t$" -> true |
ae4735db | 76 | | _ -> false |
34e49164 C |
77 | ) |
78 | ) | |
485bce71 | 79 | |
ae4735db C |
80 | (* note: cant use partial application with let msg_typedef = |
81 | * because it would compute msg_typedef at compile time when | |
485bce71 C |
82 | * the flag debug_typedef is always false |
83 | *) | |
ae4735db | 84 | let msg_typedef s = |
485bce71 C |
85 | incr Stat.nTypedefInfer; |
86 | msg_gen (!Flag_parsing_c.debug_typedef) | |
87 | is_known_typdef | |
ae4735db | 88 | (fun s -> |
34e49164 C |
89 | pr2_cpp ("TYPEDEF: promoting: " ^ s) |
90 | ) | |
91 | s | |
92 | ||
485bce71 C |
93 | let msg_maybe_dangereous_typedef s = |
94 | if not (is_known_typdef s) | |
ae4735db | 95 | then |
113803cf C |
96 | pr2 |
97 | ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s) | |
34e49164 C |
98 | |
99 | ||
100 | ||
ae4735db | 101 | let msg_declare_macro s = |
485bce71 | 102 | incr Stat.nMacroDecl; |
34e49164 | 103 | msg_gen (!Flag_parsing_c.debug_cpp) |
ae4735db C |
104 | (fun s -> |
105 | (match s with | |
34e49164 | 106 | | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM" |
ae4735db | 107 | | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD" |
34e49164 C |
108 | | "DEFINE_SPINLOCK" | "DEFINE_TIMER" |
109 | | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR" | |
110 | | "SENSOR_DEVICE_ATTR" | |
111 | | "LIST_HEAD" | |
112 | | "DECLARE_WORK" | "DECLARE_TASKLET" | |
113 | | "PORT_ATTR_RO" | "PORT_PMA_ATTR" | |
114 | | "DECLARE_BITMAP" | |
115 | ||
116 | -> true | |
117 | (* | |
118 | | s when s =~ "^DECLARE_.*" -> true | |
119 | | s when s =~ ".*_ATTR$" -> true | |
120 | | s when s =~ "^DEFINE_.*" -> true | |
121 | *) | |
122 | ||
123 | | _ -> false | |
124 | ) | |
125 | ) | |
126 | (fun s -> pr2_cpp ("MACRO: found declare-macro: " ^ s)) | |
127 | s | |
34e49164 | 128 | |
ae4735db C |
129 | |
130 | let msg_foreach s = | |
485bce71 | 131 | incr Stat.nIteratorHeuristic; |
34e49164 C |
132 | pr2_cpp ("MACRO: found foreach: " ^ s) |
133 | ||
134 | ||
ae4735db C |
135 | (* ?? |
136 | let msg_debug_macro s = | |
34e49164 | 137 | pr2_cpp ("MACRO: found debug-macro: " ^ s) |
485bce71 | 138 | *) |
34e49164 C |
139 | |
140 | ||
ae4735db | 141 | let msg_macro_noptvirg s = |
485bce71 | 142 | incr Stat.nMacroStmt; |
34e49164 C |
143 | pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s) |
144 | ||
ae4735db | 145 | let msg_macro_toplevel_noptvirg s = |
485bce71 | 146 | incr Stat.nMacroStmt; |
34e49164 C |
147 | pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s) |
148 | ||
ae4735db | 149 | let msg_macro_noptvirg_single s = |
485bce71 | 150 | incr Stat.nMacroStmt; |
34e49164 C |
151 | pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s) |
152 | ||
153 | ||
485bce71 C |
154 | |
155 | ||
ae4735db | 156 | let msg_macro_higher_order s = |
485bce71 | 157 | incr Stat.nMacroHigherOrder; |
34e49164 | 158 | msg_gen (!Flag_parsing_c.debug_cpp) |
ae4735db C |
159 | (fun s -> |
160 | (match s with | |
34e49164 C |
161 | | "DBGINFO" |
162 | | "DBGPX" | |
163 | | "DFLOW" | |
164 | -> true | |
165 | | _ -> false | |
166 | ) | |
167 | ) | |
168 | (fun s -> pr2_cpp ("MACRO: found higher ordre macro : " ^ s)) | |
169 | s | |
170 | ||
171 | ||
ae4735db | 172 | let msg_stringification s = |
485bce71 | 173 | incr Stat.nMacroString; |
34e49164 | 174 | msg_gen (!Flag_parsing_c.debug_cpp) |
ae4735db C |
175 | (fun s -> |
176 | (match s with | |
34e49164 C |
177 | | "REVISION" |
178 | | "UTS_RELEASE" | |
179 | | "SIZE_STR" | |
180 | | "DMA_STR" | |
181 | -> true | |
ae4735db | 182 | (* s when s =~ ".*STR.*" -> true *) |
34e49164 C |
183 | | _ -> false |
184 | ) | |
185 | ) | |
186 | (fun s -> pr2_cpp ("MACRO: found string-macro " ^ s)) | |
187 | s | |
188 | ||
485bce71 C |
189 | let msg_stringification_params s = |
190 | incr Stat.nMacroString; | |
191 | pr2_cpp ("MACRO: string-macro with params : " ^ s) | |
192 | ||
193 | ||
194 | ||
ae4735db | 195 | let msg_apply_known_macro s = |
485bce71 C |
196 | incr Stat.nMacroExpand; |
197 | pr2_cpp ("MACRO: found known macro = " ^ s) | |
198 | ||
ae4735db | 199 | let msg_apply_known_macro_hint s = |
485bce71 C |
200 | incr Stat.nMacroHint; |
201 | pr2_cpp ("MACRO: found known macro hint = " ^ s) | |
202 | ||
203 | ||
34e49164 | 204 | |
ae4735db C |
205 | |
206 | let msg_ifdef_bool_passing is_ifdef_positif = | |
485bce71 C |
207 | incr Stat.nIfdefZero; (* of Version ? *) |
208 | if is_ifdef_positif | |
209 | then pr2_cpp "commenting parts of a #if 1 or #if LINUX_VERSION" | |
210 | else pr2_cpp "commenting a #if 0 or #if LINUX_VERSION or __cplusplus" | |
211 | ||
212 | ||
213 | let msg_ifdef_mid_something () = | |
214 | incr Stat.nIfdefExprPassing; | |
215 | pr2_cpp "found ifdef-mid-something" | |
216 | ||
217 | let msg_ifdef_funheaders () = | |
218 | incr Stat.nIfdefFunheader; | |
219 | () | |
220 | ||
ae4735db | 221 | let msg_ifdef_cparen_else () = |
708f4980 C |
222 | incr Stat.nIfdefPassing; |
223 | pr2_cpp("found ifdef-cparen-else") | |
224 | ||
485bce71 | 225 | |
ae4735db | 226 | let msg_attribute s = |
485bce71 C |
227 | incr Stat.nMacroAttribute; |
228 | pr2_cpp("ATTR:" ^ s) | |
ae4735db | 229 | |
485bce71 C |
230 | |
231 | ||
34e49164 | 232 | (*****************************************************************************) |
485bce71 | 233 | (* The regexp and basic view definitions *) |
34e49164 C |
234 | (*****************************************************************************) |
235 | ||
236 | (* opti: better to built then once and for all, especially regexp_foreach *) | |
237 | ||
238 | let regexp_macro = Str.regexp | |
239 | "^[A-Z_][A-Z_0-9]*$" | |
240 | ||
241 | (* linuxext: *) | |
242 | let regexp_annot = Str.regexp | |
243 | "^__.*$" | |
244 | ||
245 | (* linuxext: *) | |
246 | let regexp_declare = Str.regexp | |
247 | ".*DECLARE.*" | |
248 | ||
249 | (* linuxext: *) | |
ae4735db | 250 | let regexp_foreach = Str.regexp_case_fold |
34e49164 C |
251 | ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)" |
252 | ||
253 | let regexp_typedef = Str.regexp | |
254 | ".*_t$" | |
255 | ||
34e49164 C |
256 | let false_typedef = [ |
257 | "printk"; | |
258 | ] | |
259 | ||
34e49164 | 260 | |
485bce71 C |
261 | let ok_typedef s = not (List.mem s false_typedef) |
262 | ||
ae4735db | 263 | let not_annot s = |
485bce71 C |
264 | not (s ==~ regexp_annot) |
265 | ||
266 | ||
34e49164 | 267 | |
34e49164 | 268 | |
485bce71 C |
269 | (*****************************************************************************) |
270 | (* Helpers *) | |
271 | (*****************************************************************************) | |
272 | ||
485bce71 | 273 | (* ------------------------------------------------------------------------- *) |
ae4735db C |
274 | (* the pair is the status of '()' and '{}', ex: (-1,0) |
275 | * if too much ')' and good '{}' | |
276 | * could do for [] too ? | |
485bce71 C |
277 | * could do for ',' if encounter ',' at "toplevel", not inside () or {} |
278 | * then if have ifdef, then certainly can lead to a problem. | |
279 | *) | |
708f4980 | 280 | let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))= |
ae4735db | 281 | fun xs -> |
485bce71 | 282 | let cnt_paren, cnt_brace = ref 0, ref 0 in |
ae4735db | 283 | xs +> TV.iter_token_ifdef (fun x -> |
485bce71 C |
284 | (match x.tok with |
285 | | x when TH.is_opar x -> incr cnt_paren | |
286 | | TOBrace _ -> incr cnt_brace | |
287 | | x when TH.is_cpar x -> decr cnt_paren | |
288 | | TCBrace _ -> decr cnt_brace | |
289 | | _ -> () | |
290 | ) | |
291 | ); | |
292 | !cnt_paren, !cnt_brace | |
293 | ||
294 | ||
295 | (* ------------------------------------------------------------------------- *) | |
296 | let forLOOKAHEAD = 30 | |
297 | ||
ae4735db | 298 | |
485bce71 | 299 | (* look if there is a '{' just after the closing ')', and handling the |
ae4735db C |
300 | * possibility to have nested expressions inside nested parenthesis |
301 | * | |
485bce71 C |
302 | * todo: use indentation instead of premier(statement) ? |
303 | *) | |
ae4735db | 304 | let rec is_really_foreach xs = |
485bce71 C |
305 | let rec is_foreach_aux = function |
306 | | [] -> false, [] | |
307 | | TCPar _::TOBrace _::xs -> true, xs | |
308 | (* the following attempts to handle the cases where there is a | |
309 | single statement in the body of the loop. undoubtedly more | |
ae4735db | 310 | cases are needed. |
485bce71 C |
311 | todo: premier(statement) - suivant(funcall) |
312 | *) | |
313 | | TCPar _::TIdent _::xs -> true, xs | |
314 | | TCPar _::Tif _::xs -> true, xs | |
315 | | TCPar _::Twhile _::xs -> true, xs | |
316 | | TCPar _::Tfor _::xs -> true, xs | |
317 | | TCPar _::Tswitch _::xs -> true, xs | |
318 | | TCPar _::Treturn _::xs -> true, xs | |
319 | ||
320 | ||
321 | | TCPar _::xs -> false, xs | |
ae4735db | 322 | | TOPar _::xs -> |
485bce71 C |
323 | let (_, xs') = is_foreach_aux xs in |
324 | is_foreach_aux xs' | |
325 | | x::xs -> is_foreach_aux xs | |
326 | in | |
327 | is_foreach_aux xs +> fst | |
328 | ||
329 | ||
330 | (* ------------------------------------------------------------------------- *) | |
ae4735db | 331 | let set_ifdef_token_parenthize_info cnt x = |
485bce71 C |
332 | match x with |
333 | | TIfdef (tag, _) | |
334 | | TIfdefelse (tag, _) | |
335 | | TIfdefelif (tag, _) | |
336 | | TEndif (tag, _) | |
337 | ||
338 | | TIfdefBool (_, tag, _) | |
ae4735db | 339 | | TIfdefMisc (_, tag, _) |
485bce71 | 340 | | TIfdefVersion (_, tag, _) |
ae4735db | 341 | -> |
485bce71 C |
342 | tag := Some cnt; |
343 | ||
344 | | _ -> raise Impossible | |
485bce71 C |
345 | |
346 | ||
485bce71 | 347 | |
ae4735db | 348 | let ifdef_paren_cnt = ref 0 |
485bce71 | 349 | |
ae4735db C |
350 | |
351 | let rec set_ifdef_parenthize_info xs = | |
485bce71 C |
352 | xs +> List.iter (function |
353 | | NotIfdefLine xs -> () | |
ae4735db C |
354 | | Ifdefbool (_, xxs, info_ifdef) |
355 | | Ifdef (xxs, info_ifdef) -> | |
356 | ||
485bce71 C |
357 | incr ifdef_paren_cnt; |
358 | let total_directives = List.length info_ifdef in | |
359 | ||
ae4735db | 360 | info_ifdef +> List.iter (fun x -> |
485bce71 C |
361 | set_ifdef_token_parenthize_info (!ifdef_paren_cnt, total_directives) |
362 | x.tok); | |
363 | xxs +> List.iter set_ifdef_parenthize_info | |
364 | ) | |
365 | ||
366 | ||
978fd7e5 C |
367 | (*****************************************************************************) |
368 | (* The parsing hack for #define *) | |
369 | (*****************************************************************************) | |
370 | ||
ae4735db | 371 | (* To parse macro definitions I need to do some tricks |
978fd7e5 C |
372 | * as some information can be get only at the lexing level. For instance |
373 | * the space after the name of the macro in '#define foo (x)' is meaningful | |
374 | * but the grammar can not get this information. So define_ident below | |
375 | * look at such space and generate a special TOpardefine. In a similar | |
376 | * way macro definitions can contain some antislash and newlines | |
ae4735db C |
377 | * and the grammar need to know where the macro ends (which is |
378 | * a line-level and so low token-level information). Hence the | |
978fd7e5 | 379 | * function 'define_line' below and the TDefEol. |
ae4735db C |
380 | * |
381 | * update: TDefEol is handled in a special way at different places, | |
978fd7e5 C |
382 | * a little bit like EOF, especially for error recovery, so this |
383 | * is an important token that should not be retagged! | |
ae4735db C |
384 | * |
385 | * | |
386 | * ugly hack, a better solution perhaps would be to erase TDefEOL | |
387 | * from the Ast and list of tokens in parse_c. | |
388 | * | |
978fd7e5 | 389 | * note: I do a +1 somewhere, it's for the unparsing to correctly sync. |
ae4735db | 390 | * |
978fd7e5 C |
391 | * note: can't replace mark_end_define by simply a fakeInfo(). The reason |
392 | * is where is the \n TCommentSpace. Normally there is always a last token | |
393 | * to synchronize on, either EOF or the token of the next toplevel. | |
ae4735db | 394 | * In the case of the #define we got in list of token |
978fd7e5 C |
395 | * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will |
396 | * not synchronize on it and so we will not print the "\n". | |
397 | * A solution would be to put the TDefEOL before the "\n". | |
c491d8ee | 398 | * (jll: tried to do this, see the comment "Put end of line..." below) |
ae4735db C |
399 | * |
400 | * todo?: could put a ExpandedTok for that ? | |
978fd7e5 | 401 | *) |
ae4735db C |
402 | let mark_end_define ii = |
403 | let ii' = | |
404 | { Ast_c.pinfo = Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with | |
405 | Common.str = ""; | |
978fd7e5 C |
406 | Common.charpos = Ast_c.pos_of_info ii + 1 |
407 | }; | |
408 | cocci_tag = ref Ast_c.emptyAnnot; | |
409 | comments_tag = ref Ast_c.emptyComments; | |
ae4735db | 410 | } |
978fd7e5 C |
411 | in |
412 | TDefEOL (ii') | |
413 | ||
414 | (* put the TDefEOL at the good place *) | |
ae4735db | 415 | let rec define_line_1 acc xs = |
978fd7e5 C |
416 | match xs with |
417 | | [] -> List.rev acc | |
418 | | TDefine ii::xs -> | |
419 | let line = Ast_c.line_of_info ii in | |
420 | let acc = (TDefine ii) :: acc in | |
421 | define_line_2 acc line ii xs | |
422 | | TCppEscapedNewline ii::xs -> | |
423 | pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at"); | |
424 | pr2 (Ast_c.strloc_of_info ii); | |
425 | let acc = (TCommentSpace ii) :: acc in | |
426 | define_line_1 acc xs | |
427 | | x::xs -> define_line_1 (x::acc) xs | |
428 | ||
ae4735db C |
429 | and define_line_2 acc line lastinfo xs = |
430 | match xs with | |
431 | | [] -> | |
978fd7e5 | 432 | (* should not happened, should meet EOF before *) |
ae4735db | 433 | pr2 "PB: WEIRD"; |
978fd7e5 | 434 | List.rev (mark_end_define lastinfo::acc) |
ae4735db | 435 | | x::xs -> |
978fd7e5 C |
436 | let line' = TH.line_of_tok x in |
437 | let info = TH.info_of_tok x in | |
438 | ||
439 | (match x with | |
ae4735db | 440 | | EOF ii -> |
978fd7e5 C |
441 | let acc = (mark_end_define lastinfo) :: acc in |
442 | let acc = (EOF ii) :: acc in | |
443 | define_line_1 acc xs | |
ae4735db | 444 | | TCppEscapedNewline ii -> |
978fd7e5 C |
445 | if (line' <> line) then pr2 "PB: WEIRD: not same line number"; |
446 | let acc = (TCommentSpace ii) :: acc in | |
447 | define_line_2 acc (line+1) info xs | |
ae4735db | 448 | | x -> |
978fd7e5 | 449 | if line' =|= line |
ae4735db | 450 | then define_line_2 (x::acc) line info xs |
c491d8ee C |
451 | else |
452 | (* Put end of line token before the newline. A newline at least | |
453 | must be there because the line changed and because we saw a | |
454 | #define previously to get to this function at all *) | |
455 | define_line_1 | |
456 | ((List.hd acc)::(mark_end_define lastinfo::(List.tl acc))) | |
457 | (x::xs) | |
978fd7e5 C |
458 | ) |
459 | ||
ae4735db | 460 | let rec define_ident acc xs = |
978fd7e5 C |
461 | match xs with |
462 | | [] -> List.rev acc | |
ae4735db | 463 | | TDefine ii::xs -> |
978fd7e5 C |
464 | let acc = TDefine ii :: acc in |
465 | (match xs with | |
ae4735db | 466 | | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs -> |
978fd7e5 C |
467 | (* Change also the kind of TIdent to avoid bad interaction |
468 | * with other parsing_hack tricks. For instant if keep TIdent then | |
469 | * the stringication algo can believe the TIdent is a string-macro. | |
470 | * So simpler to change the kind of the ident too. | |
471 | *) | |
ae4735db | 472 | (* if TOParDefine sticked to the ident, then |
978fd7e5 C |
473 | * it's a macro-function. Change token to avoid ambiguity |
474 | * between #define foo(x) and #define foo (x) | |
475 | *) | |
476 | let acc = (TCommentSpace i1) :: acc in | |
477 | let acc = (TIdentDefine (s,i2)) :: acc in | |
478 | let acc = (TOParDefine i3) :: acc in | |
479 | define_ident acc xs | |
480 | ||
ae4735db | 481 | | TCommentSpace i1::TIdent (s,i2)::xs -> |
978fd7e5 C |
482 | let acc = (TCommentSpace i1) :: acc in |
483 | let acc = (TIdentDefine (s,i2)) :: acc in | |
484 | define_ident acc xs | |
485 | ||
486 | (* bugfix: ident of macro (as well as params, cf below) can be tricky | |
487 | * note, do we need to subst in the body of the define ? no cos | |
488 | * here the issue is the name of the macro, as in #define inline, | |
ae4735db | 489 | * so obviously the name of this macro will not be used in its |
978fd7e5 C |
490 | * body (it would be a recursive macro, which is forbidden). |
491 | *) | |
ae4735db C |
492 | |
493 | | TCommentSpace i1::t::xs -> | |
978fd7e5 C |
494 | |
495 | let s = TH.str_of_tok t in | |
496 | let ii = TH.info_of_tok t in | |
497 | if s ==~ Common.regexp_alpha | |
498 | then begin | |
499 | pr2 (spf "remapping: %s to an ident in macro name" s); | |
500 | let acc = (TCommentSpace i1) :: acc in | |
501 | let acc = (TIdentDefine (s,ii)) :: acc in | |
502 | define_ident acc xs | |
503 | end | |
504 | else begin | |
ae4735db | 505 | pr2 "WEIRD: weird #define body"; |
978fd7e5 C |
506 | define_ident acc xs |
507 | end | |
508 | ||
ae4735db C |
509 | | _ -> |
510 | pr2 "WEIRD: weird #define body"; | |
978fd7e5 C |
511 | define_ident acc xs |
512 | ) | |
513 | | x::xs -> | |
514 | let acc = x :: acc in | |
515 | define_ident acc xs | |
978fd7e5 C |
516 | |
517 | ||
ae4735db C |
518 | |
519 | let fix_tokens_define2 xs = | |
978fd7e5 C |
520 | define_ident [] (define_line_1 [] xs) |
521 | ||
ae4735db | 522 | let fix_tokens_define a = |
978fd7e5 | 523 | Common.profile_code "C parsing.fix_define" (fun () -> fix_tokens_define2 a) |
ae4735db | 524 | |
978fd7e5 C |
525 | |
526 | ||
527 | ||
528 | ||
529 | (* ------------------------------------------------------------------------- *) | |
530 | (* Other parsing hacks related to cpp, Include/Define hacks *) | |
531 | (* ------------------------------------------------------------------------- *) | |
532 | ||
533 | (* Sometimes I prefer to generate a single token for a list of things in the | |
534 | * lexer so that if I have to passed them, like for passing TInclude then | |
ae4735db C |
535 | * it's easy. Also if I don't do a single token, then I need to |
536 | * parse the rest which may not need special stuff, like detecting | |
978fd7e5 C |
537 | * end of line which the parser is not really ready for. So for instance |
538 | * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just | |
ae4735db | 539 | * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ? |
978fd7e5 C |
540 | * but this kind of token is valid only after a #include and the |
541 | * lexing and parsing rules are different for such tokens so not that | |
542 | * easy to parse such things in parser_c.mly. Hence the following hacks. | |
ae4735db | 543 | * |
978fd7e5 C |
544 | * less?: maybe could get rid of this like I get rid of some of fix_define. |
545 | *) | |
546 | ||
547 | (* helpers *) | |
548 | ||
549 | (* used to generate new token from existing one *) | |
550 | let new_info posadd str ii = | |
ae4735db C |
551 | { Ast_c.pinfo = |
552 | Ast_c.OriginTok { (Ast_c.parse_info_of_info ii) with | |
978fd7e5 C |
553 | charpos = Ast_c.pos_of_info ii + posadd; |
554 | str = str; | |
555 | column = Ast_c.col_of_info ii + posadd; | |
556 | }; | |
557 | (* must generate a new ref each time, otherwise share *) | |
558 | cocci_tag = ref Ast_c.emptyAnnot; | |
559 | comments_tag = ref Ast_c.emptyComments; | |
560 | } | |
561 | ||
562 | ||
ae4735db | 563 | let rec comment_until_defeol xs = |
978fd7e5 | 564 | match xs with |
ae4735db | 565 | | [] -> |
978fd7e5 C |
566 | (* job not done in Cpp_token_c.define_parse ? *) |
567 | failwith "cant find end of define token TDefEOL" | |
ae4735db | 568 | | x::xs -> |
978fd7e5 | 569 | (match x with |
ae4735db | 570 | | Parser_c.TDefEOL i -> |
978fd7e5 C |
571 | Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok x) |
572 | ::xs | |
ae4735db C |
573 | | _ -> |
574 | let x' = | |
978fd7e5 C |
575 | (* bugfix: otherwise may lose a TComment token *) |
576 | if TH.is_real_comment x | |
577 | then x | |
578 | else Parser_c.TCommentCpp (Token_c.CppPassingNormal (*good?*), TH.info_of_tok x) | |
579 | in | |
580 | x'::comment_until_defeol xs | |
581 | ) | |
582 | ||
ae4735db C |
583 | let drop_until_defeol xs = |
584 | List.tl | |
978fd7e5 C |
585 | (Common.drop_until (function Parser_c.TDefEOL _ -> true | _ -> false) xs) |
586 | ||
587 | ||
588 | ||
589 | (* ------------------------------------------------------------------------- *) | |
590 | (* returns a pair (replaced token, list of next tokens) *) | |
591 | (* ------------------------------------------------------------------------- *) | |
592 | ||
ae4735db C |
593 | let tokens_include (info, includes, filename, inifdef) = |
594 | Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef), | |
595 | [Parser_c.TIncludeFilename | |
978fd7e5 C |
596 | (filename, (new_info (String.length includes) filename info)) |
597 | ] | |
598 | ||
599 | ||
600 | ||
601 | ||
485bce71 C |
602 | (*****************************************************************************) |
603 | (* CPP handling: macros, ifdefs, macros defs *) | |
604 | (*****************************************************************************) | |
605 | ||
0708f913 C |
606 | (* ------------------------------------------------------------------------- *) |
607 | (* special skip_start skip_end handling *) | |
608 | (* ------------------------------------------------------------------------- *) | |
609 | ||
610 | (* note: after this normally the token list should not contain any more the | |
611 | * TCommentSkipTagStart and End tokens. | |
612 | *) | |
613 | let rec commentize_skip_start_to_end xs = | |
614 | match xs with | |
615 | | [] -> () | |
ae4735db | 616 | | x::xs -> |
0708f913 | 617 | (match x with |
ae4735db C |
618 | | {tok = TCommentSkipTagStart info} -> |
619 | (try | |
620 | let (before, x2, after) = | |
0708f913 C |
621 | xs +> Common.split_when (function |
622 | | {tok = TCommentSkipTagEnd _ } -> true | |
ae4735db | 623 | | _ -> false |
0708f913 C |
624 | ) |
625 | in | |
626 | let topass = x::x2::before in | |
ae4735db | 627 | topass +> List.iter (fun tok -> |
0708f913 C |
628 | set_as_comment Token_c.CppPassingExplicit tok |
629 | ); | |
630 | commentize_skip_start_to_end after | |
ae4735db | 631 | with Not_found -> |
0708f913 C |
632 | failwith "could not find end of skip_start special comment" |
633 | ) | |
ae4735db | 634 | | {tok = TCommentSkipTagEnd info} -> |
0708f913 | 635 | failwith "found skip_end comment but no skip_start" |
ae4735db | 636 | | _ -> |
0708f913 C |
637 | commentize_skip_start_to_end xs |
638 | ) | |
ae4735db C |
639 | |
640 | ||
0708f913 C |
641 | |
642 | ||
34e49164 C |
643 | (* ------------------------------------------------------------------------- *) |
644 | (* ifdef keeping/passing *) | |
645 | (* ------------------------------------------------------------------------- *) | |
646 | ||
647 | (* #if 0, #if 1, #if LINUX_VERSION handling *) | |
ae4735db C |
648 | let rec find_ifdef_bool xs = |
649 | xs +> List.iter (function | |
34e49164 | 650 | | NotIfdefLine _ -> () |
ae4735db | 651 | | Ifdefbool (is_ifdef_positif, xxs, info_ifdef_stmt) -> |
485bce71 C |
652 | |
653 | msg_ifdef_bool_passing is_ifdef_positif; | |
34e49164 C |
654 | |
655 | (match xxs with | |
656 | | [] -> raise Impossible | |
ae4735db | 657 | | firstclause::xxs -> |
0708f913 | 658 | info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); |
ae4735db | 659 | |
34e49164 | 660 | if is_ifdef_positif |
ae4735db | 661 | then xxs +> List.iter |
0708f913 | 662 | (iter_token_ifdef (set_as_comment Token_c.CppPassingNormal)) |
34e49164 | 663 | else begin |
0708f913 | 664 | firstclause +> iter_token_ifdef (set_as_comment Token_c.CppPassingNormal); |
34e49164 C |
665 | (match List.rev xxs with |
666 | (* keep only last *) | |
ae4735db C |
667 | | last::startxs -> |
668 | startxs +> List.iter | |
0708f913 | 669 | (iter_token_ifdef (set_as_comment Token_c.CppPassingNormal)) |
34e49164 C |
670 | | [] -> (* not #else *) () |
671 | ); | |
672 | end | |
673 | ); | |
ae4735db | 674 | |
34e49164 C |
675 | | Ifdef (xxs, info_ifdef_stmt) -> xxs +> List.iter find_ifdef_bool |
676 | ) | |
677 | ||
678 | ||
679 | ||
34e49164 C |
680 | let thresholdIfdefSizeMid = 6 |
681 | ||
682 | (* infer ifdef involving not-closed expressions/statements *) | |
ae4735db C |
683 | let rec find_ifdef_mid xs = |
684 | xs +> List.iter (function | |
34e49164 | 685 | | NotIfdefLine _ -> () |
ae4735db C |
686 | | Ifdef (xxs, info_ifdef_stmt) -> |
687 | (match xxs with | |
34e49164 C |
688 | | [] -> raise Impossible |
689 | | [first] -> () | |
ae4735db | 690 | | first::second::rest -> |
34e49164 | 691 | (* don't analyse big ifdef *) |
ae4735db C |
692 | if xxs +> List.for_all |
693 | (fun xs -> List.length xs <= thresholdIfdefSizeMid) && | |
34e49164 | 694 | (* don't want nested ifdef *) |
ae4735db C |
695 | xxs +> List.for_all (fun xs -> |
696 | xs +> List.for_all | |
34e49164 C |
697 | (function NotIfdefLine _ -> true | _ -> false) |
698 | ) | |
ae4735db C |
699 | |
700 | then | |
34e49164 | 701 | let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in |
ae4735db C |
702 | let cnt1, cnt2 = List.hd counts in |
703 | if cnt1 <> 0 || cnt2 <> 0 && | |
b1b2de81 | 704 | counts +> List.for_all (fun x -> x =*= (cnt1, cnt2)) |
34e49164 | 705 | (* |
ae4735db C |
706 | if counts +> List.exists (fun (cnt1, cnt2) -> |
707 | cnt1 <> 0 || cnt2 <> 0 | |
708 | ) | |
34e49164 C |
709 | *) |
710 | then begin | |
485bce71 C |
711 | msg_ifdef_mid_something(); |
712 | ||
34e49164 | 713 | (* keep only first, treat the rest as comment *) |
0708f913 | 714 | info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); |
ae4735db | 715 | (second::rest) +> List.iter |
0708f913 | 716 | (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError)); |
34e49164 | 717 | end |
ae4735db | 718 | |
34e49164 C |
719 | ); |
720 | List.iter find_ifdef_mid xxs | |
ae4735db | 721 | |
34e49164 | 722 | (* no need complex analysis for ifdefbool *) |
ae4735db | 723 | | Ifdefbool (_, xxs, info_ifdef_stmt) -> |
34e49164 | 724 | List.iter find_ifdef_mid xxs |
ae4735db C |
725 | |
726 | ||
34e49164 C |
727 | ) |
728 | ||
729 | ||
730 | let thresholdFunheaderLimit = 4 | |
731 | ||
732 | (* ifdef defining alternate function header, type *) | |
733 | let rec find_ifdef_funheaders = function | |
734 | | [] -> () | |
ae4735db | 735 | | NotIfdefLine _::xs -> find_ifdef_funheaders xs |
34e49164 C |
736 | |
737 | (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *) | |
ae4735db | 738 | | Ifdef |
34e49164 C |
739 | ([(NotIfdefLine (({col = 0} as _xline1)::line1))::ifdefblock1; |
740 | (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2 | |
ae4735db | 741 | ], info_ifdef_stmt |
34e49164 C |
742 | ) |
743 | ::NotIfdefLine (({tok = TOBrace i; col = 0})::line3) | |
ae4735db | 744 | ::xs |
34e49164 C |
745 | when List.length ifdefblock1 <= thresholdFunheaderLimit && |
746 | List.length ifdefblock2 <= thresholdFunheaderLimit | |
ae4735db | 747 | -> |
34e49164 | 748 | find_ifdef_funheaders xs; |
485bce71 C |
749 | |
750 | msg_ifdef_funheaders (); | |
0708f913 | 751 | info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); |
34e49164 | 752 | let all_toks = [xline2] @ line2 in |
0708f913 C |
753 | all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) ; |
754 | ifdefblock2 +> iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError); | |
34e49164 C |
755 | |
756 | (* ifdef with nested ifdef *) | |
ae4735db | 757 | | Ifdef |
34e49164 | 758 | ([[NotIfdefLine (({col = 0} as _xline1)::line1)]; |
ae4735db | 759 | [Ifdef |
34e49164 C |
760 | ([[NotIfdefLine (({col = 0} as xline2)::line2)]; |
761 | [NotIfdefLine (({col = 0} as xline3)::line3)]; | |
762 | ], info_ifdef_stmt2 | |
763 | ) | |
764 | ] | |
ae4735db | 765 | ], info_ifdef_stmt |
34e49164 C |
766 | ) |
767 | ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4) | |
ae4735db C |
768 | ::xs |
769 | -> | |
34e49164 | 770 | find_ifdef_funheaders xs; |
485bce71 C |
771 | |
772 | msg_ifdef_funheaders (); | |
0708f913 C |
773 | info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); |
774 | info_ifdef_stmt2 +> List.iter (set_as_comment Token_c.CppDirective); | |
34e49164 | 775 | let all_toks = [xline2;xline3] @ line2 @ line3 in |
0708f913 | 776 | all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError); |
34e49164 C |
777 | |
778 | (* ifdef with elseif *) | |
ae4735db | 779 | | Ifdef |
34e49164 C |
780 | ([[NotIfdefLine (({col = 0} as _xline1)::line1)]; |
781 | [NotIfdefLine (({col = 0} as xline2)::line2)]; | |
782 | [NotIfdefLine (({col = 0} as xline3)::line3)]; | |
ae4735db | 783 | ], info_ifdef_stmt |
34e49164 C |
784 | ) |
785 | ::NotIfdefLine (({tok = TOBrace i; col = 0})::line4) | |
ae4735db C |
786 | ::xs |
787 | -> | |
34e49164 | 788 | find_ifdef_funheaders xs; |
485bce71 C |
789 | |
790 | msg_ifdef_funheaders (); | |
0708f913 | 791 | info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); |
34e49164 | 792 | let all_toks = [xline2;xline3] @ line2 @ line3 in |
0708f913 | 793 | all_toks +> List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) |
ae4735db | 794 | |
485bce71 | 795 | (* recurse *) |
ae4735db C |
796 | | Ifdef (xxs,info_ifdef_stmt)::xs |
797 | | Ifdefbool (_, xxs,info_ifdef_stmt)::xs -> | |
798 | List.iter find_ifdef_funheaders xxs; | |
34e49164 | 799 | find_ifdef_funheaders xs |
ae4735db | 800 | |
34e49164 C |
801 | |
802 | ||
485bce71 | 803 | (* ?? *) |
ae4735db C |
804 | let rec adjust_inifdef_include xs = |
805 | xs +> List.iter (function | |
34e49164 | 806 | | NotIfdefLine _ -> () |
ae4735db C |
807 | | Ifdef (xxs, info_ifdef_stmt) | Ifdefbool (_, xxs, info_ifdef_stmt) -> |
808 | xxs +> List.iter (iter_token_ifdef (fun tokext -> | |
34e49164 | 809 | match tokext.tok with |
ae4735db | 810 | | Parser_c.TInclude (s1, s2, inifdef_ref, ii) -> |
34e49164 C |
811 | inifdef_ref := true; |
812 | | _ -> () | |
813 | )); | |
814 | ) | |
815 | ||
816 | ||
817 | ||
34e49164 | 818 | |
485bce71 | 819 | |
34e49164 C |
820 | |
821 | ||
ae4735db C |
822 | let rec find_ifdef_cparen_else xs = |
823 | let rec aux xs = | |
824 | xs +> List.iter (function | |
708f4980 | 825 | | NotIfdefLine _ -> () |
ae4735db C |
826 | | Ifdef (xxs, info_ifdef_stmt) -> |
827 | (match xxs with | |
708f4980 C |
828 | | [] -> raise Impossible |
829 | | [first] -> () | |
ae4735db | 830 | | first::second::rest -> |
34e49164 | 831 | |
708f4980 | 832 | (* found a closing ')' just after the #else *) |
34e49164 | 833 | |
708f4980 C |
834 | (* Too bad ocaml does not support better list pattern matching |
835 | * a la Prolog-III where can match the end of lists. | |
836 | *) | |
ae4735db C |
837 | let condition = |
838 | if List.length first = 0 then false | |
839 | else | |
708f4980 C |
840 | let last_line = Common.last first in |
841 | match last_line with | |
ae4735db C |
842 | | NotIfdefLine xs -> |
843 | if List.length xs = 0 then false | |
844 | else | |
708f4980 C |
845 | let last_tok = Common.last xs in |
846 | TH.is_cpar last_tok.tok | |
ae4735db | 847 | | Ifdef _ | Ifdefbool _ -> false |
708f4980 C |
848 | in |
849 | if condition then begin | |
850 | msg_ifdef_cparen_else(); | |
34e49164 | 851 | |
708f4980 C |
852 | (* keep only first, treat the rest as comment *) |
853 | info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective); | |
ae4735db | 854 | (second::rest) +> List.iter |
708f4980 C |
855 | (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError)); |
856 | end | |
ae4735db | 857 | |
708f4980 C |
858 | ); |
859 | List.iter aux xxs | |
ae4735db | 860 | |
708f4980 | 861 | (* no need complex analysis for ifdefbool *) |
ae4735db | 862 | | Ifdefbool (_, xxs, info_ifdef_stmt) -> |
708f4980 C |
863 | List.iter aux xxs |
864 | ) | |
865 | in aux xs | |
34e49164 C |
866 | |
867 | ||
708f4980 C |
868 | (* ------------------------------------------------------------------------- *) |
869 | (* cpp-builtin part2, macro, using standard.h or other defs *) | |
870 | (* ------------------------------------------------------------------------- *) | |
34e49164 | 871 | |
ae4735db | 872 | (* now in cpp_token_c.ml *) |
34e49164 C |
873 | |
874 | (* ------------------------------------------------------------------------- *) | |
875 | (* stringification *) | |
876 | (* ------------------------------------------------------------------------- *) | |
877 | ||
ae4735db | 878 | let rec find_string_macro_paren xs = |
34e49164 C |
879 | match xs with |
880 | | [] -> () | |
ae4735db C |
881 | | Parenthised(xxs, info_parens)::xs -> |
882 | xxs +> List.iter (fun xs -> | |
883 | if xs +> List.exists | |
485bce71 | 884 | (function PToken({tok = (TString _| TMacroString _)}) -> true | _ -> false) && |
ae4735db C |
885 | xs +> List.for_all |
886 | (function PToken({tok = (TString _| TMacroString _)}) | PToken({tok = TIdent _}) -> | |
34e49164 C |
887 | true | _ -> false) |
888 | then | |
ae4735db | 889 | xs +> List.iter (fun tok -> |
34e49164 | 890 | match tok with |
ae4735db | 891 | | PToken({tok = TIdent (s,_)} as id) -> |
34e49164 | 892 | msg_stringification s; |
485bce71 | 893 | id.tok <- TMacroString (s, TH.info_of_tok id.tok); |
34e49164 C |
894 | | _ -> () |
895 | ) | |
ae4735db | 896 | else |
34e49164 C |
897 | find_string_macro_paren xs |
898 | ); | |
899 | find_string_macro_paren xs | |
ae4735db | 900 | | PToken(tok)::xs -> |
34e49164 | 901 | find_string_macro_paren xs |
ae4735db | 902 | |
34e49164 C |
903 | |
904 | (* ------------------------------------------------------------------------- *) | |
905 | (* macro2 *) | |
906 | (* ------------------------------------------------------------------------- *) | |
907 | ||
908 | (* don't forget to recurse in each case *) | |
ae4735db | 909 | let rec find_macro_paren xs = |
34e49164 C |
910 | match xs with |
911 | | [] -> () | |
ae4735db | 912 | |
34e49164 C |
913 | (* attribute *) |
914 | | PToken ({tok = Tattribute _} as id) | |
915 | ::Parenthised (xxs,info_parens) | |
916 | ::xs | |
ae4735db | 917 | -> |
34e49164 | 918 | pr2_cpp ("MACRO: __attribute detected "); |
ae4735db | 919 | [Parenthised (xxs, info_parens)] +> |
0708f913 C |
920 | iter_token_paren (set_as_comment Token_c.CppAttr); |
921 | set_as_comment Token_c.CppAttr id; | |
34e49164 C |
922 | find_macro_paren xs |
923 | ||
978fd7e5 C |
924 | | PToken ({tok = TattributeNoarg _} as id) |
925 | ::xs | |
ae4735db | 926 | -> |
978fd7e5 C |
927 | pr2_cpp ("MACRO: __attributenoarg detected "); |
928 | set_as_comment Token_c.CppAttr id; | |
929 | find_macro_paren xs | |
930 | ||
485bce71 | 931 | (* |
708f4980 | 932 | (* attribute cpp, __xxx id *) |
485bce71 | 933 | | PToken ({tok = TIdent (s,i1)} as id) |
708f4980 | 934 | ::PToken ({tok = TIdent (s2, i2)} as id2) |
485bce71 | 935 | ::xs when s ==~ regexp_annot |
ae4735db | 936 | -> |
485bce71 C |
937 | msg_attribute s; |
938 | id.tok <- TMacroAttr (s, i1); | |
708f4980 | 939 | find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *) |
485bce71 | 940 | |
708f4980 C |
941 | (* attribute cpp, id __xxx *) |
942 | | PToken ({tok = TIdent (s,i1)} as _id) | |
943 | ::PToken ({tok = TIdent (s2, i2)} as id2) | |
944 | ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef)) | |
ae4735db | 945 | -> |
485bce71 | 946 | msg_attribute s2; |
708f4980 C |
947 | id2.tok <- TMacroAttr (s2, i2); |
948 | find_macro_paren xs | |
949 | ||
950 | | PToken ({tok = (Tstatic _ | Textern _)} as tok1) | |
951 | ::PToken ({tok = TIdent (s,i1)} as attr) | |
952 | ::xs when s ==~ regexp_annot | |
ae4735db | 953 | -> |
708f4980 C |
954 | pr2_cpp ("storage attribute: " ^ s); |
955 | attr.tok <- TMacroAttrStorage (s,i1); | |
956 | (* recurse, may have other storage attributes *) | |
957 | find_macro_paren (PToken (tok1)::xs) | |
ae4735db | 958 | |
708f4980 | 959 | |
485bce71 C |
960 | *) |
961 | ||
962 | (* storage attribute *) | |
963 | | PToken ({tok = (Tstatic _ | Textern _)} as tok1) | |
ae4735db C |
964 | ::PToken ({tok = TMacroAttr (s,i1)} as attr)::xs |
965 | -> | |
485bce71 C |
966 | pr2_cpp ("storage attribute: " ^ s); |
967 | attr.tok <- TMacroAttrStorage (s,i1); | |
968 | (* recurse, may have other storage attributes *) | |
969 | find_macro_paren (PToken (tok1)::xs) | |
708f4980 | 970 | |
485bce71 | 971 | |
34e49164 | 972 | (* stringification |
ae4735db | 973 | * |
34e49164 | 974 | * the order of the matching clause is important |
ae4735db | 975 | * |
34e49164 C |
976 | *) |
977 | ||
978 | (* string macro with params, before case *) | |
485bce71 | 979 | | PToken ({tok = (TString _| TMacroString _)})::PToken ({tok = TIdent (s,_)} as id) |
34e49164 | 980 | ::Parenthised (xxs, info_parens) |
ae4735db | 981 | ::xs -> |
485bce71 C |
982 | |
983 | msg_stringification_params s; | |
984 | id.tok <- TMacroString (s, TH.info_of_tok id.tok); | |
ae4735db | 985 | [Parenthised (xxs, info_parens)] +> |
0708f913 | 986 | iter_token_paren (set_as_comment Token_c.CppMacro); |
34e49164 C |
987 | find_macro_paren xs |
988 | ||
989 | (* after case *) | |
990 | | PToken ({tok = TIdent (s,_)} as id) | |
991 | ::Parenthised (xxs, info_parens) | |
485bce71 | 992 | ::PToken ({tok = (TString _ | TMacroString _)}) |
ae4735db | 993 | ::xs -> |
485bce71 C |
994 | |
995 | msg_stringification_params s; | |
996 | id.tok <- TMacroString (s, TH.info_of_tok id.tok); | |
ae4735db | 997 | [Parenthised (xxs, info_parens)] +> |
0708f913 | 998 | iter_token_paren (set_as_comment Token_c.CppMacro); |
34e49164 C |
999 | find_macro_paren xs |
1000 | ||
1001 | ||
1002 | (* for the case where the string is not inside a funcall, but | |
1003 | * for instance in an initializer. | |
1004 | *) | |
ae4735db | 1005 | |
34e49164 | 1006 | (* string macro variable, before case *) |
485bce71 | 1007 | | PToken ({tok = (TString _ | TMacroString _)})::PToken ({tok = TIdent (s,_)} as id) |
ae4735db | 1008 | ::xs -> |
485bce71 | 1009 | |
34e49164 | 1010 | msg_stringification s; |
485bce71 | 1011 | id.tok <- TMacroString (s, TH.info_of_tok id.tok); |
34e49164 C |
1012 | find_macro_paren xs |
1013 | ||
1014 | (* after case *) | |
485bce71 C |
1015 | | PToken ({tok = TIdent (s,_)} as id) |
1016 | ::PToken ({tok = (TString _ | TMacroString _)}) | |
ae4735db | 1017 | ::xs -> |
485bce71 | 1018 | |
34e49164 | 1019 | msg_stringification s; |
485bce71 | 1020 | id.tok <- TMacroString (s, TH.info_of_tok id.tok); |
34e49164 C |
1021 | find_macro_paren xs |
1022 | ||
1023 | ||
ae4735db | 1024 | |
34e49164 C |
1025 | |
1026 | ||
1027 | (* recurse *) | |
ae4735db C |
1028 | | (PToken x)::xs -> find_macro_paren xs |
1029 | | (Parenthised (xxs, info_parens))::xs -> | |
34e49164 C |
1030 | xxs +> List.iter find_macro_paren; |
1031 | find_macro_paren xs | |
1032 | ||
1033 | ||
1034 | ||
1035 | ||
1036 | ||
1037 | (* don't forget to recurse in each case *) | |
ae4735db | 1038 | let rec find_macro_lineparen xs = |
34e49164 C |
1039 | match xs with |
1040 | | [] -> () | |
1041 | ||
1042 | (* linuxext: ex: static [const] DEVICE_ATTR(); *) | |
ae4735db | 1043 | | (Line |
34e49164 C |
1044 | ( |
1045 | [PToken ({tok = Tstatic _}); | |
1046 | PToken ({tok = TIdent (s,_)} as macro); | |
1047 | Parenthised (xxs,info_parens); | |
1048 | PToken ({tok = TPtVirg _}); | |
ae4735db | 1049 | ] |
34e49164 | 1050 | )) |
ae4735db C |
1051 | ::xs |
1052 | when (s ==~ regexp_macro) -> | |
485bce71 | 1053 | |
34e49164 C |
1054 | msg_declare_macro s; |
1055 | let info = TH.info_of_tok macro.tok in | |
1056 | macro.tok <- TMacroDecl (Ast_c.str_of_info info, info); | |
1057 | ||
1058 | find_macro_lineparen (xs) | |
1059 | ||
1060 | (* the static const case *) | |
ae4735db | 1061 | | (Line |
34e49164 C |
1062 | ( |
1063 | [PToken ({tok = Tstatic _}); | |
1064 | PToken ({tok = Tconst _} as const); | |
1065 | PToken ({tok = TIdent (s,_)} as macro); | |
1066 | Parenthised (xxs,info_parens); | |
1067 | PToken ({tok = TPtVirg _}); | |
ae4735db | 1068 | ] |
34e49164 C |
1069 | (*as line1*) |
1070 | ||
1071 | )) | |
ae4735db C |
1072 | ::xs |
1073 | when (s ==~ regexp_macro) -> | |
485bce71 | 1074 | |
34e49164 C |
1075 | msg_declare_macro s; |
1076 | let info = TH.info_of_tok macro.tok in | |
1077 | macro.tok <- TMacroDecl (Ast_c.str_of_info info, info); | |
ae4735db C |
1078 | |
1079 | (* need retag this const, otherwise ambiguity in grammar | |
34e49164 C |
1080 | 21: shift/reduce conflict (shift 121, reduce 137) on Tconst |
1081 | decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ... | |
1082 | decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ... | |
1083 | storage_class_spec : Tstatic . (137) | |
1084 | *) | |
1085 | const.tok <- TMacroDeclConst (TH.info_of_tok const.tok); | |
1086 | ||
1087 | find_macro_lineparen (xs) | |
1088 | ||
1089 | ||
1090 | (* same but without trailing ';' | |
ae4735db | 1091 | * |
34e49164 C |
1092 | * I do not put the final ';' because it can be on a multiline and |
1093 | * because of the way mk_line is coded, we will not have access to | |
1094 | * this ';' on the next line, even if next to the ')' *) | |
ae4735db | 1095 | | (Line |
34e49164 C |
1096 | ([PToken ({tok = Tstatic _}); |
1097 | PToken ({tok = TIdent (s,_)} as macro); | |
1098 | Parenthised (xxs,info_parens); | |
ae4735db | 1099 | ] |
34e49164 | 1100 | )) |
ae4735db C |
1101 | ::xs |
1102 | when s ==~ regexp_macro -> | |
34e49164 C |
1103 | |
1104 | msg_declare_macro s; | |
1105 | let info = TH.info_of_tok macro.tok in | |
1106 | macro.tok <- TMacroDecl (Ast_c.str_of_info info, info); | |
1107 | ||
1108 | find_macro_lineparen (xs) | |
1109 | ||
1110 | ||
1111 | ||
1112 | ||
1113 | (* on multiple lines *) | |
ae4735db | 1114 | | (Line |
34e49164 C |
1115 | ( |
1116 | (PToken ({tok = Tstatic _})::[] | |
1117 | ))) | |
ae4735db | 1118 | ::(Line |
34e49164 C |
1119 | ( |
1120 | [PToken ({tok = TIdent (s,_)} as macro); | |
1121 | Parenthised (xxs,info_parens); | |
1122 | PToken ({tok = TPtVirg _}); | |
1123 | ] | |
ae4735db | 1124 | ) |
34e49164 | 1125 | ) |
ae4735db C |
1126 | ::xs |
1127 | when (s ==~ regexp_macro) -> | |
485bce71 | 1128 | |
34e49164 C |
1129 | msg_declare_macro s; |
1130 | let info = TH.info_of_tok macro.tok in | |
1131 | macro.tok <- TMacroDecl (Ast_c.str_of_info info, info); | |
1132 | ||
1133 | find_macro_lineparen (xs) | |
1134 | ||
1135 | ||
ae4735db C |
1136 | (* linuxext: ex: DECLARE_BITMAP(); |
1137 | * | |
34e49164 C |
1138 | * Here I use regexp_declare and not regexp_macro because |
1139 | * Sometimes it can be a FunCallMacro such as DEBUG(foo()); | |
1140 | * Here we don't have the preceding 'static' so only way to | |
1141 | * not have positive is to restrict to .*DECLARE.* macros. | |
1142 | * | |
1143 | * but there is a grammar rule for that, so don't need this case anymore | |
0708f913 | 1144 | * unless the parameter of the DECLARE_xxx are weird and can not be mapped |
34e49164 C |
1145 | * on a argument_list |
1146 | *) | |
ae4735db C |
1147 | |
1148 | | (Line | |
34e49164 C |
1149 | ([PToken ({tok = TIdent (s,_)} as macro); |
1150 | Parenthised (xxs,info_parens); | |
1151 | PToken ({tok = TPtVirg _}); | |
1152 | ] | |
1153 | )) | |
ae4735db C |
1154 | ::xs |
1155 | when (s ==~ regexp_declare) -> | |
34e49164 C |
1156 | |
1157 | msg_declare_macro s; | |
1158 | let info = TH.info_of_tok macro.tok in | |
1159 | macro.tok <- TMacroDecl (Ast_c.str_of_info info, info); | |
1160 | ||
1161 | find_macro_lineparen (xs) | |
1162 | ||
ae4735db | 1163 | |
34e49164 C |
1164 | (* toplevel macros. |
1165 | * module_init(xxx) | |
ae4735db | 1166 | * |
34e49164 C |
1167 | * Could also transform the TIdent in a TMacroTop but can have false |
1168 | * positive, so easier to just change the TCPar and so just solve | |
1169 | * the end-of-stream pb of ocamlyacc | |
1170 | *) | |
ae4735db | 1171 | | (Line |
34e49164 C |
1172 | ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as _macro); |
1173 | Parenthised (xxs,info_parens); | |
1174 | ] as _line1 | |
1175 | )) | |
b1b2de81 | 1176 | ::xs when col1 =|= 0 |
ae4735db C |
1177 | -> |
1178 | let condition = | |
34e49164 C |
1179 | (* to reduce number of false positive *) |
1180 | (match xs with | |
ae4735db | 1181 | | (Line (PToken ({col = col2 } as other)::restline2))::_ -> |
b1b2de81 | 1182 | TH.is_eof other.tok || (col2 =|= 0 && |
34e49164 C |
1183 | (match other.tok with |
1184 | | TOBrace _ -> false (* otherwise would match funcdecl *) | |
1185 | | TCBrace _ when ctx <> InFunction -> false | |
ae4735db | 1186 | | TPtVirg _ |
34e49164 C |
1187 | | TDotDot _ |
1188 | -> false | |
1189 | | tok when TH.is_binary_operator tok -> false | |
ae4735db | 1190 | |
34e49164 C |
1191 | | _ -> true |
1192 | ) | |
1193 | ) | |
1194 | | _ -> false | |
1195 | ) | |
1196 | in | |
1197 | if condition | |
1198 | then begin | |
485bce71 | 1199 | |
34e49164 C |
1200 | msg_macro_toplevel_noptvirg s; |
1201 | (* just to avoid the end-of-stream pb of ocamlyacc *) | |
1202 | let tcpar = Common.last info_parens in | |
1203 | tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok); | |
ae4735db | 1204 | |
34e49164 | 1205 | (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*) |
ae4735db | 1206 | |
34e49164 C |
1207 | end; |
1208 | ||
1209 | find_macro_lineparen (xs) | |
1210 | ||
1211 | ||
1212 | ||
ae4735db | 1213 | (* macro with parameters |
34e49164 C |
1214 | * ex: DEBUG() |
1215 | * return x; | |
1216 | *) | |
ae4735db | 1217 | | (Line |
34e49164 C |
1218 | ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro); |
1219 | Parenthised (xxs,info_parens); | |
1220 | ] as _line1 | |
1221 | )) | |
ae4735db | 1222 | ::(Line |
34e49164 C |
1223 | (PToken ({col = col2 } as other)::restline2 |
1224 | ) as line2) | |
ae4735db | 1225 | ::xs |
34e49164 | 1226 | (* when s ==~ regexp_macro *) |
ae4735db C |
1227 | -> |
1228 | let condition = | |
1229 | (col1 =|= col2 && | |
34e49164 C |
1230 | (match other.tok with |
1231 | | TOBrace _ -> false (* otherwise would match funcdecl *) | |
1232 | | TCBrace _ when ctx <> InFunction -> false | |
ae4735db | 1233 | | TPtVirg _ |
34e49164 C |
1234 | | TDotDot _ |
1235 | -> false | |
1236 | | tok when TH.is_binary_operator tok -> false | |
1237 | ||
1238 | | _ -> true | |
1239 | ) | |
ae4735db C |
1240 | ) |
1241 | || | |
34e49164 | 1242 | (col2 <= col1 && |
113803cf | 1243 | (match other.tok, restline2 with |
b1b2de81 | 1244 | | TCBrace _, _ when ctx =*= InFunction -> true |
113803cf C |
1245 | | Treturn _, _ -> true |
1246 | | Tif _, _ -> true | |
1247 | | Telse _, _ -> true | |
1248 | ||
1249 | (* case of label, usually put in first line *) | |
ae4735db | 1250 | | TIdent _, (PToken ({tok = TDotDot _}))::_ -> |
113803cf C |
1251 | true |
1252 | ||
34e49164 C |
1253 | |
1254 | | _ -> false | |
1255 | ) | |
1256 | ) | |
1257 | ||
1258 | in | |
ae4735db | 1259 | |
34e49164 | 1260 | if condition |
ae4735db | 1261 | then |
b1b2de81 | 1262 | if col1 =|= 0 then () |
34e49164 C |
1263 | else begin |
1264 | msg_macro_noptvirg s; | |
485bce71 | 1265 | macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok); |
ae4735db | 1266 | [Parenthised (xxs, info_parens)] +> |
0708f913 | 1267 | iter_token_paren (set_as_comment Token_c.CppMacro); |
34e49164 C |
1268 | end; |
1269 | ||
1270 | find_macro_lineparen (line2::xs) | |
ae4735db C |
1271 | |
1272 | (* linuxext:? single macro | |
34e49164 C |
1273 | * ex: LOCK |
1274 | * foo(); | |
1275 | * UNLOCK | |
ae4735db | 1276 | * |
113803cf | 1277 | * todo: factorize code with previous rule ? |
34e49164 | 1278 | *) |
ae4735db | 1279 | | (Line |
34e49164 C |
1280 | ([PToken ({tok = TIdent (s,ii); col = col1; where = ctx} as macro); |
1281 | ] as _line1 | |
1282 | )) | |
ae4735db | 1283 | ::(Line |
34e49164 C |
1284 | (PToken ({col = col2 } as other)::restline2 |
1285 | ) as line2) | |
ae4735db | 1286 | ::xs -> |
34e49164 | 1287 | (* when s ==~ regexp_macro *) |
ae4735db C |
1288 | |
1289 | let condition = | |
1290 | (col1 =|= col2 && | |
34e49164 C |
1291 | col1 <> 0 && (* otherwise can match typedef of fundecl*) |
1292 | (match other.tok with | |
ae4735db C |
1293 | | TPtVirg _ -> false |
1294 | | TOr _ -> false | |
34e49164 C |
1295 | | TCBrace _ when ctx <> InFunction -> false |
1296 | | tok when TH.is_binary_operator tok -> false | |
1297 | ||
1298 | | _ -> true | |
1299 | )) || | |
1300 | (col2 <= col1 && | |
1301 | (match other.tok with | |
b1b2de81 | 1302 | | TCBrace _ when ctx =*= InFunction -> true |
34e49164 C |
1303 | | Treturn _ -> true |
1304 | | Tif _ -> true | |
1305 | | Telse _ -> true | |
1306 | | _ -> false | |
1307 | )) | |
1308 | in | |
ae4735db | 1309 | |
34e49164 C |
1310 | if condition |
1311 | then begin | |
1312 | msg_macro_noptvirg_single s; | |
485bce71 | 1313 | macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok); |
34e49164 C |
1314 | end; |
1315 | find_macro_lineparen (line2::xs) | |
ae4735db C |
1316 | |
1317 | | x::xs -> | |
34e49164 C |
1318 | find_macro_lineparen xs |
1319 | ||
1320 | ||
485bce71 C |
1321 | |
1322 | (* ------------------------------------------------------------------------- *) | |
1323 | (* define tobrace init *) | |
1324 | (* ------------------------------------------------------------------------- *) | |
1325 | ||
ae4735db C |
1326 | let rec find_define_init_brace_paren xs = |
1327 | let rec aux xs = | |
485bce71 C |
1328 | match xs with |
1329 | | [] -> () | |
1330 | ||
1331 | (* mainly for firefox *) | |
1332 | | (PToken {tok = TDefine _}) | |
1333 | ::(PToken {tok = TIdentDefine (s,_)}) | |
1334 | ::(PToken ({tok = TOBrace i1} as tokbrace)) | |
1335 | ::(PToken tok2) | |
1336 | ::(PToken tok3) | |
ae4735db | 1337 | ::xs -> |
485bce71 C |
1338 | let is_init = |
1339 | match tok2.tok, tok3.tok with | |
1340 | | TInt _, TComma _ -> true | |
1341 | | TString _, TComma _ -> true | |
1342 | | TIdent _, TComma _ -> true | |
1343 | | _ -> false | |
ae4735db | 1344 | |
485bce71 C |
1345 | in |
1346 | if is_init | |
ae4735db | 1347 | then begin |
485bce71 C |
1348 | pr2_cpp("found define initializer: " ^s); |
1349 | tokbrace.tok <- TOBraceDefineInit i1; | |
1350 | end; | |
1351 | ||
1352 | aux xs | |
1353 | ||
1354 | (* mainly for linux, especially in sound/ *) | |
1355 | | (PToken {tok = TDefine _}) | |
1356 | ::(PToken {tok = TIdentDefine (s,_)}) | |
1357 | ::(Parenthised(xxx, info_parens)) | |
1358 | ::(PToken ({tok = TOBrace i1} as tokbrace)) | |
1359 | ::(PToken tok2) | |
1360 | ::(PToken tok3) | |
ae4735db | 1361 | ::xs -> |
485bce71 C |
1362 | let is_init = |
1363 | match tok2.tok, tok3.tok with | |
1364 | | TInt _, TComma _ -> true | |
1365 | | TDot _, TIdent _ -> true | |
1366 | | TIdent _, TComma _ -> true | |
1367 | | _ -> false | |
ae4735db | 1368 | |
485bce71 C |
1369 | in |
1370 | if is_init | |
ae4735db | 1371 | then begin |
485bce71 C |
1372 | pr2_cpp("found define initializer with param: " ^ s); |
1373 | tokbrace.tok <- TOBraceDefineInit i1; | |
1374 | end; | |
1375 | ||
1376 | aux xs | |
1377 | ||
ae4735db | 1378 | |
485bce71 C |
1379 | |
1380 | (* recurse *) | |
ae4735db C |
1381 | | (PToken x)::xs -> aux xs |
1382 | | (Parenthised (xxs, info_parens))::xs -> | |
485bce71 | 1383 | (* not need for tobrace init: |
ae4735db | 1384 | * xxs +> List.iter aux; |
485bce71 C |
1385 | *) |
1386 | aux xs | |
1387 | in | |
1388 | aux xs | |
1389 | ||
1390 | ||
34e49164 C |
1391 | (* ------------------------------------------------------------------------- *) |
1392 | (* action *) | |
1393 | (* ------------------------------------------------------------------------- *) | |
1394 | ||
708f4980 | 1395 | (* obsolete now with macro expansion ? get some regression if comment. |
ae4735db | 1396 | * todo: if do bad decision here, then it can influence other phases |
708f4980 | 1397 | * and make it hard to parse. So maybe when have a parse error, should |
ae4735db | 1398 | * undo some of the guess those heuristics have done, and restore |
708f4980 C |
1399 | * the original token value. |
1400 | *) | |
1401 | ||
34e49164 C |
1402 | let rec find_actions = function |
1403 | | [] -> () | |
1404 | ||
1405 | | PToken ({tok = TIdent (s,ii)}) | |
1406 | ::Parenthised (xxs,info_parens) | |
ae4735db | 1407 | ::xs -> |
34e49164 C |
1408 | find_actions xs; |
1409 | xxs +> List.iter find_actions; | |
1410 | let modified = find_actions_params xxs in | |
ae4735db | 1411 | if modified |
34e49164 | 1412 | then msg_macro_higher_order s |
ae4735db C |
1413 | |
1414 | | x::xs -> | |
34e49164 C |
1415 | find_actions xs |
1416 | ||
ae4735db C |
1417 | and find_actions_params xxs = |
1418 | xxs +> List.fold_left (fun acc xs -> | |
34e49164 | 1419 | let toks = tokens_of_paren xs in |
ae4735db C |
1420 | if toks +> List.exists (fun x -> TH.is_statement x.tok) |
1421 | (* undo: && List.length toks > 1 | |
708f4980 C |
1422 | * good for sparse, not good for linux |
1423 | *) | |
34e49164 | 1424 | then begin |
ae4735db | 1425 | xs +> iter_token_paren (fun x -> |
34e49164 | 1426 | if TH.is_eof x.tok |
ae4735db | 1427 | then |
34e49164 | 1428 | (* certainly because paren detection had a pb because of |
708f4980 C |
1429 | * some ifdef-exp. Do similar additional checking than |
1430 | * what is done in set_as_comment. | |
34e49164 | 1431 | *) |
708f4980 | 1432 | pr2 "PB: weird, I try to tag an EOF token as an action" |
ae4735db | 1433 | else |
708f4980 | 1434 | (* cf tests-bis/no_cpar_macro.c *) |
ae4735db C |
1435 | if TH.is_eom x.tok |
1436 | then | |
708f4980 | 1437 | pr2 "PB: weird, I try to tag an EOM token as an action" |
ae4735db | 1438 | else |
708f4980 | 1439 | x.tok <- TAction (TH.info_of_tok x.tok); |
34e49164 C |
1440 | ); |
1441 | true (* modified *) | |
1442 | end | |
1443 | else acc | |
1444 | ) false | |
1445 | ||
1446 | ||
1447 | ||
1448 | (* ------------------------------------------------------------------------- *) | |
1449 | (* main fix cpp function *) | |
1450 | (* ------------------------------------------------------------------------- *) | |
1451 | ||
ae4735db | 1452 | let filter_cpp_stuff xs = |
951c7801 C |
1453 | List.filter |
1454 | (function x -> | |
1455 | (match x.tok with | |
1456 | | tok when TH.is_comment tok -> false | |
34e49164 C |
1457 | (* don't want drop the define, or if drop, have to drop |
1458 | * also its body otherwise the line heuristics may be lost | |
1459 | * by not finding the TDefine in column 0 but by finding | |
1460 | * a TDefineIdent in a column > 0 | |
1461 | *) | |
951c7801 C |
1462 | | Parser_c.TDefine _ -> true |
1463 | | tok when TH.is_cpp_instruction tok -> false | |
1464 | | _ -> true | |
1465 | )) | |
1466 | xs | |
34e49164 C |
1467 | |
1468 | let insert_virtual_positions l = | |
1469 | let strlen x = String.length (Ast_c.str_of_info x) in | |
708f4980 C |
1470 | let rec loop prev offset acc = function |
1471 | [] -> List.rev acc | |
34e49164 C |
1472 | | x::xs -> |
1473 | let ii = TH.info_of_tok x in | |
1474 | let inject pi = | |
1475 | TH.visitor_info_of_tok (function ii -> Ast_c.rewrap_pinfo pi ii) x in | |
1476 | match Ast_c.pinfo_of_info ii with | |
1477 | Ast_c.OriginTok pi -> | |
1478 | let prev = Ast_c.parse_info_of_info ii in | |
ae4735db | 1479 | loop prev (strlen ii) (x::acc) xs |
34e49164 | 1480 | | Ast_c.ExpandedTok (pi,_) -> |
708f4980 | 1481 | let x' = inject (Ast_c.ExpandedTok (pi,(prev,offset))) in |
ae4735db | 1482 | loop prev (offset + (strlen ii)) (x'::acc) xs |
34e49164 | 1483 | | Ast_c.FakeTok (s,_) -> |
708f4980 | 1484 | let x' = inject (Ast_c.FakeTok (s,(prev,offset))) in |
ae4735db | 1485 | loop prev (offset + (strlen ii)) (x'::acc) xs |
34e49164 C |
1486 | | Ast_c.AbstractLineTok _ -> failwith "abstract not expected" in |
1487 | let rec skip_fake = function | |
708f4980 | 1488 | | [] -> [] |
34e49164 C |
1489 | | x::xs -> |
1490 | let ii = TH.info_of_tok x in | |
1491 | match Ast_c.pinfo_of_info ii with | |
708f4980 | 1492 | | Ast_c.OriginTok pi -> |
34e49164 | 1493 | let prev = Ast_c.parse_info_of_info ii in |
708f4980 C |
1494 | let res = loop prev (strlen ii) [] xs in |
1495 | x::res | |
34e49164 | 1496 | | _ -> x::skip_fake xs in |
ae4735db | 1497 | skip_fake l |
708f4980 | 1498 | |
34e49164 | 1499 | |
485bce71 | 1500 | (* ------------------------------------------------------------------------- *) |
ae4735db | 1501 | let fix_tokens_cpp2 ~macro_defs tokens = |
708f4980 | 1502 | let tokens2 = ref (tokens +> Common.acc_map TV.mk_token_extended) in |
ae4735db C |
1503 | |
1504 | begin | |
34e49164 C |
1505 | (* the order is important, if you put the action heuristic first, |
1506 | * then because of ifdef, can have not closed paren | |
ae4735db C |
1507 | * and so may believe that higher order macro |
1508 | * and it will eat too much tokens. So important to do | |
34e49164 | 1509 | * first the ifdef. |
ae4735db | 1510 | * |
34e49164 C |
1511 | * I recompute multiple times cleaner cos the mutable |
1512 | * can have be changed and so may have more comments | |
1513 | * in the token original list. | |
ae4735db | 1514 | * |
34e49164 C |
1515 | *) |
1516 | ||
0708f913 C |
1517 | commentize_skip_start_to_end !tokens2; |
1518 | ||
34e49164 | 1519 | (* ifdef *) |
ae4735db C |
1520 | let cleaner = !tokens2 +> List.filter (fun x -> |
1521 | (* is_comment will also filter the TCommentCpp created in | |
0708f913 | 1522 | * commentize_skip_start_to_end *) |
34e49164 C |
1523 | not (TH.is_comment x.tok) (* could filter also #define/#include *) |
1524 | ) in | |
708f4980 | 1525 | let ifdef_grouped = TV.mk_ifdef cleaner in |
485bce71 C |
1526 | set_ifdef_parenthize_info ifdef_grouped; |
1527 | ||
34e49164 C |
1528 | find_ifdef_funheaders ifdef_grouped; |
1529 | find_ifdef_bool ifdef_grouped; | |
1530 | find_ifdef_mid ifdef_grouped; | |
ae4735db | 1531 | (* change order ? maybe cparen_else heuristic make some of the funheaders |
708f4980 C |
1532 | * heuristics irrelevant ? |
1533 | *) | |
ae4735db | 1534 | find_ifdef_cparen_else ifdef_grouped; |
34e49164 C |
1535 | adjust_inifdef_include ifdef_grouped; |
1536 | ||
1537 | ||
1538 | (* macro 1 *) | |
1539 | let cleaner = !tokens2 +> filter_cpp_stuff in | |
1540 | ||
708f4980 C |
1541 | let paren_grouped = TV.mk_parenthised cleaner in |
1542 | Cpp_token_c.apply_macro_defs | |
ae4735db C |
1543 | ~msg_apply_known_macro |
1544 | ~msg_apply_known_macro_hint | |
708f4980 | 1545 | macro_defs paren_grouped; |
34e49164 | 1546 | (* because the before field is used by apply_macro_defs *) |
ae4735db | 1547 | tokens2 := TV.rebuild_tokens_extented !tokens2; |
34e49164 C |
1548 | |
1549 | (* tagging contextual info (InFunc, InStruct, etc). Better to do | |
1550 | * that after the "ifdef-simplification" phase. | |
1551 | *) | |
ae4735db | 1552 | let cleaner = !tokens2 +> List.filter (fun x -> |
34e49164 C |
1553 | not (TH.is_comment x.tok) (* could filter also #define/#include *) |
1554 | ) in | |
1555 | ||
708f4980 | 1556 | let brace_grouped = TV.mk_braceised cleaner in |
34e49164 C |
1557 | set_context_tag brace_grouped; |
1558 | ||
34e49164 C |
1559 | (* macro *) |
1560 | let cleaner = !tokens2 +> filter_cpp_stuff in | |
1561 | ||
708f4980 C |
1562 | let paren_grouped = TV.mk_parenthised cleaner in |
1563 | let line_paren_grouped = TV.mk_line_parenthised paren_grouped in | |
485bce71 | 1564 | find_define_init_brace_paren paren_grouped; |
34e49164 C |
1565 | find_string_macro_paren paren_grouped; |
1566 | find_macro_lineparen line_paren_grouped; | |
1567 | find_macro_paren paren_grouped; | |
1568 | ||
1569 | ||
708f4980 | 1570 | (* obsolete: actions ? not yet *) |
34e49164 | 1571 | let cleaner = !tokens2 +> filter_cpp_stuff in |
708f4980 | 1572 | let paren_grouped = TV.mk_parenthised cleaner in |
34e49164 | 1573 | find_actions paren_grouped; |
ae4735db | 1574 | |
34e49164 C |
1575 | |
1576 | ||
708f4980 | 1577 | insert_virtual_positions (!tokens2 +> Common.acc_map (fun x -> x.tok)) |
34e49164 C |
1578 | end |
1579 | ||
ae4735db | 1580 | let time_hack1 ~macro_defs a = |
708f4980 | 1581 | Common.profile_code_exclusif "HACK" (fun () -> fix_tokens_cpp2 ~macro_defs a) |
34e49164 | 1582 | |
ae4735db | 1583 | let fix_tokens_cpp ~macro_defs a = |
708f4980 | 1584 | Common.profile_code "C parsing.fix_cpp" (fun () -> time_hack1 ~macro_defs a) |
34e49164 | 1585 | |
34e49164 | 1586 | |
34e49164 | 1587 | |
34e49164 C |
1588 | |
1589 | (*****************************************************************************) | |
1590 | (* Lexing with lookahead *) | |
1591 | (*****************************************************************************) | |
1592 | ||
1593 | (* Why using yet another parsing_hack technique ? The fix_xxx where do | |
ae4735db | 1594 | * some pre-processing on the full list of tokens is not enough ? |
34e49164 C |
1595 | * No cos sometimes we need more contextual info, and even if |
1596 | * set_context() tries to give some contextual info, it's not completely | |
1597 | * accurate so the following code give yet another alternative, yet another | |
1598 | * chance to transform some tokens. | |
ae4735db | 1599 | * |
34e49164 C |
1600 | * todo?: maybe could try to get rid of this technique. Maybe a better |
1601 | * set_context() would make possible to move this code using a fix_xx | |
1602 | * technique. | |
ae4735db | 1603 | * |
485bce71 | 1604 | * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but |
34e49164 C |
1605 | * it is more general to do it via my LALR(k) tech. Because here we can |
1606 | * transform some token give some context information. So sometimes it | |
1607 | * makes sense to transform a token in one context, sometimes not, and | |
1608 | * lex can not provide us this context information. Note that the order | |
ae4735db C |
1609 | * in the pattern matching in lookahead is important. Do not cut/paste. |
1610 | * | |
34e49164 C |
1611 | * Note that in next there is only "clean" tokens, there is no comment |
1612 | * or space tokens. This is done by the caller. | |
ae4735db | 1613 | * |
34e49164 C |
1614 | *) |
1615 | ||
485bce71 C |
1616 | open Lexer_parser (* for the fields of lexer_hint type *) |
1617 | ||
1618 | let not_struct_enum = function | |
1619 | | (Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _)::_ -> false | |
1620 | | _ -> true | |
34e49164 | 1621 | |
485bce71 | 1622 | |
ae4735db | 1623 | let lookahead2 ~pass next before = |
34e49164 C |
1624 | |
1625 | match (next, before) with | |
1626 | ||
1627 | (*-------------------------------------------------------------*) | |
1628 | (* typedef inference, parse_typedef_fix3 *) | |
1629 | (*-------------------------------------------------------------*) | |
1630 | (* xx xx *) | |
b1b2de81 | 1631 | | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2 |
34e49164 C |
1632 | && ok_typedef s |
1633 | (* (take_safe 1 !passed_tok <> [TOPar]) -> *) | |
ae4735db | 1634 | -> |
34e49164 C |
1635 | (* parse_typedef_fix3: |
1636 | * acpi_object acpi_object; | |
ae4735db | 1637 | * etait mal parsé, car pas le temps d'appeler dt() dans le type_spec. |
34e49164 C |
1638 | * Le parser en interne a deja appelé le prochain token pour pouvoir |
1639 | * decider des choses. | |
1640 | * => special case in lexer_heuristic, again | |
1641 | *) | |
ae4735db C |
1642 | if !Flag_parsing_c.debug_typedef |
1643 | then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s); | |
34e49164 C |
1644 | |
1645 | LP.disable_typedef(); | |
1646 | ||
1647 | msg_typedef s; LP.add_typedef_root s; | |
1648 | TypedefIdent (s, i1) | |
1649 | ||
1650 | (* xx yy *) | |
ae4735db | 1651 | | (TIdent (s, i1)::TIdent (s2, i2)::_ , _) when not_struct_enum before |
34e49164 C |
1652 | && ok_typedef s |
1653 | -> | |
1654 | (* && not_annot s2 BUT lead to false positive*) | |
1655 | ||
1656 | msg_typedef s; LP.add_typedef_root s; | |
1657 | TypedefIdent (s, i1) | |
1658 | ||
1659 | ||
1660 | (* xx inline *) | |
ae4735db | 1661 | | (TIdent (s, i1)::Tinline i2::_ , _) when not_struct_enum before |
34e49164 | 1662 | && ok_typedef s |
ae4735db | 1663 | -> |
34e49164 C |
1664 | msg_typedef s; LP.add_typedef_root s; |
1665 | TypedefIdent (s, i1) | |
1666 | ||
1667 | ||
1668 | (* [,(] xx [,)] AND param decl *) | |
1669 | | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ ) | |
b1b2de81 | 1670 | when not_struct_enum before && (LP.current_context() =*= LP.InParameter) |
34e49164 | 1671 | && ok_typedef s |
ae4735db | 1672 | -> |
34e49164 C |
1673 | msg_typedef s; LP.add_typedef_root s; |
1674 | TypedefIdent (s, i1) | |
1675 | ||
1676 | (* xx* [,)] *) | |
1677 | (* specialcase: [,(] xx* [,)] *) | |
1678 | | (TIdent (s, i1)::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ ) | |
1679 | when not_struct_enum before | |
1680 | (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) | |
1681 | && ok_typedef s | |
ae4735db | 1682 | -> |
34e49164 C |
1683 | msg_typedef s; LP.add_typedef_root s; |
1684 | TypedefIdent (s, i1) | |
1685 | ||
1686 | ||
1687 | (* xx** [,)] *) | |
1688 | (* specialcase: [,(] xx** [,)] *) | |
1689 | | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ ) | |
1690 | when not_struct_enum before | |
1691 | (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) | |
1692 | && ok_typedef s | |
ae4735db | 1693 | -> |
34e49164 C |
1694 | msg_typedef s; LP.add_typedef_root s; |
1695 | TypedefIdent (s, i1) | |
1696 | ||
1697 | ||
1698 | ||
1699 | (* xx const * USELESS because of next rule ? *) | |
ae4735db C |
1700 | | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ ) |
1701 | when not_struct_enum before | |
34e49164 C |
1702 | (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) |
1703 | && ok_typedef s | |
1704 | -> | |
1705 | ||
1706 | msg_typedef s; LP.add_typedef_root s; | |
1707 | TypedefIdent (s, i1) | |
ae4735db | 1708 | |
34e49164 | 1709 | (* xx const *) |
ae4735db C |
1710 | | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ ) |
1711 | when not_struct_enum before | |
34e49164 C |
1712 | && ok_typedef s |
1713 | (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) | |
1714 | -> | |
1715 | ||
1716 | msg_typedef s; LP.add_typedef_root s; | |
1717 | TypedefIdent (s, i1) | |
1718 | ||
1719 | ||
1720 | (* xx * const *) | |
ae4735db C |
1721 | | (TIdent (s, i1)::TMul _::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ ) |
1722 | when not_struct_enum before | |
34e49164 C |
1723 | && ok_typedef s |
1724 | -> | |
1725 | (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) | |
1726 | ||
1727 | msg_typedef s; LP.add_typedef_root s; | |
1728 | TypedefIdent (s, i1) | |
1729 | ||
1730 | ||
1731 | (* ( const xx) *) | |
485bce71 | 1732 | | (TIdent (s, i1)::TCPar _::_, (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when |
34e49164 C |
1733 | ok_typedef s -> |
1734 | msg_typedef s; LP.add_typedef_root s; | |
1735 | TypedefIdent (s, i1) | |
ae4735db | 1736 | |
34e49164 C |
1737 | |
1738 | ||
1739 | (* ( xx ) [sizeof, ~] *) | |
485bce71 | 1740 | | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ , TOPar _::_ ) |
34e49164 C |
1741 | when not_struct_enum before |
1742 | && ok_typedef s | |
ae4735db | 1743 | -> |
34e49164 C |
1744 | msg_typedef s; LP.add_typedef_root s; |
1745 | TypedefIdent (s, i1) | |
1746 | ||
1747 | (* [(,] xx [ AND parameterdeclaration *) | |
1748 | | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_) | |
b1b2de81 | 1749 | when (LP.current_context() =*= LP.InParameter) |
34e49164 | 1750 | && ok_typedef s |
ae4735db | 1751 | -> |
34e49164 C |
1752 | msg_typedef s; LP.add_typedef_root s; |
1753 | TypedefIdent (s, i1) | |
ae4735db | 1754 | |
34e49164 C |
1755 | (*------------------------------------------------------------*) |
1756 | (* if 'x*y' maybe an expr, maybe just a classic multiplication *) | |
1757 | (* but if have a '=', or ',' I think not *) | |
1758 | (*------------------------------------------------------------*) | |
1759 | ||
1760 | (* static xx * yy *) | |
ae4735db | 1761 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , |
485bce71 | 1762 | (Tregister _|Tstatic _ |Tvolatile _|Tconst _|Trestrict _)::_) when |
ae4735db | 1763 | ok_typedef s |
34e49164 C |
1764 | -> |
1765 | msg_typedef s; LP.add_typedef_root s; | |
1766 | TypedefIdent (s, i1) | |
ae4735db | 1767 | |
34e49164 C |
1768 | (* TODO xx * yy ; AND in start of compound element *) |
1769 | ||
1770 | ||
1771 | (* xx * yy, AND in paramdecl *) | |
1772 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _) | |
b1b2de81 | 1773 | when not_struct_enum before && (LP.current_context() =*= LP.InParameter) |
ae4735db C |
1774 | && ok_typedef s |
1775 | -> | |
34e49164 C |
1776 | |
1777 | msg_typedef s; LP.add_typedef_root s; | |
1778 | TypedefIdent (s, i1) | |
1779 | ||
1780 | ||
1781 | (* xx * yy ; AND in Toplevel, except when have = before *) | |
1782 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) -> | |
1783 | TIdent (s, i1) | |
1784 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , _) | |
485bce71 | 1785 | when not_struct_enum before && (LP.is_top_or_struct (LP.current_context ())) |
ae4735db | 1786 | -> |
34e49164 C |
1787 | msg_typedef s; LP.add_typedef_root s; |
1788 | TypedefIdent (s, i1) | |
1789 | ||
1790 | (* xx * yy , AND in Toplevel *) | |
1791 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , _) | |
b1b2de81 | 1792 | when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel) |
ae4735db C |
1793 | && ok_typedef s |
1794 | -> | |
34e49164 C |
1795 | |
1796 | msg_typedef s; LP.add_typedef_root s; | |
1797 | TypedefIdent (s, i1) | |
1798 | ||
1799 | (* xx * yy ( AND in Toplevel *) | |
1800 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOPar _::_ , _) | |
ae4735db | 1801 | when not_struct_enum before |
485bce71 | 1802 | && (LP.is_top_or_struct (LP.current_context ())) |
ae4735db | 1803 | && ok_typedef s |
34e49164 C |
1804 | -> |
1805 | msg_typedef s; LP.add_typedef_root s; | |
1806 | TypedefIdent (s, i1) | |
ae4735db | 1807 | |
34e49164 C |
1808 | (* xx * yy [ *) |
1809 | (* todo? enough ? cos in struct def we can have some expression ! *) | |
1810 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TOCro _::_ , _) | |
ae4735db | 1811 | when not_struct_enum before && |
485bce71 | 1812 | (LP.is_top_or_struct (LP.current_context ())) |
ae4735db C |
1813 | && ok_typedef s |
1814 | -> | |
34e49164 C |
1815 | msg_typedef s; LP.add_typedef_root s; |
1816 | TypedefIdent (s, i1) | |
1817 | ||
1818 | (* u16: 10; in struct *) | |
1819 | | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_) | |
485bce71 | 1820 | when (LP.is_top_or_struct (LP.current_context ())) |
ae4735db C |
1821 | && ok_typedef s |
1822 | -> | |
34e49164 C |
1823 | msg_typedef s; LP.add_typedef_root s; |
1824 | TypedefIdent (s, i1) | |
ae4735db | 1825 | |
34e49164 C |
1826 | |
1827 | (* why need TOPar condition as stated in preceding rule ? really needed ? *) | |
1828 | (* YES cos at toplevel can have some expression !! for instance when *) | |
1829 | (* enter in the dimension of an array *) | |
1830 | (* | |
1831 | | (TIdent s::TMul::TIdent s2::_ , _) | |
1832 | when (take_safe 1 !passed_tok <> [Tstruct] && | |
1833 | (take_safe 1 !passed_tok <> [Tenum])) | |
1834 | && | |
ae4735db | 1835 | !LP._lexer_hint = Some LP.Toplevel -> |
708f4980 | 1836 | msg_typedef s; LP.add_typedef_root s; |
34e49164 C |
1837 | TypedefIdent s |
1838 | *) | |
1839 | ||
1840 | (* xx * yy = *) | |
1841 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TEq _::_ , _) | |
ae4735db C |
1842 | when not_struct_enum before |
1843 | && ok_typedef s | |
34e49164 C |
1844 | -> |
1845 | msg_typedef s; LP.add_typedef_root s; | |
1846 | TypedefIdent (s, i1) | |
1847 | ||
1848 | ||
1849 | (* xx * yy) AND in paramdecl *) | |
1850 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TCPar _::_ , _) | |
b1b2de81 | 1851 | when not_struct_enum before && (LP.current_context () =*= LP.InParameter) |
ae4735db | 1852 | && ok_typedef s |
34e49164 C |
1853 | -> |
1854 | msg_typedef s; LP.add_typedef_root s; | |
1855 | TypedefIdent (s, i1) | |
ae4735db | 1856 | |
34e49164 C |
1857 | |
1858 | (* xx * yy; *) (* wrong ? *) | |
ae4735db C |
1859 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , |
1860 | (TOBrace _| TPtVirg _)::_) when not_struct_enum before | |
1861 | && ok_typedef s | |
34e49164 C |
1862 | -> |
1863 | msg_typedef s; LP.add_typedef_root s; | |
485bce71 | 1864 | msg_maybe_dangereous_typedef s; |
34e49164 C |
1865 | TypedefIdent (s, i1) |
1866 | ||
1867 | ||
1868 | (* xx * yy, and ';' before xx *) (* wrong ? *) | |
ae4735db | 1869 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TComma _::_ , |
34e49164 | 1870 | (TOBrace _| TPtVirg _)::_) when |
ae4735db | 1871 | ok_typedef s |
34e49164 C |
1872 | -> |
1873 | msg_typedef s; LP.add_typedef_root s; | |
1874 | TypedefIdent (s, i1) | |
1875 | ||
1876 | ||
1877 | (* xx_t * yy *) | |
ae4735db C |
1878 | | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::_ , _) |
1879 | when s ==~ regexp_typedef && not_struct_enum before | |
1880 | (* struct user_info_t sometimes *) | |
1881 | && ok_typedef s | |
1882 | -> | |
34e49164 C |
1883 | msg_typedef s; LP.add_typedef_root s; |
1884 | TypedefIdent (s, i1) | |
1885 | ||
1886 | (* xx ** yy *) (* wrong ? *) | |
1887 | | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _) | |
1888 | when not_struct_enum before | |
1889 | (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) | |
ae4735db | 1890 | && ok_typedef s |
34e49164 C |
1891 | -> |
1892 | msg_typedef s; LP.add_typedef_root s; | |
1893 | TypedefIdent (s, i1) | |
1894 | ||
1895 | (* xx *** yy *) | |
1896 | | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _) | |
ae4735db C |
1897 | when not_struct_enum before |
1898 | && ok_typedef s | |
34e49164 C |
1899 | (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) |
1900 | -> | |
1901 | msg_typedef s; LP.add_typedef_root s; | |
1902 | TypedefIdent (s, i1) | |
1903 | ||
1904 | (* xx ** ) *) | |
1905 | | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _) | |
ae4735db | 1906 | when not_struct_enum before |
34e49164 | 1907 | (* && !LP._lexer_hint = Some LP.ParameterDeclaration *) |
ae4735db | 1908 | && ok_typedef s |
34e49164 C |
1909 | -> |
1910 | msg_typedef s; LP.add_typedef_root s; | |
1911 | TypedefIdent (s, i1) | |
1912 | ||
1913 | ||
1914 | ||
1915 | (* ----------------------------------- *) | |
ae4735db | 1916 | (* old: why not do like for other rules and start with TIdent ? |
485bce71 C |
1917 | * why do TOPar :: TIdent :: ..., _ and not TIdent :: ..., TOPAr::_ ? |
1918 | * new: prefer now start with TIdent because otherwise the add_typedef_root | |
1919 | * may have no effect if in second pass or if have disable the add_typedef. | |
1920 | *) | |
34e49164 C |
1921 | |
1922 | (* (xx) yy *) | |
ae4735db C |
1923 | | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ , |
1924 | (TOPar info)::x::_) | |
34e49164 | 1925 | when not (TH.is_stuff_taking_parenthized x) && |
b1b2de81 | 1926 | Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 |
ae4735db C |
1927 | && ok_typedef s |
1928 | -> | |
34e49164 C |
1929 | |
1930 | msg_typedef s; LP.add_typedef_root s; | |
485bce71 C |
1931 | (*TOPar info*) |
1932 | TypedefIdent (s, i1) | |
34e49164 C |
1933 | |
1934 | ||
ae4735db | 1935 | (* (xx) ( yy) |
91eba41f C |
1936 | * but false positif: typedef int (xxx_t)(...), so do specialisation below. |
1937 | *) | |
1938 | (* | |
ae4735db C |
1939 | | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_) |
1940 | when not (TH.is_stuff_taking_parenthized x) | |
1941 | && ok_typedef s | |
34e49164 C |
1942 | -> |
1943 | msg_typedef s; LP.add_typedef_root s; | |
485bce71 C |
1944 | (* TOPar info *) |
1945 | TypedefIdent (s, i1) | |
91eba41f C |
1946 | *) |
1947 | (* special case: = (xx) ( yy) *) | |
ae4735db | 1948 | | (TIdent (s, i1)::TCPar _::TOPar _::_ , |
91eba41f | 1949 | (TOPar info)::(TEq _ |TEqEq _)::_) |
ae4735db | 1950 | when ok_typedef s |
91eba41f C |
1951 | -> |
1952 | msg_typedef s; LP.add_typedef_root s; | |
1953 | (* TOPar info *) | |
1954 | TypedefIdent (s, i1) | |
1955 | ||
34e49164 C |
1956 | |
1957 | (* (xx * ) yy *) | |
ae4735db C |
1958 | | (TIdent (s, i1)::TMul _::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_) when |
1959 | ok_typedef s | |
1960 | -> | |
34e49164 | 1961 | msg_typedef s; LP.add_typedef_root s; |
485bce71 C |
1962 | (*TOPar info*) |
1963 | TypedefIdent (s,i1) | |
1964 | ||
34e49164 C |
1965 | |
1966 | (* (xx){ ... } constructor *) | |
ae4735db C |
1967 | | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_) |
1968 | when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x) | |
1969 | && ok_typedef s | |
34e49164 C |
1970 | -> |
1971 | msg_typedef s; LP.add_typedef_root s; | |
1972 | TypedefIdent (s, i1) | |
1973 | ||
1974 | ||
1975 | (* can have sizeof on expression | |
ae4735db | 1976 | | (Tsizeof::TOPar::TIdent s::TCPar::_, _) -> |
708f4980 | 1977 | msg_typedef s; LP.add_typedef_root s; |
34e49164 C |
1978 | Tsizeof |
1979 | *) | |
91eba41f C |
1980 | |
1981 | ||
1982 | (* ----------------------------------- *) | |
1983 | (* x ( *y )(params), function pointer *) | |
ae4735db | 1984 | | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _) |
34e49164 | 1985 | when not_struct_enum before |
ae4735db | 1986 | && ok_typedef s |
34e49164 C |
1987 | -> |
1988 | msg_typedef s; LP.add_typedef_root s; | |
1989 | TypedefIdent (s, i1) | |
1990 | ||
91eba41f | 1991 | (* x* ( *y )(params), function pointer 2 *) |
ae4735db | 1992 | | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_, _) |
91eba41f | 1993 | when not_struct_enum before |
ae4735db | 1994 | && ok_typedef s |
91eba41f C |
1995 | -> |
1996 | msg_typedef s; LP.add_typedef_root s; | |
1997 | TypedefIdent (s, i1) | |
1998 | ||
34e49164 C |
1999 | |
2000 | (*-------------------------------------------------------------*) | |
2001 | (* CPP *) | |
2002 | (*-------------------------------------------------------------*) | |
485bce71 C |
2003 | | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) | |
2004 | TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii)) | |
34e49164 | 2005 | as x) |
ae4735db C |
2006 | ::_, _ |
2007 | -> | |
485bce71 | 2008 | (* |
ae4735db | 2009 | if not !Flag_parsing_c.ifdef_to_if |
34e49164 | 2010 | then TCommentCpp (Ast_c.CppDirective, ii) |
ae4735db | 2011 | else |
485bce71 | 2012 | *) |
0708f913 C |
2013 | (* not !LP._lexer_hint.toplevel *) |
2014 | if !Flag_parsing_c.ifdef_directive_passing | |
708f4980 | 2015 | || (pass >= 2) |
0708f913 | 2016 | then begin |
ae4735db | 2017 | |
b1b2de81 | 2018 | if (LP.current_context () =*= LP.InInitializer) |
ae4735db | 2019 | then begin |
0708f913 C |
2020 | pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *) |
2021 | incr Stat.nIfdefInitializer; | |
ae4735db | 2022 | end else begin |
708f4980 | 2023 | pr2_cpp("IFDEF: or related inside function. I treat it as comment"); |
0708f913 C |
2024 | incr Stat.nIfdefPassing; |
2025 | end; | |
2026 | TCommentCpp (Token_c.CppDirective, ii) | |
2027 | end | |
2028 | else x | |
ae4735db C |
2029 | |
2030 | | (TUndef (id, ii) as x)::_, _ | |
2031 | -> | |
708f4980 | 2032 | if (pass >= 2) |
485bce71 | 2033 | then begin |
0708f913 C |
2034 | pr2_cpp("UNDEF: I treat it as comment"); |
2035 | TCommentCpp (Token_c.CppDirective, ii) | |
113803cf C |
2036 | end |
2037 | else x | |
2038 | ||
ae4735db C |
2039 | | (TCppDirectiveOther (ii) as x)::_, _ |
2040 | -> | |
708f4980 | 2041 | if (pass >= 2) |
113803cf | 2042 | then begin |
0708f913 C |
2043 | pr2_cpp ("OTHER directive: I treat it as comment"); |
2044 | TCommentCpp (Token_c.CppDirective, ii) | |
485bce71 C |
2045 | end |
2046 | else x | |
34e49164 C |
2047 | |
2048 | (* If ident contain a for_each, then certainly a macro. But to be | |
2049 | * sure should look if there is a '{' after the ')', but it requires | |
2050 | * to count the '('. Because this can be expensive, we do that only | |
ae4735db | 2051 | * when the token contains "for_each". |
34e49164 | 2052 | *) |
ae4735db | 2053 | | (TIdent (s, i1)::TOPar _::rest, _) |
b1b2de81 | 2054 | when not (LP.current_context () =*= LP.InTopLevel) |
ae4735db C |
2055 | (* otherwise a function such as static void loopback_enable(int i) { |
2056 | * will be considered as a loop | |
34e49164 C |
2057 | *) |
2058 | -> | |
2059 | ||
ae4735db | 2060 | if s ==~ regexp_foreach && |
34e49164 | 2061 | is_really_foreach (Common.take_safe forLOOKAHEAD rest) |
ae4735db | 2062 | |
34e49164 C |
2063 | then begin |
2064 | msg_foreach s; | |
2065 | TMacroIterator (s, i1) | |
2066 | end | |
2067 | else TIdent (s, i1) | |
2068 | ||
2069 | ||
ae4735db | 2070 | |
34e49164 C |
2071 | (*-------------------------------------------------------------*) |
2072 | | v::xs, _ -> v | |
2073 | | _ -> raise Impossible | |
2074 | ||
ae4735db | 2075 | let lookahead ~pass a b = |
485bce71 | 2076 | Common.profile_code "C parsing.lookahead" (fun () -> lookahead2 ~pass a b) |
34e49164 C |
2077 | |
2078 |