Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 | 4 | * Copyright (C) 2007, 2008 Ecole des Mines de Nantes |
34e49164 C |
5 | * |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
34e49164 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
15 | ||
16 | open Common | |
17 | ||
ae4735db | 18 | module TH = Token_helpers |
708f4980 | 19 | module TV = Token_views_c |
34e49164 C |
20 | module LP = Lexer_parser |
21 | ||
485bce71 | 22 | module Stat = Parsing_stat |
34e49164 | 23 | |
ae4735db | 24 | open Parser_c |
34e49164 | 25 | |
ae4735db | 26 | open TV |
708f4980 | 27 | |
34e49164 C |
28 | (*****************************************************************************) |
29 | (* Some debugging functions *) | |
30 | (*****************************************************************************) | |
31 | ||
(* Verbose-parsing printers: [pr2] and [pr2_once] are the two wrappers
 * returned by [Common.mk_pr2_wrappers] for the
 * [Flag_parsing_c.verbose_parsing] flag.
 *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
113803cf | 33 | |
(* Print a cpp-related trace message, prefixed with "CPP-", through
 * [Common.pr2_once]. Does nothing unless [Flag_parsing_c.debug_cpp] is set.
 *)
let pr2_cpp s =
  match !Flag_parsing_c.debug_cpp with
  | true -> Common.pr2_once ("CPP-" ^ s)
  | false -> ()
37 | ||
38 | ||
(* Generic filtered message emitter.
 *
 *  - cond:     the message is considered only when this is true
 *  - is_known: predicate telling whether [s] is an already-known name
 *  - printer:  function actually emitting the message for [s]
 *
 * When [Flag_parsing_c.filter_msg] is off every message is printed;
 * when it is on, only the names for which [is_known] is false are printed.
 *)
let msg_gen cond is_known printer s =
  let wanted =
    cond && (not !Flag_parsing_c.filter_msg || not (is_known s))
  in
  if wanted then printer s
ae4735db | 47 | |
34e49164 | 48 | |
485bce71 C |
49 | (* In the following, there are some hardcoded names of types or macros |
50 | * but they are not used by our heuristics! They are just here to | |
51 | * enable to detect false positive by printing only the typedef/macros | |
52 | * that we don't know yet. If we print everything, then we can easily | |
53 | * get lost with too much verbose tracing information. So those | |
54 | * functions "filter" some messages. So our heuristics are still good, | |
55 | * there is no more (or not that much) hardcoded linux stuff. | |
34e49164 | 56 | *) |
485bce71 | 57 | |
(* Tell whether [s] is a typedef name we already know about: one of a
 * fixed list of names, or any name ending in "_t". Used only to filter
 * trace messages.
 *)
let is_known_typdef s =
  match s with
  | "u_char" | "u_short" | "u_int" | "u_long"
  | "u8" | "u16" | "u32" | "u64"
  | "s8" | "s16" | "s32" | "s64"
  | "__u8" | "__u16" | "__u32" | "__u64"
  | "acpi_handle" | "acpi_status"
  | "FILE" | "DIR" -> true
  | other -> other =~ ".*_t$"
485bce71 | 79 | |
(* Count one inferred typedef in [Stat.nTypedefInfer] and, when
 * [Flag_parsing_c.debug_typedef] is set, trace the promotion of [s]
 * (subject to the filtering done by [msg_gen] with [is_known_typdef]).
 * The [n] argument is currently not used in the message.
 *
 * note: this must stay a full function and not a partial application of
 * msg_gen, otherwise the debug_typedef flag would be read once, when
 * the definition is evaluated and the flag is still false, instead of
 * at each call.
 *)
let msg_typedef s n =
  incr Stat.nTypedefInfer;
  let report name =
    (*pr2_cpp (Printf.sprintf "TYPEDEF: promoting(%d): %s" n name)*)
    pr2_cpp (Printf.sprintf "TYPEDEF: promoting: %s" name)
  in
  msg_gen !Flag_parsing_c.debug_typedef is_known_typdef report s
93 | ||
(* Warn, through [pr2], that [s] was inferred to be a typedef although it
 * is not one of the names accepted by [is_known_typdef].
 *)
let msg_maybe_dangereous_typedef s =
  if is_known_typdef s
  then ()
  else
    pr2 ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
34e49164 C |
99 | |
100 | ||
101 | ||
(* Count one declare-macro in [Stat.nMacroDecl] and, when
 * [Flag_parsing_c.debug_cpp] is set, trace it. The names listed below
 * are the ones considered "known" by the message filter of [msg_gen].
 *)
let msg_declare_macro s =
  incr Stat.nMacroDecl;
  let is_known_declarer = function
    | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
    | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
    | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
    | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
    | "SENSOR_DEVICE_ATTR"
    | "LIST_HEAD"
    | "DECLARE_WORK" | "DECLARE_TASKLET"
    | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
    | "DECLARE_BITMAP" -> true
    (* more general, currently disabled, alternatives:
       | s when s =~ "^DECLARE_.*" -> true
       | s when s =~ ".*_ATTR$" -> true
       | s when s =~ "^DEFINE_.*" -> true
    *)
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found declare-macro: " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_declarer report s
34e49164 | 129 | |
ae4735db C |
130 | |
(* Count one use of the iterator heuristic in [Stat.nIteratorHeuristic]
 * and trace the foreach-like macro [s].
 *)
let msg_foreach s =
  incr Stat.nIteratorHeuristic;
  let text = "MACRO: found foreach: " ^ s in
  pr2_cpp text
134 | ||
135 | ||
ae4735db C |
136 | (* ?? |
137 | let msg_debug_macro s = | |
34e49164 | 138 | pr2_cpp ("MACRO: found debug-macro: " ^ s) |
485bce71 | 139 | *) |
34e49164 C |
140 | |
141 | ||
(* Count one macro statement in [Stat.nMacroStmt] and trace [s] as a
 * macro with parameters and no trailing ';' (noptvirg).
 *)
let msg_macro_noptvirg s =
  incr Stat.nMacroStmt;
  let text = "MACRO: found macro with param noptvirg: " ^ s in
  pr2_cpp text
145 | ||
(* Count one macro statement in [Stat.nMacroStmt] and trace [s] as a
 * toplevel macro with no trailing ';' (noptvirg).
 *)
let msg_macro_toplevel_noptvirg s =
  incr Stat.nMacroStmt;
  let text = "MACRO: found toplevel macro noptvirg: " ^ s in
  pr2_cpp text
149 | ||
(* Count one macro statement in [Stat.nMacroStmt] and trace [s] as a
 * single (parameterless) macro with no trailing ';' (noptvirg).
 *)
let msg_macro_noptvirg_single s =
  incr Stat.nMacroStmt;
  let text = "MACRO: found single-macro noptvirg: " ^ s in
  pr2_cpp text
153 | ||
154 | ||
485bce71 C |
155 | |
156 | ||
(* Count one higher-order macro in [Stat.nMacroHigherOrder] and, when
 * [Flag_parsing_c.debug_cpp] is set, trace it. The three names below
 * are the ones considered "known" by the message filter of [msg_gen].
 *)
let msg_macro_higher_order s =
  incr Stat.nMacroHigherOrder;
  let is_known_higher_order = function
    | "DBGINFO" | "DBGPX" | "DFLOW" -> true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found higher ordre macro : " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_higher_order report s
171 | ||
172 | ||
(* Count one string-macro in [Stat.nMacroString] and, when
 * [Flag_parsing_c.debug_cpp] is set, trace it. The names below are the
 * ones considered "known" by the message filter of [msg_gen].
 *)
let msg_stringification s =
  incr Stat.nMacroString;
  let is_known_string_macro = function
    | "REVISION" | "UTS_RELEASE" | "SIZE_STR" | "DMA_STR" -> true
    (* s when s =~ ".*STR.*" -> true *)
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found string-macro " ^ name) in
  msg_gen !Flag_parsing_c.debug_cpp is_known_string_macro report s
189 | ||
(* Count one string-macro in [Stat.nMacroString] and trace [s] as a
 * string-macro taking parameters.
 *)
let msg_stringification_params s =
  incr Stat.nMacroString;
  let text = "MACRO: string-macro with params : " ^ s in
  pr2_cpp text
193 | ||
194 | ||
195 | ||
(* Count one macro expansion in [Stat.nMacroExpand] and trace the known
 * macro [s].
 *)
let msg_apply_known_macro s =
  incr Stat.nMacroExpand;
  let text = "MACRO: found known macro = " ^ s in
  pr2_cpp text
199 | ||
(* Count one macro hint in [Stat.nMacroHint] and trace the known macro
 * hint [s].
 *)
let msg_apply_known_macro_hint s =
  incr Stat.nMacroHint;
  let text = "MACRO: found known macro hint = " ^ s in
  pr2_cpp text
203 | ||
204 | ||
34e49164 | 205 | |
ae4735db C |
206 | |
(* Count one boolean ifdef in [Stat.nIfdefZero] and trace which kind of
 * clause is being turned into a comment, depending on whether the
 * condition is positive ([is_ifdef_positif]) or not.
 *)
let msg_ifdef_bool_passing is_ifdef_positif =
  incr Stat.nIfdefZero; (* of Version ? *)
  let text =
    if is_ifdef_positif
    then "commenting parts of a #if 1 or #if LINUX_VERSION"
    else "commenting a #if 0 or #if LINUX_VERSION or __cplusplus"
  in
  pr2_cpp text
212 | ||
213 | ||
(* Count one ifdef-mid-something in [Stat.nIfdefExprPassing] and trace it. *)
let msg_ifdef_mid_something () =
  let () = incr Stat.nIfdefExprPassing in
  pr2_cpp "found ifdef-mid-something"
217 | ||
(* Count one ifdef-funheader in [Stat.nIfdefFunheader]; prints nothing. *)
let msg_ifdef_funheaders () =
  incr Stat.nIfdefFunheader
221 | ||
(* Count one passed ifdef in [Stat.nIfdefPassing] and trace the
 * ifdef-cparen-else case.
 *)
let msg_ifdef_cparen_else () =
  let () = incr Stat.nIfdefPassing in
  pr2_cpp "found ifdef-cparen-else"
225 | ||
485bce71 | 226 | |
(* Count one attribute macro in [Stat.nMacroAttribute] and trace [s]. *)
let msg_attribute s =
  incr Stat.nMacroAttribute;
  let text = "ATTR:" ^ s in
  pr2_cpp text
ae4735db | 230 | |
485bce71 C |
231 | |
232 | ||
34e49164 | 233 | (*****************************************************************************) |
485bce71 | 234 | (* The regexp and basic view definitions *) |
34e49164 C |
235 | (*****************************************************************************) |
236 | ||
237 | (* opti: better to build them once and for all, especially regexp_foreach *) | |
238 | ||
(* An identifier made only of upper-case letters, digits and '_',
 * not starting with a digit. *)
let regexp_macro =
  Str.regexp "^[A-Z_][A-Z_0-9]*$"

(* linuxext: a name starting with two underscores *)
let regexp_annot =
  Str.regexp "^__.*$"

(* linuxext: a name containing DECLARE *)
let regexp_declare =
  Str.regexp ".*DECLARE.*"

(* linuxext: case-insensitive; a name containing one of the iteration
 * words listed in the alternation below *)
let regexp_foreach =
  Str.regexp_case_fold
    ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"

(* A name ending in "_t" *)
let regexp_typedef =
  Str.regexp ".*_t$"
256 | ||
(* Names that must never be accepted as typedefs. *)
let false_typedef = ["printk"]

(* [ok_typedef s] is true unless [s] is listed in [false_typedef]. *)
let ok_typedef s =
  if List.mem s false_typedef then false else true
263 | ||
(* [not_annot s] is true when [s] does not match [regexp_annot]. *)
let not_annot s =
  if s ==~ regexp_annot then false else true
266 | ||
267 | ||
34e49164 | 268 | |
34e49164 | 269 | |
485bce71 C |
270 | (*****************************************************************************) |
271 | (* Helpers *) | |
272 | (*****************************************************************************) | |
273 | ||
485bce71 | 274 | (* ------------------------------------------------------------------------- *) |
(* Compute the parenthesis and brace balance of one ifdef clause.
 *
 * The result is the pair (paren balance, brace balance): each opening
 * token adds one, each closing token removes one. For instance (-1,0)
 * means one ')' too many and well balanced '{}'.
 *
 * could do for [] too ?
 * could do for ',' if encounter ',' at "toplevel", not inside () or {}
 * then if have ifdef, then certainly can lead to a problem.
 *)
let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
 fun clause ->
  let paren_balance = ref 0 in
  let brace_balance = ref 0 in
  let account t =
    let tok = t.tok in
    (* same test order as a single match: '(' , '{' , ')' , '}' *)
    if TH.is_opar tok
    then incr paren_balance
    else
      match tok with
      | TOBrace _ -> incr brace_balance
      | _ when TH.is_cpar tok -> decr paren_balance
      | TCBrace _ -> decr brace_balance
      | _ -> ()
  in
  TV.iter_token_ifdef account clause;
  (!paren_balance, !brace_balance)
294 | ||
295 | ||
296 | (* ------------------------------------------------------------------------- *) | |
(* Look-ahead limit; not used in the visible part of this file, so its
 * exact unit is to be confirmed at the use sites. *)
let forLOOKAHEAD : int = 30
298 | ||
ae4735db | 299 | |
(* Decide whether a token list (expected to start inside the argument
 * list of a candidate foreach macro) really is a foreach: look at what
 * follows the closing ')' of that argument list, skipping over nested
 * parenthesised groups.
 *
 * The answer is true when the ')' is directly followed by '{', or by a
 * token that can start a single-statement body (identifier, if, while,
 * for, switch, return). Undoubtedly more cases are needed.
 *
 * todo: use indentation instead of premier(statement) ?
 * todo: premier(statement) - suivant(funcall)
 *)
let is_really_foreach xs =
  (* returns (verdict, tokens left after the group that was closed) *)
  let rec scan = function
    | [] -> false, []
    | TCPar _ ::
      (TOBrace _ | TIdent _ | Tif _ | Twhile _ | Tfor _ | Tswitch _
      | Treturn _)
      :: rest ->
        true, rest
    | TCPar _ :: rest -> false, rest
    | TOPar _ :: rest ->
        (* nested group: consume it, ignore its verdict, then go on *)
        let (_, after_group) = scan rest in
        scan after_group
    | _ :: rest -> scan rest
  in
  fst (scan xs)
329 | ||
330 | ||
331 | (* ------------------------------------------------------------------------- *) | |
(* Store [Some cnt] in the tag reference carried by an ifdef-like
 * directive token [x].
 * Raises [Impossible] when [x] is not one of the directive tokens
 * listed below.
 *)
let set_ifdef_token_parenthize_info cnt x =
  let slot =
    match x with
    | TIfdef (tag, _)
    | TIfdefelse (tag, _)
    | TIfdefelif (tag, _)
    | TEndif (tag, _)
    | TIfdefBool (_, tag, _)
    | TIfdefMisc (_, tag, _)
    | TIfdefVersion (_, tag, _) -> tag
    | _ -> raise Impossible
  in
  slot := Some cnt
485bce71 C |
346 | |
347 | ||
485bce71 | 348 | |
(* Global counter giving a fresh number to each ifdef group visited by
 * [set_ifdef_parenthize_info]. It is never reset in this part of the file.
 *)
let ifdef_paren_cnt = ref 0

(* Tag every directive token of every ifdef group (recursively) with the
 * pair (number of the group, number of directives in the group), using
 * [set_ifdef_token_parenthize_info].
 *)
let rec set_ifdef_parenthize_info xs =
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdefbool (_, clauses, directives)
    | Ifdef (clauses, directives) ->
        incr ifdef_paren_cnt;
        (* the counter is read before recursing into the clauses, so all
         * the directives of this group share the same number *)
        let stamp = (!ifdef_paren_cnt, List.length directives) in
        List.iter
          (fun d -> set_ifdef_token_parenthize_info stamp d.tok)
          directives;
        List.iter set_ifdef_parenthize_info clauses
  in
  List.iter visit xs
366 | ||
367 | ||
978fd7e5 C |
368 | (*****************************************************************************) |
369 | (* The parsing hack for #define *) | |
370 | (*****************************************************************************) | |
371 | ||
ae4735db | 372 | (* To parse macro definitions I need to do some tricks |
978fd7e5 C |
373 | * as some information can be get only at the lexing level. For instance |
374 | * the space after the name of the macro in '#define foo (x)' is meaningful | |
375 | * but the grammar can not get this information. So define_ident below | |
376 | * look at such space and generate a special TOpardefine. In a similar | |
377 | * way macro definitions can contain some antislash and newlines | |
ae4735db C |
378 | * and the grammar need to know where the macro ends (which is |
379 | * a line-level and so low token-level information). Hence the | |
978fd7e5 | 380 | * function 'define_line' below and the TDefEol. |
ae4735db C |
381 | * |
382 | * update: TDefEol is handled in a special way at different places, | |
978fd7e5 C |
383 | * a little bit like EOF, especially for error recovery, so this |
384 | * is an important token that should not be retagged! | |
ae4735db C |
385 | * |
386 | * | |
387 | * ugly hack, a better solution perhaps would be to erase TDefEOL | |
388 | * from the Ast and list of tokens in parse_c. | |
389 | * | |
978fd7e5 | 390 | * note: I do a +1 somewhere, it's for the unparsing to correctly sync. |
ae4735db | 391 | * |
978fd7e5 C |
392 | * note: can't replace mark_end_define by simply a fakeInfo(). The reason |
393 | * is where is the \n TCommentSpace. Normally there is always a last token | |
394 | * to synchronize on, either EOF or the token of the next toplevel. | |
ae4735db | 395 | * In the case of the #define we got in list of token |
978fd7e5 C |
396 | * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will |
397 | * not synchronize on it and so we will not print the "\n". | |
398 | * A solution would be to put the TDefEOL before the "\n". | |
c491d8ee | 399 | * (jll: tried to do this, see the comment "Put end of line..." below) |
ae4735db C |
400 | * |
401 | * todo?: could put a ExpandedTok for that ? | |
978fd7e5 | 402 | *) |
(* Build the TDefEOL token closing a #define, from the info [ii] of the
 * last token of the define: same parse info, but with an empty string
 * and a char position one past that of [ii]. Fresh annotation and
 * comment references are allocated for the new token.
 *)
let mark_end_define ii =
  let base = Ast_c.parse_info_of_info ii in
  let eol_pinfo =
    { base with
      Common.str = "";
      Common.charpos = Ast_c.pos_of_info ii + 1;
    }
  in
  TDefEOL
    { Ast_c.pinfo = Ast_c.OriginTok eol_pinfo;
      cocci_tag = ref Ast_c.emptyAnnot;
      comments_tag = ref Ast_c.emptyComments;
    }
414 | ||
415 | (* put the TDefEOL at the good place *) | |
(* [define_line_1 acc xs] copies the tokens of [xs] onto the reversed
 * accumulator [acc] until it meets a #define or a #undef, then hands
 * over to [define_line_2] which looks for the end of that directive.
 * An escaped newline met outside of a directive is reported and turned
 * into a TCommentSpace.
 *)
let rec define_line_1 acc xs =
  match xs with
  | [] -> List.rev acc
  | ((TDefine ii | TUndef ii) as directive) :: rest ->
      define_line_2 (directive :: acc) (Ast_c.line_of_info ii) ii rest
  | TCppEscapedNewline ii :: rest ->
      pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
      pr2 (Ast_c.strloc_of_info ii);
      define_line_1 (TCommentSpace ii :: acc) rest
  | tok :: rest -> define_line_1 (tok :: acc) rest

(* [define_line_2 acc line lastinfo xs]: we are inside a directive whose
 * current line is [line]; [lastinfo] is the info of the last token seen
 * in it. Tokens on the same line are accumulated; an escaped newline
 * becomes a TCommentSpace and extends the directive to the next line;
 * the first token on another line (or EOF) ends the directive, and a
 * TDefEOL built from [lastinfo] is inserted.
 *)
and define_line_2 acc line lastinfo xs =
  match xs with
  | [] ->
      (* should not happened, should meet EOF before *)
      pr2 "PB: WEIRD";
      List.rev (mark_end_define lastinfo :: acc)
  | tok :: rest ->
      let tok_line = TH.line_of_tok tok in
      let tok_info = TH.info_of_tok tok in
      (match tok with
      | EOF _ ->
          define_line_1 (tok :: mark_end_define lastinfo :: acc) rest
      | TCppEscapedNewline ii ->
          if tok_line <> line then pr2 "PB: WEIRD: not same line number";
          define_line_2 (TCommentSpace ii :: acc) (line + 1) tok_info rest
      | _ when tok_line =|= line ->
          define_line_2 (tok :: acc) line tok_info rest
      | _ ->
          (* Put end of line token before the newline. A newline at least
             must be there because the line changed and because we saw a
             #define previously to get to this function at all *)
          let newline = List.hd acc in
          let before_newline = List.tl acc in
          define_line_1
            (newline :: mark_end_define lastinfo :: before_newline)
            xs
      )
464 | ||
(* [define_ident acc xs] retags the name following each #define / #undef
 * as a TIdentDefine, and the '(' stuck to a macro name as a
 * TOParDefine. All other tokens are copied unchanged. [acc] is the
 * reversed list of the tokens already processed; the result is in
 * source order.
 *
 * Fixes compared to the previous version:
 *  - a malformed #undef is now reported as such (the message used to
 *    say "#define");
 *  - when the token following "#define " cannot be remapped to an
 *    identifier, the space and that token are kept in the stream
 *    instead of being silently dropped.
 *)
let rec define_ident acc xs =
  match xs with
  | [] -> List.rev acc
  | TUndef ii::xs ->
      let acc = TUndef ii :: acc in
      (match xs with
        TCommentSpace i1::TIdent (s,i2)::xs ->
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          define_ident acc xs
      | _ ->
          pr2 "WEIRD: weird #undef body";
          define_ident acc xs
      )
  | TDefine ii::xs ->
      let acc = TDefine ii :: acc in
      (match xs with
      | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
          (* Change also the kind of TIdent to avoid bad interaction
           * with other parsing_hack tricks. For instance if we keep
           * TIdent then the stringification algo can believe the TIdent
           * is a string-macro. So simpler to change the kind of the
           * ident too.
           *)
          (* if the '(' is stuck to the ident, then
           * it's a macro-function. Change token to avoid ambiguity
           * between #define foo(x) and #define foo (x)
           *)
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          let acc = (TOParDefine i3) :: acc in
          define_ident acc xs

      | TCommentSpace i1::TIdent (s,i2)::xs ->
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          define_ident acc xs

      (* bugfix: ident of macro (as well as params, cf below) can be tricky
       * note, do we need to subst in the body of the define ? no cos
       * here the issue is the name of the macro, as in #define inline,
       * so obviously the name of this macro will not be used in its
       * body (it would be a recursive macro, which is forbidden).
       *)
      | TCommentSpace i1::t::xs ->
          let s = TH.str_of_tok t in
          let ii = TH.info_of_tok t in
          if s ==~ Common.regexp_alpha
          then begin
            pr2 (spf "remapping: %s to an ident in macro name" s);
            let acc = (TCommentSpace i1) :: acc in
            let acc = (TIdentDefine (s,ii)) :: acc in
            define_ident acc xs
          end
          else begin
            pr2 "WEIRD: weird #define body";
            (* keep the space, and let the generic cases handle [t], so
             * that no token disappears from the stream *)
            define_ident ((TCommentSpace i1) :: acc) (t :: xs)
          end

      | _ ->
          pr2 "WEIRD: weird #define body";
          define_ident acc xs
      )
  | x::xs ->
      let acc = x :: acc in
      define_ident acc xs
978fd7e5 C |
532 | |
533 | ||
ae4735db C |
534 | |
(* Two passes over the token list: first [define_line_1] inserts the
 * TDefEOL markers, then [define_ident] retags the macro names.
 *)
let fix_tokens_define2 xs =
  let with_eol = define_line_1 [] xs in
  define_ident [] with_eol
537 | ||
(* Same as [fix_tokens_define2], run under [Common.profile_code] with
 * the "C parsing.fix_define" label.
 *)
let fix_tokens_define a =
  let run () = fix_tokens_define2 a in
  Common.profile_code "C parsing.fix_define" run
ae4735db | 540 | |
978fd7e5 C |
541 | |
542 | ||
543 | ||
544 | ||
545 | (* ------------------------------------------------------------------------- *) | |
546 | (* Other parsing hacks related to cpp, Include/Define hacks *) | |
547 | (* ------------------------------------------------------------------------- *) | |
548 | ||
549 | (* Sometimes I prefer to generate a single token for a list of things in the | |
550 | * lexer so that if I have to passed them, like for passing TInclude then | |
ae4735db C |
551 | * it's easy. Also if I don't do a single token, then I need to |
552 | * parse the rest which may not need special stuff, like detecting | |
978fd7e5 C |
553 | * end of line which the parser is not really ready for. So for instance |
554 | * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just | |
ae4735db | 555 | * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ? |
978fd7e5 C |
556 | * but this kind of token is valid only after a #include and the |
557 | * lexing and parsing rules are different for such tokens so not that | |
558 | * easy to parse such things in parser_c.mly. Hence the following hacks. | |
ae4735db | 559 | * |
978fd7e5 C |
560 | * less?: maybe could get rid of this like I get rid of some of fix_define. |
561 | *) | |
562 | ||
563 | (* helpers *) | |
564 | ||
(* used to generate a new token info from an existing one:
 * [new_info posadd str ii] is [ii] with its string replaced by [str]
 * and both its char position and its column shifted by [posadd].
 *)
let new_info posadd str ii =
  let origin = Ast_c.parse_info_of_info ii in
  let shifted =
    { origin with
      Common.charpos = Ast_c.pos_of_info ii + posadd;
      str = str;
      column = Ast_c.col_of_info ii + posadd;
    }
  in
  { Ast_c.pinfo = Ast_c.OriginTok shifted;
    (* must generate a new ref each time, otherwise share *)
    cocci_tag = ref Ast_c.emptyAnnot;
    comments_tag = ref Ast_c.emptyComments;
  }
577 | ||
578 | ||
(* Turn every token up to and including the next TDefEOL into a
 * TCommentCpp token (real comments are kept as they are); the tokens
 * after the TDefEOL are returned unchanged.
 * Fails when the list contains no TDefEOL.
 *)
let rec comment_until_defeol xs =
  match xs with
  | [] ->
      (* job not done in Cpp_token_c.define_parse ? *)
      failwith "cant find end of define token TDefEOL"
  | (Parser_c.TDefEOL _ as eol) :: rest ->
      Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok eol)
      :: rest
  | tok :: rest ->
      let tok' =
        (* bugfix: otherwise may lose a TComment token *)
        if TH.is_real_comment tok
        then tok
        else
          Parser_c.TCommentCpp
            (Token_c.CppPassingNormal (*good?*), TH.info_of_tok tok)
      in
      tok' :: comment_until_defeol rest
598 | ||
(* Apply [Common.drop_until] with a "is a TDefEOL" predicate, then drop
 * the head of what it returns.
 *)
let drop_until_defeol xs =
  let is_defeol = function
    | Parser_c.TDefEOL _ -> true
    | _ -> false
  in
  let from_eol = Common.drop_until is_defeol xs in
  List.tl from_eol
602 | ||
603 | ||
604 | ||
605 | (* ------------------------------------------------------------------------- *) | |
606 | (* returns a pair (replaced token, list of next tokens) *) | |
607 | (* ------------------------------------------------------------------------- *) | |
608 | ||
(* Split the components of an include into a pair made of a
 * TIncludeStart token (whose string is [includes]) and a one-element
 * list holding the TIncludeFilename token; the info of the latter is
 * shifted by the length of [includes].
 *)
let tokens_include (info, includes, filename, inifdef) =
  let start_tok =
    Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef)
  in
  let filename_info = new_info (String.length includes) filename info in
  (start_tok, [Parser_c.TIncludeFilename (filename, filename_info)])
614 | ||
615 | ||
616 | ||
617 | ||
485bce71 C |
618 | (*****************************************************************************) |
619 | (* CPP handling: macros, ifdefs, macros defs *) | |
620 | (*****************************************************************************) | |
621 | ||
0708f913 C |
622 | (* ------------------------------------------------------------------------- *) |
623 | (* special skip_start skip_end handling *) | |
624 | (* ------------------------------------------------------------------------- *) | |
625 | ||
626 | (* note: after this normally the token list should not contain any more the | |
627 | * TCommentSkipTagStart and End tokens. | |
628 | *) | |
(* Mark as comments (CppPassingExplicit) all the tokens between a
 * skip-start tag and the matching skip-end tag, both tags included.
 * Fails when a start tag has no end tag, or when an end tag is met
 * without a preceding start tag.
 *)
let rec commentize_skip_start_to_end xs =
  match xs with
  | [] -> ()
  | ({tok = TCommentSkipTagStart _} as start) :: rest ->
      (try
        let is_end = function
          | {tok = TCommentSkipTagEnd _} -> true
          | _ -> false
        in
        let (skipped, stop, remaining) = Common.split_when is_end rest in
        List.iter
          (set_as_comment Token_c.CppPassingExplicit)
          (start :: stop :: skipped);
        commentize_skip_start_to_end remaining
      with Not_found ->
        failwith "could not find end of skip_start special comment"
      )
  | {tok = TCommentSkipTagEnd _} :: _ ->
      failwith "found skip_end comment but no skip_start"
  | _ :: rest ->
      commentize_skip_start_to_end rest
ae4735db C |
655 | |
656 | ||
0708f913 C |
657 | |
658 | ||
34e49164 C |
659 | (* ------------------------------------------------------------------------- *) |
660 | (* ifdef keeping/passing *) | |
661 | (* ------------------------------------------------------------------------- *) | |
662 | ||
663 | (* #if 0, #if 1, #if LINUX_VERSION handling *) | |
(* Handle the ifdefs whose condition is a known boolean.
 *
 * For each Ifdefbool group the directive tokens are marked as comments,
 * then:
 *  - positive condition: every clause but the first is passed;
 *  - negative condition: the first clause is passed, as well as all
 *    the following ones except the last (if any).
 * Plain Ifdef groups are only traversed recursively.
 * Raises [Impossible] on an Ifdefbool with no clause.
 *)
let rec find_ifdef_bool xs =
  let pass_clause clause =
    iter_token_ifdef (set_as_comment Token_c.CppPassingNormal) clause
  in
  let handle = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) -> List.iter find_ifdef_bool clauses
    | Ifdefbool (is_ifdef_positif, clauses, directives) ->
        msg_ifdef_bool_passing is_ifdef_positif;
        (match clauses with
        | [] -> raise Impossible
        | first :: others ->
            List.iter (set_as_comment Token_c.CppDirective) directives;
            if is_ifdef_positif
            then List.iter pass_clause others
            else begin
              pass_clause first;
              (* keep only last *)
              match List.rev others with
              | _last :: earlier -> List.iter pass_clause earlier
              | [] -> (* not #else *) ()
            end
        )
  in
  List.iter handle xs
693 | ||
694 | ||
695 | ||
(* Maximum number of elements a clause may have to be analysed by
 * [find_ifdef_mid]. *)
let thresholdIfdefSizeMid = 6

(* infer ifdef involving not-closed expressions/statements
 *
 * For an Ifdef with at least two clauses, all small (at most
 * [thresholdIfdefSizeMid] elements) and without nested ifdef, compute
 * the paren/brace balance of each clause. When the clauses are
 * unbalanced AND all have the same balance, keep only the first clause
 * and pass the others as comments, together with the directives.
 * The clauses are then traversed recursively in all cases.
 * Raises [Impossible] on an Ifdef with no clause.
 *
 * bugfix: the test used to be written
 *   cnt1 <> 0 || cnt2 <> 0 && for_all ...
 * which, && binding tighter than ||, skipped the "all clauses have the
 * same balance" check whenever the parenthesis count was non-zero.
 *)
let rec find_ifdef_mid xs =
  let small_enough clause = List.length clause <= thresholdIfdefSizeMid in
  let is_flat clause =
    List.for_all (function NotIfdefLine _ -> true | _ -> false) clause
  in
  xs +> List.iter (function
  | NotIfdefLine _ -> ()
  | Ifdef (xxs, info_ifdef_stmt) ->
      (match xxs with
      | [] -> raise Impossible
      | [_first] -> ()
      | _first::second::rest ->
          (* don't analyse big ifdef, don't want nested ifdef *)
          if List.for_all small_enough xxs && List.for_all is_flat xxs
          then begin
            let counts = List.map count_open_close_stuff_ifdef_clause xxs in
            (* xxs has at least two elements here, so List.hd is safe *)
            let (cnt1, cnt2) = List.hd counts in
            if (cnt1 <> 0 || cnt2 <> 0)
               && List.for_all (fun c -> c =*= (cnt1, cnt2)) counts
            then begin
              msg_ifdef_mid_something ();

              (* keep only first, treat the rest as comment *)
              List.iter (set_as_comment Token_c.CppDirective) info_ifdef_stmt;
              List.iter
                (iter_token_ifdef
                   (set_as_comment Token_c.CppPassingCosWouldGetError))
                (second :: rest)
            end
          end
      );
      List.iter find_ifdef_mid xxs

  (* no need complex analysis for ifdefbool *)
  | Ifdefbool (_, xxs, _info_ifdef_stmt) ->
      List.iter find_ifdef_mid xxs
  )
744 | ||
745 | ||
(* Maximum number of extra elements each clause may have, after its
 * first line, in the first pattern of [find_ifdef_funheaders]. *)
let thresholdFunheaderLimit = 4

(* ifdef defining alternate function header, type
 *
 * Recognize three shapes of ifdef immediately followed by a line whose
 * first token is a '{' in column 0, all the clause lines involved
 * starting in column 0 too:
 *  1. two clauses (each of limited size);
 *  2. two clauses, the second being itself a two-clause ifdef;
 *  3. three one-line clauses (ifdef with elseif).
 * In each case the first clause is kept; the directives and the
 * alternative header lines are marked as comments. Any other ifdef is
 * traversed recursively.
 *)
let rec find_ifdef_funheaders groups =
  let mark_directives toks =
    List.iter (set_as_comment Token_c.CppDirective) toks
  in
  let pass_tokens toks =
    List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) toks
  in
  match groups with
  | [] -> ()
  | NotIfdefLine _ :: rest -> find_ifdef_funheaders rest

  (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
  | Ifdef
      ([ NotIfdefLine ({col = 0} :: _) :: block1;
         NotIfdefLine (({col = 0} as head2) :: tail2) :: block2 ],
       directives)
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest
    when List.length block1 <= thresholdFunheaderLimit
      && List.length block2 <= thresholdFunheaderLimit ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      mark_directives directives;
      pass_tokens (head2 :: tail2);
      iter_token_ifdef
        (set_as_comment Token_c.CppPassingCosWouldGetError)
        block2

  (* ifdef with nested ifdef *)
  | Ifdef
      ([ [NotIfdefLine ({col = 0} :: _)];
         [Ifdef
            ([ [NotIfdefLine (({col = 0} as head2) :: tail2)];
               [NotIfdefLine (({col = 0} as head3) :: tail3)] ],
             inner_directives)] ],
       directives)
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      mark_directives directives;
      mark_directives inner_directives;
      pass_tokens ([head2; head3] @ tail2 @ tail3)

  (* ifdef with elseif *)
  | Ifdef
      ([ [NotIfdefLine ({col = 0} :: _)];
         [NotIfdefLine (({col = 0} as head2) :: tail2)];
         [NotIfdefLine (({col = 0} as head3) :: tail3)] ],
       directives)
    :: NotIfdefLine ({tok = TOBrace _; col = 0} :: _)
    :: rest ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      mark_directives directives;
      pass_tokens ([head2; head3] @ tail2 @ tail3)

  (* recurse *)
  | (Ifdef (clauses, _) | Ifdefbool (_, clauses, _)) :: rest ->
      List.iter find_ifdef_funheaders clauses;
      find_ifdef_funheaders rest
ae4735db | 816 | |
34e49164 C |
817 | |
818 | ||
485bce71 | 819 | (* ?? *) |
ae4735db C |
(* For every ifdef group in xs, set to true the boolean reference carried by
 * each TInclude token that iter_token_ifdef visits inside the group's
 * branches. Lines outside any ifdef are left untouched.
 *)
let adjust_inifdef_include xs =
  let flag_include tokext =
    match tokext.tok with
    | Parser_c.TInclude (_, _, inifdef_ref, _) -> inifdef_ref := true
    | _ -> ()
  in
  let visit_group = function
    | NotIfdefLine _ -> ()
    | Ifdef (branches, _) | Ifdefbool (_, branches, _) ->
        List.iter (iter_token_ifdef flag_include) branches
  in
  List.iter visit_group xs
831 | ||
832 | ||
833 | ||
34e49164 | 834 | |
485bce71 | 835 | |
34e49164 C |
836 | |
837 | ||
ae4735db C |
(* Detect ifdefs whose first branch ends with a closing ')' just before the
 * #else. In that case only the first branch is kept: the directive tokens
 * are passed to set_as_comment with Token_c.CppDirective and all the tokens
 * of the other branches with Token_c.CppPassingCosWouldGetError.
 *
 * Nested ifdefs are visited too. Ifdefbool groups are only traversed, no
 * analysis is done on them.
 *
 * Raises Impossible if an Ifdef has no branch at all.
 *)
let find_ifdef_cparen_else xs =
  (* Is the last token of the last line of this branch a closing paren?
   * Emptiness is tested by pattern matching rather than with List.length,
   * so that the list is not fully traversed just to compare with 0.
   *)
  let ends_with_cparen branch =
    match branch with
    | [] -> false
    | _ ->
        (match Common.last branch with
         | NotIfdefLine [] -> false
         | NotIfdefLine toks -> TH.is_cpar (Common.last toks).tok
         | Ifdef _ | Ifdefbool _ -> false)
  in
  let rec aux groups =
    groups +> List.iter (function
      | NotIfdefLine _ -> ()
      | Ifdef (xxs, info_ifdef_stmt) ->
          (match xxs with
           | [] -> raise Impossible
           | [_] -> ()
           | first :: others ->
               if ends_with_cparen first then begin
                 msg_ifdef_cparen_else ();

                 (* keep only first, treat the rest as comment *)
                 info_ifdef_stmt +> List.iter
                   (set_as_comment Token_c.CppDirective);
                 others +> List.iter
                   (iter_token_ifdef
                      (set_as_comment Token_c.CppPassingCosWouldGetError))
               end
          );
          List.iter aux xxs

      (* no need for a complex analysis for ifdefbool *)
      | Ifdefbool (_, xxs, _) ->
          List.iter aux xxs
    )
  in
  aux xs
34e49164 C |
882 | |
883 | ||
708f4980 C |
884 | (* ------------------------------------------------------------------------- *) |
885 | (* cpp-builtin part2, macro, using standard.h or other defs *) | |
886 | (* ------------------------------------------------------------------------- *) | |
34e49164 | 887 | |
ae4735db | 888 | (* now in cpp_token_c.ml *) |
34e49164 C |
889 | |
890 | (* ------------------------------------------------------------------------- *) | |
891 | (* stringification *) | |
892 | (* ------------------------------------------------------------------------- *) | |
893 | ||
(* Inside each parenthesised argument, if the argument contains at least one
 * string (TString or TMacroString) and consists only of strings and
 * identifiers, every identifier of the argument is retagged as a
 * TMacroString. Arguments that do not fit are searched recursively.
 *)
let rec find_string_macro_paren xs =
  let is_string = function
    | PToken {tok = (TString _ | TMacroString _)} -> true
    | _ -> false
  in
  let is_string_or_ident = function
    | PToken {tok = (TString _ | TMacroString _)}
    | PToken {tok = TIdent _} -> true
    | _ -> false
  in
  let retag_ident = function
    | PToken ({tok = TIdent (s, _)} as id) ->
        msg_stringification s;
        id.tok <- TMacroString (s, TH.info_of_tok id.tok)
    | _ -> ()
  in
  match xs with
  | [] -> ()
  | Parenthised (args, _) :: rest ->
      args +> List.iter (fun arg ->
        if List.exists is_string arg && List.for_all is_string_or_ident arg
        then List.iter retag_ident arg
        else find_string_macro_paren arg
      );
      find_string_macro_paren rest
  | PToken _ :: rest ->
      find_string_macro_paren rest
ae4735db | 918 | |
34e49164 C |
919 | |
920 | (* ------------------------------------------------------------------------- *) | |
921 | (* macro2 *) | |
922 | (* ------------------------------------------------------------------------- *) | |
923 | ||
924 | (* don't forget to recurse in each case *) | |
(* Macro heuristics working on the parenthesised view of the tokens:
 *  - attribute tokens (and their parenthesised argument, if any) are passed
 *    to set_as_comment with Token_c.CppAttr;
 *  - a TMacroAttr following static/extern becomes a TMacroAttrStorage;
 *  - an identifier adjacent to a string becomes a TMacroString, and its
 *    parenthesised parameters, if any, are passed to set_as_comment with
 *    Token_c.CppMacro.
 *
 * The order of the cases is important, and every case must recurse.
 *)
let rec find_macro_paren xs =
  let to_string_macro id s =
    id.tok <- TMacroString (s, TH.info_of_tok id.tok)
  in
  let hide_parens kind args info_parens =
    [Parenthised (args, info_parens)] +>
      iter_token_paren (set_as_comment kind)
  in
  match xs with
  | [] -> ()

  (* attribute with arguments *)
  | PToken ({tok = Tattribute _} as attr)
    :: Parenthised (args, info_parens)
    :: rest ->
      pr2_cpp ("MACRO: __attribute detected ");
      hide_parens Token_c.CppAttr args info_parens;
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

  (* attribute without arguments *)
  | PToken ({tok = TattributeNoarg _} as attr) :: rest ->
      pr2_cpp ("MACRO: __attributenoarg detected ");
      set_as_comment Token_c.CppAttr attr;
      find_macro_paren rest

  (* Disabled cases, kept for reference:

  (* attribute cpp, __xxx id *)
  | PToken ({tok = TIdent (s,i1)} as id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s ==~ regexp_annot
    ->
      msg_attribute s;
      id.tok <- TMacroAttr (s, i1);
      find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)

  (* attribute cpp, id __xxx *)
  | PToken ({tok = TIdent (s,i1)} as _id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
    ->
      msg_attribute s2;
      id2.tok <- TMacroAttr (s2, i2);
      find_macro_paren xs

  | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
    ::PToken ({tok = TIdent (s,i1)} as attr)
    ::xs when s ==~ regexp_annot
    ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s,i1);
      (* recurse, may have other storage attributes *)
      find_macro_paren (PToken (tok1)::xs)
  *)

  (* storage attribute *)
  | (PToken {tok = (Tstatic _ | Textern _)} as storage)
    :: PToken ({tok = TMacroAttr (s, i1)} as attr)
    :: rest ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s, i1);
      (* keep the storage token: other storage attributes may follow *)
      find_macro_paren (storage :: rest)

  (* stringification: the order of the following cases is important *)

  (* string macro with params, string before *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: Parenthised (args, info_parens)
    :: rest ->
      msg_stringification_params s;
      to_string_macro id s;
      hide_parens Token_c.CppMacro args info_parens;
      find_macro_paren rest

  (* string macro with params, string after *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: Parenthised (args, info_parens)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification_params s;
      to_string_macro id s;
      hide_parens Token_c.CppMacro args info_parens;
      find_macro_paren rest

  (* for the case where the string is not inside a funcall, but
   * for instance in an initializer.
   *)

  (* string macro variable, string before *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: rest ->
      msg_stringification s;
      to_string_macro id s;
      find_macro_paren rest

  (* string macro variable, string after *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification s;
      to_string_macro id s;
      find_macro_paren rest

  (* recurse *)
  | PToken _ :: rest -> find_macro_paren rest
  | Parenthised (args, _) :: rest ->
      List.iter find_macro_paren args;
      find_macro_paren rest
1048 | ||
1049 | ||
1050 | ||
1051 | ||
1052 | ||
1053 | (* don't forget to recurse in each case *) | |
(* Macro heuristics working on the line + parenthesised view of the tokens.
 * They retag identifiers that look like declaration macros (TMacroDecl),
 * toplevel macro calls lacking a ';' (the closing paren becomes TCParEOL),
 * and statement macros lacking a ';' (TMacroStmt).
 *
 * The order of the cases is important, and every case must recurse.
 *)
let rec find_macro_lineparen xs =
  (* Retag the identifier token macro, whose name is s, as a TMacroDecl. *)
  let retag_as_decl macro s =
    msg_declare_macro s;
    let info = TH.info_of_tok macro.tok in
    macro.tok <- TMacroDecl (Ast_c.str_of_info info, info)
  in
  (* Test on the first token of the line following a macro call written
   * without its ';'. Shared by the toplevel case and the aligned part of
   * the macro-with-parameters case.
   *)
  let may_follow_call ctx next_tok =
    match next_tok with
    | TOBrace _ -> false (* otherwise would match funcdecl *)
    | TCBrace _ when ctx <> InFunction -> false
    | TPtVirg _
    | TDotDot _ -> false
    | tok when TH.is_binary_operator tok -> false
    | _ -> true
  in
  match xs with
  | [] -> ()

  (* linuxext: ex: static [const] DEVICE_ATTR(); *)
  | Line
      [PToken {tok = Tstatic _};
       PToken ({tok = TIdent (s, _)} as macro);
       Parenthised _;
       PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_decl macro s;
      find_macro_lineparen rest

  (* the static const case *)
  | Line
      [PToken {tok = Tstatic _};
       PToken ({tok = Tconst _} as const);
       PToken ({tok = TIdent (s, _)} as macro);
       Parenthised _;
       PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_decl macro s;

      (* need to retag this const, otherwise ambiguity in the grammar:
         21: shift/reduce conflict (shift 121, reduce 137) on Tconst
         decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
         decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
         storage_class_spec : Tstatic .  (137)
      *)
      const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);

      find_macro_lineparen rest

  (* same but without the trailing ';'
   *
   * The final ';' is not required because it can be on another line and,
   * because of the way mk_line is coded, it would not be accessible from
   * here even when it is right next to the ')'.
   *)
  | Line
      [PToken {tok = Tstatic _};
       PToken ({tok = TIdent (s, _)} as macro);
       Parenthised _]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_decl macro s;
      find_macro_lineparen rest

  (* on multiple lines: static alone, then the macro call on the next line *)
  | Line [PToken {tok = Tstatic _}]
    :: Line
         [PToken ({tok = TIdent (s, _)} as macro);
          Parenthised _;
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      retag_as_decl macro s;
      find_macro_lineparen rest

  (* linuxext: ex: DECLARE_BITMAP();
   *
   * Here regexp_declare is used instead of regexp_macro because the line
   * can also be a FunCallMacro such as DEBUG(foo()); without the preceding
   * static, the only way to avoid false positives is to restrict the case
   * to .*DECLARE.* macros.
   *
   * There is a grammar rule for that, so this case is only needed when the
   * parameters of the DECLARE_xxx are weird and can not be mapped on an
   * argument_list.
   *)
  | Line
      [PToken ({tok = TIdent (s, _)} as macro);
       Parenthised _;
       PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_declare ->
      retag_as_decl macro s;
      find_macro_lineparen rest

  (* toplevel macros.
   * module_init(xxx)
   *
   * Could also transform the TIdent in a TMacroTop but that can give false
   * positives, so it is easier to just change the TCPar and so just solve
   * the end-of-stream pb of ocamlyacc.
   *)
  | Line
      [PToken {tok = TIdent (s, _); col = col1; where = ctx};
       Parenthised (_, info_parens)]
    :: rest
    when col1 =|= 0 ->
      let condition =
        (* to reduce the number of false positives *)
        match rest with
        | Line (PToken ({col = col2} as other) :: _) :: _ ->
            TH.is_eof other.tok
            || (col2 =|= 0 && may_follow_call ctx other.tok)
        | _ -> false
      in
      if condition then begin
        msg_macro_toplevel_noptvirg s;
        (* just to avoid the end-of-stream pb of ocamlyacc *)
        let tcpar = Common.last info_parens in
        tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok)
        (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
      end;

      find_macro_lineparen rest

  (* macro with parameters
   * ex: DEBUG()
   *     return x;
   *)
  | Line
      [PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro);
       Parenthised (args, info_parens)]
    :: (Line (PToken ({col = col2} as other) :: restline2) as line2)
    :: rest
    (* when s ==~ regexp_macro *)
    ->
      let condition =
        (col1 =|= col2 && may_follow_call ctx other.tok)
        ||
        (col2 <= col1 &&
         (match other.tok, restline2 with
          | TCBrace _, _ when ctx =*= InFunction -> true
          | (Treturn _ | Tif _ | Telse _), _ -> true

          (* case of label, usually put in first line *)
          | TIdent _, (PToken {tok = TDotDot _} :: _) -> true

          | _ -> false
         ))
      in

      (* nothing is retagged when the macro starts at column 0 *)
      if condition && not (col1 =|= 0) then begin
        msg_macro_noptvirg s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
        [Parenthised (args, info_parens)] +>
          iter_token_paren (set_as_comment Token_c.CppMacro)
      end;

      find_macro_lineparen (line2 :: rest)

  (* linuxext:? single macro
   * ex: LOCK
   *     foo();
   *     UNLOCK
   *
   * todo: factorize code with previous rule ?
   *)
  | Line
      [PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro)]
    :: (Line (PToken ({col = col2} as other) :: _) as line2)
    :: rest
    (* when s ==~ regexp_macro *)
    ->
      let condition =
        (col1 =|= col2 &&
         col1 <> 0 && (* otherwise can match typedef of fundecl *)
         (match other.tok with
          | TPtVirg _ -> false
          | TOr _ -> false
          | TCBrace _ when ctx <> InFunction -> false
          | tok when TH.is_binary_operator tok -> false
          | _ -> true
         ))
        ||
        (col2 <= col1 &&
         (match other.tok with
          | TCBrace _ when ctx =*= InFunction -> true
          | Treturn _ | Tif _ | Telse _ -> true
          | _ -> false
         ))
      in

      if condition then begin
        msg_macro_noptvirg_single s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok)
      end;
      find_macro_lineparen (line2 :: rest)

  | _ :: rest ->
      find_macro_lineparen rest
1335 | ||
1336 | ||
485bce71 C |
1337 | |
1338 | (* ------------------------------------------------------------------------- *) | |
1339 | (* define tobrace init *) | |
1340 | (* ------------------------------------------------------------------------- *) | |
1341 | ||
ae4735db C |
(* Look for #define bodies that start with a '{' followed by what looks like
 * the beginning of an initializer, and retag that '{' as a
 * TOBraceDefineInit. The contents of parenthesised groups are not visited.
 *)
let find_define_init_brace_paren xs =
  (* Retag the brace and log label followed by the macro name s, when
   * is_init holds. *)
  let mark_brace is_init label s tokbrace i1 =
    if is_init then begin
      pr2_cpp (label ^ s);
      tokbrace.tok <- TOBraceDefineInit i1
    end
  in
  let rec aux toks =
    match toks with
    | [] -> ()

    (* mainly for firefox *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (s, _)}
      :: PToken ({tok = TOBrace i1} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        let is_init =
          match tok2.tok, tok3.tok with
          | (TInt _ | TString _ | TIdent _), TComma _ -> true
          | _ -> false
        in
        mark_brace is_init "found define initializer: " s tokbrace i1;
        aux rest

    (* mainly for linux, especially in sound/ *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (s, _)}
      :: Parenthised _
      :: PToken ({tok = TOBrace i1} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        let is_init =
          match tok2.tok, tok3.tok with
          | (TInt _ | TIdent _), TComma _ -> true
          | TDot _, TIdent _ -> true
          | _ -> false
        in
        mark_brace is_init "found define initializer with param: " s
          tokbrace i1;
        aux rest

    (* recurse; no need to look inside the parentheses for tobrace init *)
    | PToken _ :: rest -> aux rest
    | Parenthised _ :: rest -> aux rest
  in
  aux xs
1405 | ||
1406 | ||
34e49164 C |
1407 | (* ------------------------------------------------------------------------- *) |
1408 | (* action *) | |
1409 | (* ------------------------------------------------------------------------- *) | |
1410 | ||
708f4980 | 1411 | (* Obsolete now with macro expansion? We get some regressions if it is commented out. |
ae4735db | 1412 | * todo: if do bad decision here, then it can influence other phases |
708f4980 | 1413 | * and make it hard to parse. So maybe when have a parse error, should |
ae4735db | 1414 | * undo some of the guess those heuristics have done, and restore |
708f4980 C |
1415 | * the original token value. |
1416 | *) | |
1417 | ||
34e49164 C |
(* Look for calls IDENT(args) where at least one argument contains a
 * statement token (TH.is_statement): such a macro is reported as a
 * higher-order macro and the tokens of those arguments are retagged as
 * TAction.
 *)
let rec find_actions = function
  | [] -> ()

  | PToken {tok = TIdent (s, _)} :: Parenthised (args, _) :: rest ->
      find_actions rest;
      List.iter find_actions args;
      if find_actions_params args
      then msg_macro_higher_order s

  | _ :: rest ->
      find_actions rest

(* Retag as TAction every token of each argument that contains a statement
 * token. Returns true when the last such argument comes after any others
 * in the list, i.e. when at least one argument was retagged.
 *)
and find_actions_params xxs =
  let tag_as_action x =
    if TH.is_eof x.tok
    then
      (* certainly because paren detection had a pb because of
       * some ifdef-exp. Similar to the additional checking that is
       * done in set_as_comment.
       *)
      pr2 "PB: weird, I try to tag an EOF token as an action"
    else if TH.is_eom x.tok
    then
      (* cf tests-bis/no_cpar_macro.c *)
      pr2 "PB: weird, I try to tag an EOM token as an action"
    else
      x.tok <- TAction (TH.info_of_tok x.tok)
  in
  List.fold_left (fun modified arg ->
    let has_statement =
      tokens_of_paren arg +> List.exists (fun x -> TH.is_statement x.tok)
      (* undo: && List.length toks > 1
       * good for sparse, not good for linux
       *)
    in
    if has_statement then begin
      iter_token_paren tag_as_action arg;
      true
    end
    else modified
  ) false xxs
1461 | ||
1462 | ||
1463 | ||
1464 | (* ------------------------------------------------------------------------- *) | |
1465 | (* main fix cpp function *) | |
1466 | (* ------------------------------------------------------------------------- *) | |
1467 | ||
(* Drop comment tokens and cpp instruction tokens from xs, except TDefine
 * which is always kept.
 *
 * The define is kept because dropping it without also dropping its body
 * could confuse the line heuristics: they would not find the TDefine in
 * column 0 but a TDefineIdent in a column > 0.
 *)
let filter_cpp_stuff xs =
  let keep x =
    let tok = x.tok in
    not (TH.is_comment tok)
    && (match tok with
        | Parser_c.TDefine _ -> true
        | _ -> not (TH.is_cpp_instruction tok))
  in
  List.filter keep xs
34e49164 C |
1483 | |
(* Give a virtual position to the ExpandedTok and FakeTok tokens of l: each
 * one is rewrapped with the parse info of the closest preceding OriginTok
 * together with an offset, which is the sum of the string lengths of the
 * tokens seen since the start of that OriginTok.
 *
 * Tokens that come before the first OriginTok are returned unchanged.
 * Fails with "abstract not expected" on an AbstractLineTok found after the
 * first OriginTok.
 *)
let insert_virtual_positions l =
  let strlen x = String.length (Ast_c.str_of_info x) in
  let rec loop prev offset acc toks =
    match toks with
    | [] -> List.rev acc
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        let relocate pinfo =
          TH.visitor_info_of_tok
            (fun i -> Ast_c.rewrap_pinfo pinfo i) tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             (* a real token: it becomes the new anchor *)
             let prev = Ast_c.parse_info_of_info info in
             loop prev (strlen info) (tok :: acc) rest
         | Ast_c.ExpandedTok (pi, _) ->
             let tok' = relocate (Ast_c.ExpandedTok (pi, (prev, offset))) in
             loop prev (offset + (strlen info)) (tok' :: acc) rest
         | Ast_c.FakeTok (s, _) ->
             let tok' = relocate (Ast_c.FakeTok (s, (prev, offset))) in
             loop prev (offset + (strlen info)) (tok' :: acc) rest
         | Ast_c.AbstractLineTok _ -> failwith "abstract not expected")
  in
  (* Copy tokens as they are until the first OriginTok gives an anchor. *)
  let rec skip_fake toks =
    match toks with
    | [] -> []
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             let prev = Ast_c.parse_info_of_info info in
             let res = loop prev (strlen info) [] rest in
             tok :: res
         | _ -> tok :: skip_fake rest)
  in
  skip_fake l
708f4980 | 1514 | |
485bce71 | 1515 | (* ------------------------------------------------------------------------- *) |
f59c9fb7 | 1516 | |
(* Main entry point of the cpp heuristics: wrap the tokens as extended
 * tokens, run each heuristic phase on the view it needs, then return the
 * plain tokens after insert_virtual_positions.
 *
 * The order of the phases is important: if the action heuristic were run
 * first then, because of ifdefs, there could be unclosed parentheses, a
 * macro could wrongly be taken for a higher-order macro and too many tokens
 * would be eaten. So the ifdef phase must come first.
 *
 * The filtered views are recomputed before each phase because the tokens
 * are mutable: an earlier phase may have turned more of them into comments.
 *)
let fix_tokens_cpp2 ~macro_defs tokens =
  let tokens2 = ref (Common.acc_map TV.mk_token_extended tokens) in

  (* is_comment also filters the TCommentCpp created in
   * commentize_skip_start_to_end; #define/#include could be filtered too *)
  let without_comments () =
    List.filter (fun x -> not (TH.is_comment x.tok)) !tokens2 in
  let paren_view () =
    TV.mk_parenthised (filter_cpp_stuff !tokens2) in

  commentize_skip_start_to_end !tokens2;

  (* ifdef *)
  let ifdef_grouped = TV.mk_ifdef (without_comments ()) in
  set_ifdef_parenthize_info ifdef_grouped;

  find_ifdef_funheaders ifdef_grouped;
  find_ifdef_bool ifdef_grouped;
  find_ifdef_mid ifdef_grouped;
  (* change order ? maybe the cparen_else heuristic makes some of the
   * funheaders heuristics irrelevant ?
   *)
  find_ifdef_cparen_else ifdef_grouped;
  adjust_inifdef_include ifdef_grouped;

  (* macro 1 *)
  let paren_grouped = paren_view () in
  Cpp_token_c.apply_macro_defs
    ~msg_apply_known_macro
    ~msg_apply_known_macro_hint
    macro_defs paren_grouped;
  (* because the before field is used by apply_macro_defs *)
  tokens2 := TV.rebuild_tokens_extented !tokens2;

  (* tagging contextual info (InFunc, InStruct, etc). Better to do
   * that after the "ifdef-simplification" phase.
   *)
  let brace_grouped = TV.mk_braceised (without_comments ()) in
  set_context_tag brace_grouped;

  (* macro *)
  let paren_grouped = paren_view () in
  let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
  find_define_init_brace_paren paren_grouped;
  find_string_macro_paren paren_grouped;
  find_macro_lineparen line_paren_grouped;
  find_macro_paren paren_grouped;

  (* obsolete: actions ? not yet *)
  let paren_grouped = paren_view () in
  find_actions paren_grouped;

  insert_virtual_positions (Common.acc_map (fun x -> x.tok) !tokens2)
1595 | ||
(* Run fix_tokens_cpp2 under the exclusive profiling entry "HACK". *)
let time_hack1 ~macro_defs a =
  let run () = fix_tokens_cpp2 ~macro_defs a in
  Common.profile_code_exclusif "HACK" run
34e49164 | 1598 | |
(* Profiled entry point of the cpp token fixing pass; the work is done by
 * time_hack1 under the profiling entry "C parsing.fix_cpp". *)
let fix_tokens_cpp ~macro_defs a =
  let run () = time_hack1 ~macro_defs a in
  Common.profile_code "C parsing.fix_cpp" run
34e49164 | 1601 | |
34e49164 | 1602 | |
34e49164 | 1603 | |
34e49164 C |
1604 | |
1605 | (*****************************************************************************) | |
1606 | (* Lexing with lookahead *) | |
1607 | (*****************************************************************************) | |
1608 | ||
(* Why use yet another parsing_hack technique? Are the fix_xxx functions,
 * which do some pre-processing on the full list of tokens, not enough?
 * No, because sometimes we need more contextual information, and even if
 * set_context() tries to give some contextual information, it is not
 * completely accurate. So the following code gives yet another
 * alternative, yet another chance to transform some tokens.
 *
 * todo?: maybe we could try to get rid of this technique. Maybe a better
 * set_context() would make it possible to move this code to a fix_xx
 * technique.
 *
 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
 * it is more general to do it via my LALR(k) tech, because here we can
 * transform a token given some context information. So sometimes it
 * makes sense to transform a token in one context, sometimes not, and
 * lex cannot provide us with this context information. Note that the order
 * of the cases in the pattern matching in lookahead is important. Do not
 * cut/paste.
 *
 * Note that in next there are only "clean" tokens; there are no comment
 * or space tokens. This filtering is done by the caller.
 *
 *)
1631 | ||
485bce71 C |
1632 | open Lexer_parser (* for the fields of lexer_hint type *) |
1633 | ||
(* Returns false when the first token of the list is a struct, union or
 * enum keyword, and true otherwise (including for the empty list).
 *)
let not_struct_enum tokens =
  match tokens with
  | Parser_c.Tstruct _ :: _
  | Parser_c.Tunion _ :: _
  | Parser_c.Tenum _ :: _ -> false
  | _ -> true
34e49164 | 1637 | |
f59c9fb7 C |
(* Tells whether a token can act as a pointer-like declarator marker:
 * TMul always does, TAnd only when Flag.c_plus_plus is set.
 *)
let pointer tok =
  match tok with
  | TMul _ -> true
  | TAnd _ -> !Flag.c_plus_plus
  | _ -> false
485bce71 | 1642 | |
(* lookahead2 ~pass next before
 *
 * Decides which token to hand over for the head of [next], possibly
 * replacing it by another token depending on the tokens around it.
 *
 *  - pass:   pass number; from pass 2 on, ifdef/undef/other cpp directives
 *            are turned into TCommentCpp tokens.
 *  - next:   the token under consideration followed by the tokens after it.
 *  - before: the tokens preceding it; the patterns below inspect the head
 *            of this list as the immediately preceding token.
 *
 * Returns the (possibly transformed) head of [next]. Several rules also
 * have side effects: they register a typedef name via LP.add_typedef_root,
 * emit messages, or bump counters in Stat.
 * Raises Impossible when [next] is empty.
 *
 * The order of the rules matters: the first matching rule wins.
 *)
let lookahead2 ~pass next before =

  match (next, before) with

  (* c++ hacks *)
  (* yy xx(   and in function *)
  | TOPar i1::_,              TIdent(s,i2)::TypedefIdent _::_
      when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
      pr2_cpp("constructed_object: " ^s);
      TOParCplusplusInit i1
  | TypedefIdent(s,i)::TOPar i1::_,_
      when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
      TIdent(s,i)

  (*-------------------------------------------------------------*)
  (* typedef inference, parse_typedef_fix3 *)
  (*-------------------------------------------------------------*)
  (* xx xx *)
  | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
      && ok_typedef s
      (* (take_safe 1 !passed_tok <> [TOPar]) ->  *)
    ->
      (* parse_typedef_fix3:
       *    acpi_object		acpi_object;
       * was badly parsed, because there was no time to call dt() in the
       * type_spec. The parser has internally already requested the next
       * token in order to decide things.
       *  => special case in lexer_heuristic, again
       *)
      if !Flag_parsing_c.debug_typedef
      then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);

      LP.disable_typedef();

      msg_typedef s 1; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx yy *)
  | (TIdent (s, i1)::TIdent (s2, i2)::_  , _) when not_struct_enum before
      && ok_typedef s
        ->
      (* && not_annot s2 BUT lead to false positive*)

      msg_typedef s 2; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* xx inline *)
  | (TIdent (s, i1)::Tinline i2::_  , _) when not_struct_enum before
      && ok_typedef s
      ->
      msg_typedef s 3; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* [,(] xx [,)] AND param decl *)
  | (TIdent (s, i1)::(TComma _|TCPar _)::_ , (TComma _ |TOPar _)::_ )
    when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
      && ok_typedef s
      ->
      msg_typedef s 4; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx* [,)] *)
  (* specialcase:  [,(] xx* [,)] *)
  | (TIdent (s, i1)::ptr::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
    when pointer ptr && not_struct_enum before
        (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      msg_typedef s 5; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* xx** [,)] *)
  (* specialcase:  [,(] xx** [,)] *)
  | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
    when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      msg_typedef s 6; LP.add_typedef_root s;
      TypedefIdent (s, i1)



  (* xx const *   USELESS because of next rule ? *)
  | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
      when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
      ->

      msg_typedef s 7; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx const *)
  | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
      when not_struct_enum before
      && ok_typedef s
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      ->

      msg_typedef s 8; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* xx * const *)
  | (TIdent (s, i1)::ptr::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
      when pointer ptr && not_struct_enum before
      && ok_typedef s
      ->
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)

      msg_typedef s 9; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* ( const xx)  *)
  | (TIdent (s, i1)::TCPar _::_,  (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
      ok_typedef s  ->
      msg_typedef s 10; LP.add_typedef_root s;
      TypedefIdent (s, i1)



  (* ( xx ) [sizeof, ~] *)
  | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ ,     TOPar _::_ )
    when not_struct_enum before
      && ok_typedef s
    ->
      msg_typedef s 11; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* [(,] xx [   AND parameterdeclaration *)
  | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
      when (LP.current_context() =*= LP.InParameter)
      && ok_typedef s
     ->
      msg_typedef s 12; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*------------------------------------------------------------*)
  (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
  (* but if have a '=', or ','   I think not *)
  (*------------------------------------------------------------*)

  (* static xx * yy  *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::_ ,
     (Tregister _|Tstatic _  |Tvolatile _|Tconst _|Trestrict _)::_) when
      pointer ptr && ok_typedef s
        ->
      msg_typedef s 13; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  TODO  xx * yy ; AND in start of compound element  *)


  (*  xx * yy,      AND  in paramdecl *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ , _)
    when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
      && pointer ptr && ok_typedef s
      ->

      msg_typedef s 14; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (*  xx * yy ;     AND in Toplevel, except when have = before  *)
  | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
      TIdent (s, i1)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TPtVirg _::_ , _)
    when not_struct_enum before && pointer ptr &&
      (LP.is_top_or_struct (LP.current_context ()))
      ->
      msg_typedef s 15; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx * yy ,     AND in Toplevel  *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ , _)
    when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
      && ok_typedef s && pointer ptr
      ->

      msg_typedef s 16; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx * yy (     AND in Toplevel  *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TOPar _::_ , _)
    when not_struct_enum before
      && (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s && pointer ptr
      ->
      msg_typedef s 17; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx * yy [ *)
  (* todo? enough ? cos in struct def we can have some expression ! *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TOCro _::_ , _)
    when not_struct_enum before &&
      (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s && pointer ptr
      ->
      msg_typedef s 18;  LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* u16: 10; in struct *)
  | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
    when       (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s
      ->
      msg_typedef s 19;  LP.add_typedef_root s;
      TypedefIdent (s, i1)


    (*  why need TOPar condition as stated in preceding rule ? really needed ? *)
    (*   YES cos at toplevel can have some expression !! for instance when *)
    (*   enter in the dimension of an array *)
    (*
      | (TIdent s::TMul::TIdent s2::_ , _)
      when (take_safe 1 !passed_tok <> [Tstruct] &&
      (take_safe 1 !passed_tok <> [Tenum]))
      &&
      !LP._lexer_hint = Some LP.Toplevel ->
      msg_typedef s 20; LP.add_typedef_root s;
      TypedefIdent s
     *)

  (*  xx * yy =  *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TEq _::_ , _)
    when not_struct_enum before
      && ok_typedef s && pointer ptr
      ->
      msg_typedef s 21; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (*  xx * yy)      AND in paramdecl *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TCPar _::_ , _)
      when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
      && ok_typedef s && pointer ptr
        ->
      msg_typedef s 22; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (*  xx * yy; *) (* wrong ? *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TPtVirg _::_ ,
     (TOBrace _| TPtVirg _)::_)  when not_struct_enum before
      && ok_typedef s && pointer ptr
        ->
      msg_typedef s 23;  LP.add_typedef_root s;
      msg_maybe_dangereous_typedef s;
      TypedefIdent (s, i1)


  (*  xx * yy,  and ';' before xx *) (* wrong ? *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ ,
     (TOBrace _| TPtVirg _)::_) when
      ok_typedef s && pointer ptr
    ->
      msg_typedef s 24; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* xx_t * yy *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::_ , _)
      when s ==~ regexp_typedef && not_struct_enum before
        (* struct user_info_t sometimes *)
      && ok_typedef s && pointer ptr
        ->
      msg_typedef s 25; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx ** yy *)  (* wrong ? *)
  | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
    when not_struct_enum before
        (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
      ->
      msg_typedef s 26; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx *** yy *)
  | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
    when not_struct_enum before
      && ok_typedef s
        (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      ->
      msg_typedef s 27; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx ** ) *)
  | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
    when not_struct_enum before
        (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
      ->
      msg_typedef s 28; LP.add_typedef_root s;
      TypedefIdent (s, i1)



  (* ----------------------------------- *)
  (* old: why not do like for other rules and start with TIdent ?
   * why do TOPar :: TIdent :: ..., _  and not TIdent :: ...,  TOPAr::_ ?
   * new: prefer now start with TIdent because otherwise the add_typedef_root
   * may have no effect if in second pass or if have disable the add_typedef.
   *)

  (*  (xx) yy *)
  | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::_ ,
    (TOPar info)::x::_)
    when not (TH.is_stuff_taking_parenthized x) &&
      Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3
      && ok_typedef s
      ->

      msg_typedef s 29; LP.add_typedef_root s;
      (*TOPar info*)
      TypedefIdent (s, i1)


  (*  (xx) (    yy)
   * but false positif: typedef int (xxx_t)(...), so do specialisation below.
   *)
  (*
  | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
    when not (TH.is_stuff_taking_parenthized x)
      && ok_typedef s
        ->
      msg_typedef s 30; LP.add_typedef_root s;
      (* TOPar info *)
      TypedefIdent (s, i1)
  *)
  (* special case:  = (xx) (    yy) *)
  | (TIdent (s, i1)::TCPar _::TOPar _::_ ,
    (TOPar info)::(TEq _ |TEqEq _)::_)
    when ok_typedef s
        ->
      msg_typedef s 31; LP.add_typedef_root s;
      (* TOPar info *)
      TypedefIdent (s, i1)


  (*  (xx * ) yy *)
  | (TIdent (s, i1)::ptr::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_)
      when ok_typedef s && pointer ptr
        ->
      msg_typedef s 32; LP.add_typedef_root s;
      (*TOPar info*)
      TypedefIdent (s,i1)


  (* (xx){ ... }  constructor *)
  | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
      when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
      && ok_typedef s
        ->
      msg_typedef s 33; LP.add_typedef_root s;
      TypedefIdent (s, i1)


        (* can have sizeof on expression
           | (Tsizeof::TOPar::TIdent s::TCPar::_,   _) ->
           msg_typedef s; LP.add_typedef_root s;
           Tsizeof
         *)


  (* ----------------------------------- *)
  (* x ( *y )(params),  function pointer *)
  | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_,  _)
      when not_struct_enum before
      && ok_typedef s
        ->
      msg_typedef s 34; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* x* ( *y )(params),  function pointer 2 *)
  | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_,  _)
      when not_struct_enum before
      && ok_typedef s
        ->
      msg_typedef s 35; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (*-------------------------------------------------------------*)
  (* CPP *)
  (*-------------------------------------------------------------*)
  | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
      TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
        as x)
    ::_, _
      ->
      (*
      if not !Flag_parsing_c.ifdef_to_if
      then TCommentCpp (Ast_c.CppDirective, ii)
      else
      *)
      (* not !LP._lexer_hint.toplevel *)
      if !Flag_parsing_c.ifdef_directive_passing
        || (pass >= 2)
      then begin

        if (LP.current_context () =*= LP.InInitializer)
        then begin
          pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
          incr Stat.nIfdefInitializer;
        end else begin
          pr2_cpp("IFDEF: or related inside function. I treat it as comment");
          incr Stat.nIfdefPassing;
        end;
        TCommentCpp (Token_c.CppDirective, ii)
      end
      else x

  | (TUndef (ii) as x)::_, _
      ->
        if (pass >= 2)
        then begin
          pr2_cpp("UNDEF: I treat it as comment");
          TCommentCpp (Token_c.CppDirective, ii)
        end
        else x

  | (TCppDirectiveOther (ii) as x)::_, _
      ->
        if (pass >= 2)
        then begin
          pr2_cpp ("OTHER directive: I treat it as comment");
          TCommentCpp (Token_c.CppDirective, ii)
        end
        else x

   (* If ident contain a for_each, then certainly a macro. But to be
    * sure should look if there is a '{' after the ')', but it requires
    * to count the '('. Because this can be expensive, we do that only
    * when the token contains "for_each".
    *)
  | (TIdent (s, i1)::TOPar _::rest, _)
     when not (LP.current_context () =*= LP.InTopLevel)
      (* otherwise a function such as static void loopback_enable(int i) {
       * will be considered as a loop
       *)
        ->

      if s ==~ regexp_foreach &&
        is_really_foreach (Common.take_safe forLOOKAHEAD rest)

      then begin
        msg_foreach s;
        TMacroIterator (s, i1)
      end
      else TIdent (s, i1)



 (*-------------------------------------------------------------*)
 (* default: no rule applies, keep the token unchanged *)
 | v::_, _ -> v
 (* next is empty: the caller is expected never to do that *)
 | _ -> raise Impossible
2105 | ||
(* Profiled front-end of lookahead2: same arguments and result, with the
 * call recorded by Common.profile_code under "C parsing.lookahead".
 *)
let lookahead ~pass a b =
  let run () = lookahead2 ~pass a b in
  Common.profile_code "C parsing.lookahead" run
34e49164 C |
2108 | |
2109 |