Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 | 4 | * Copyright (C) 2007, 2008 Ecole des Mines de Nantes |
34e49164 C |
5 | * |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
34e49164 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
15 | ||
16 | open Common | |
17 | ||
ae4735db | 18 | module TH = Token_helpers |
708f4980 | 19 | module TV = Token_views_c |
34e49164 C |
20 | module LP = Lexer_parser |
21 | ||
485bce71 | 22 | module Stat = Parsing_stat |
34e49164 | 23 | |
ae4735db | 24 | open Parser_c |
34e49164 | 25 | |
ae4735db | 26 | open TV |
708f4980 | 27 | |
34e49164 C |
28 | (*****************************************************************************) |
29 | (* Some debugging functions *) | |
30 | (*****************************************************************************) | |
31 | ||
(* Pair of printers built by [Common.mk_pr2_wrappers] from the
 * [Flag_parsing_c.verbose_parsing] flag; [pr2_once] is presumably the
 * print-each-message-only-once variant (confirm in Common). *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
113803cf | 33 | |
(* Print [s], prefixed with "CPP-", through [Common.pr2_once].
 * Does nothing unless [Flag_parsing_c.debug_cpp] is set. *)
let pr2_cpp s =
  match !Flag_parsing_c.debug_cpp with
  | true -> Common.pr2_once ("CPP-" ^ s)
  | false -> ()
37 | ||
38 | ||
(* Generic filtered reporting.
 *
 * [printer s] is called when [cond] holds and either message filtering
 * ([Flag_parsing_c.filter_msg]) is off, or [is_known s] is false.
 * [is_known] is only evaluated when [cond] holds and filtering is on. *)
let msg_gen cond is_known printer s =
  let wanted =
    cond && (not (!Flag_parsing_c.filter_msg) || not (is_known s))
  in
  if wanted then printer s
ae4735db | 47 | |
34e49164 | 48 | |
485bce71 C |
49 | (* In the following, there are some hardcoded names of types or macros |
50 | * but they are not used by our heuristics! They are just here to | |
51 | * enable to detect false positive by printing only the typedef/macros | |
52 | * that we don't know yet. If we print everything, then we can easily | |
53 | * get lost with too much verbose tracing information. So those | |
54 | * functions "filter" some messages. So our heuristics are still good, | |
55 | * there is no more (or not that much) hardcoded linux stuff. | |
34e49164 | 56 | *) |
485bce71 | 57 | |
(* True for type names we already know about: a fixed list of common
 * integer / acpi / libc names, plus anything ending in "_t".
 * Only used to filter debugging messages (see msg_gen). *)
let is_known_typdef s =
  match s with
  | "u_char" | "u_short" | "u_int" | "u_long"
  | "u8" | "u16" | "u32" | "u64"
  | "s8" | "s16" | "s32" | "s64"
  | "__u8" | "__u16" | "__u32" | "__u64"
  | "acpi_handle" | "acpi_status"
  | "FILE" | "DIR" ->
      true
  | other -> other =~ ".*_t$"
485bce71 | 79 | |
ae4735db C |
80 | (* note: cant use partial application with let msg_typedef = |
81 | * because it would compute msg_typedef at compile time when | |
485bce71 C |
82 | * the flag debug_typedef is always false |
83 | *) | |
(* Record one typedef inference ([Stat.nTypedefInfer]) and, when
 * [Flag_parsing_c.debug_typedef] is set, report the promotion of [s]
 * (with the counter [n] and the line of [ii]) through [msg_gen].
 * The message goes through [pr2_cpp], so it is also subject to the
 * debug_cpp flag. *)
let msg_typedef s ii n =
  incr Stat.nTypedefInfer;
  let report name =
    pr2_cpp
      (Printf.sprintf "TYPEDEF: promoting:(%d) %s on line %d" n name
         (Ast_c.line_of_info ii))
  in
  msg_gen (!Flag_parsing_c.debug_typedef) is_known_typdef report s
96 | ||
(* Warn (through [pr2]) about a typedef inference on a name that
 * [is_known_typdef] does not recognise. *)
let msg_maybe_dangereous_typedef s =
  if is_known_typdef s
  then ()
  else
    pr2
      ("PB MAYBE: dangerous typedef inference, maybe not a typedef: " ^ s)
34e49164 C |
102 | |
103 | ||
104 | ||
(* Count a declare-macro detection ([Stat.nMacroDecl]) and report it under
 * debug_cpp, unless filtering is on and the macro is one of the well-known
 * names listed below. *)
let msg_declare_macro s =
  incr Stat.nMacroDecl;
  let is_known_declare_macro = function
    | "DECLARE_MUTEX" | "DECLARE_COMPLETION" | "DECLARE_RWSEM"
    | "DECLARE_WAITQUEUE" | "DECLARE_WAIT_QUEUE_HEAD"
    | "DEFINE_SPINLOCK" | "DEFINE_TIMER"
    | "DEVICE_ATTR" | "CLASS_DEVICE_ATTR" | "DRIVER_ATTR"
    | "SENSOR_DEVICE_ATTR"
    | "LIST_HEAD"
    | "DECLARE_WORK" | "DECLARE_TASKLET"
    | "PORT_ATTR_RO" | "PORT_PMA_ATTR"
    | "DECLARE_BITMAP" ->
        true
    (* regexp-based alternatives, kept disabled:
       | s when s =~ "^DECLARE_.*" -> true
       | s when s =~ ".*_ATTR$" -> true
       | s when s =~ "^DEFINE_.*" -> true
    *)
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found declare-macro: " ^ name) in
  msg_gen (!Flag_parsing_c.debug_cpp) is_known_declare_macro report s
34e49164 | 132 | |
ae4735db C |
133 | |
(* Count a foreach-macro detection ([Stat.nIteratorHeuristic]) and trace it. *)
let msg_foreach s =
  let () = incr Stat.nIteratorHeuristic in
  pr2_cpp ("MACRO: found foreach: " ^ s)
137 | ||
138 | ||
ae4735db C |
139 | (* ?? |
140 | let msg_debug_macro s = | |
34e49164 | 141 | pr2_cpp ("MACRO: found debug-macro: " ^ s) |
485bce71 | 142 | *) |
34e49164 C |
143 | |
144 | ||
(* Count a macro-statement detection ([Stat.nMacroStmt]) and trace the
 * "macro with param, no ptvirg" case. *)
let msg_macro_noptvirg s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found macro with param noptvirg: " ^ s)
148 | ||
(* Count a macro-statement detection ([Stat.nMacroStmt]) and trace the
 * toplevel "no ptvirg" case. *)
let msg_macro_toplevel_noptvirg s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found toplevel macro noptvirg: " ^ s)
152 | ||
(* Count a macro-statement detection ([Stat.nMacroStmt]) and trace the
 * single-macro "no ptvirg" case. *)
let msg_macro_noptvirg_single s =
  let () = incr Stat.nMacroStmt in
  pr2_cpp ("MACRO: found single-macro noptvirg: " ^ s)
156 | ||
157 | ||
485bce71 C |
158 | |
159 | ||
(* Count a higher-order macro detection ([Stat.nMacroHigherOrder]) and report
 * it under debug_cpp, unless filtering is on and the name is a known one. *)
let msg_macro_higher_order s =
  incr Stat.nMacroHigherOrder;
  let is_known_higher_order = function
    | "DBGINFO" | "DBGPX" | "DFLOW" -> true
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found higher ordre macro : " ^ name) in
  msg_gen (!Flag_parsing_c.debug_cpp) is_known_higher_order report s
174 | ||
175 | ||
(* Count a string-macro detection ([Stat.nMacroString]) and report it under
 * debug_cpp, unless filtering is on and the name is a known one. *)
let msg_stringification s =
  incr Stat.nMacroString;
  let is_known_string_macro = function
    | "REVISION" | "UTS_RELEASE" | "SIZE_STR" | "DMA_STR" -> true
    (* disabled alternative: s when s =~ ".*STR.*" -> true *)
    | _ -> false
  in
  let report name = pr2_cpp ("MACRO: found string-macro " ^ name) in
  msg_gen (!Flag_parsing_c.debug_cpp) is_known_string_macro report s
192 | ||
(* Count a string-macro detection ([Stat.nMacroString]) and trace the
 * variant that takes parameters. *)
let msg_stringification_params s =
  let () = incr Stat.nMacroString in
  pr2_cpp ("MACRO: string-macro with params : " ^ s)
196 | ||
197 | ||
198 | ||
(* Count a known-macro expansion ([Stat.nMacroExpand]) and trace it. *)
let msg_apply_known_macro s =
  let () = incr Stat.nMacroExpand in
  pr2_cpp ("MACRO: found known macro = " ^ s)
202 | ||
(* Count a known-macro hint ([Stat.nMacroHint]) and trace it. *)
let msg_apply_known_macro_hint s =
  let () = incr Stat.nMacroHint in
  pr2_cpp ("MACRO: found known macro hint = " ^ s)
206 | ||
207 | ||
34e49164 | 208 | |
ae4735db C |
209 | |
(* Count a boolean-ifdef decision ([Stat.nIfdefZero]) and trace which side
 * is being commented out, depending on [is_ifdef_positif]. *)
let msg_ifdef_bool_passing is_ifdef_positif =
  incr Stat.nIfdefZero; (* of Version ? *)
  pr2_cpp
    (if is_ifdef_positif
     then "commenting parts of a #if 1 or #if LINUX_VERSION"
     else "commenting a #if 0 or #if LINUX_VERSION or __cplusplus")
215 | ||
216 | ||
(* Count an ifdef-mid detection ([Stat.nIfdefExprPassing]) and trace it. *)
let msg_ifdef_mid_something () =
  let () = incr Stat.nIfdefExprPassing in
  pr2_cpp "found ifdef-mid-something"
220 | ||
(* Count an ifdef-funheader detection ([Stat.nIfdefFunheader]); prints
 * nothing. *)
let msg_ifdef_funheaders () =
  incr Stat.nIfdefFunheader
224 | ||
(* Count an ifdef-cparen-else detection ([Stat.nIfdefPassing]) and trace it. *)
let msg_ifdef_cparen_else () =
  let () = incr Stat.nIfdefPassing in
  pr2_cpp "found ifdef-cparen-else"
228 | ||
485bce71 | 229 | |
(* Count an attribute-macro detection ([Stat.nMacroAttribute]) and trace it. *)
let msg_attribute s =
  let () = incr Stat.nMacroAttribute in
  pr2_cpp ("ATTR:" ^ s)
ae4735db | 233 | |
485bce71 C |
234 | |
235 | ||
34e49164 | 236 | (*****************************************************************************) |
485bce71 | 237 | (* The regexp and basic view definitions *) |
34e49164 C |
238 | (*****************************************************************************) |
239 | ||
240 | (* opti: better to build them once and for all, especially regexp_foreach *) |
241 | ||
(* Whole identifier made of upper-case letters, digits and underscores,
 * not starting with a digit. *)
let regexp_macro =
  Str.regexp "^[A-Z_][A-Z_0-9]*$"

(* linuxext: identifier starting with a double underscore *)
let regexp_annot =
  Str.regexp "^__.*$"

(* linuxext: identifier containing DECLARE *)
let regexp_declare =
  Str.regexp ".*DECLARE.*"

(* linuxext: case-insensitive; identifier containing one of the usual
 * iteration words *)
let regexp_foreach =
  Str.regexp_case_fold
    ".*\\(for_?each\\|for_?all\\|iterate\\|loop\\|walk\\|scan\\|each\\|for\\)"

(* identifier ending in "_t" *)
let regexp_typedef =
  Str.regexp ".*_t$"
259 | ||
(* Names that must never be inferred to be typedefs. *)
let false_typedef = ["printk"]

(* [ok_typedef s] is false exactly when [s] is listed in [false_typedef]. *)
let ok_typedef s =
  if List.mem s false_typedef then false else true
266 | ||
(* True when [s] does not match [regexp_annot]. *)
let not_annot s =
  if s ==~ regexp_annot then false else true
269 | ||
270 | ||
34e49164 | 271 | |
34e49164 | 272 | |
485bce71 C |
273 | (*****************************************************************************) |
274 | (* Helpers *) | |
275 | (*****************************************************************************) | |
276 | ||
485bce71 | 277 | (* ------------------------------------------------------------------------- *) |
ae4735db C |
278 | (* the pair is the status of '()' and '{}', ex: (-1,0) |
279 | * if too much ')' and good '{}' | |
280 | * could do for [] too ? | |
485bce71 C |
281 | * could do for ',' if encounter ',' at "toplevel", not inside () or {} |
282 | * then if have ifdef, then certainly can lead to a problem. | |
283 | *) | |
(* Walk every token of an ifdef clause and return the pair
 * (open parens - close parens, open braces - close braces). *)
let (count_open_close_stuff_ifdef_clause: TV.ifdef_grouped list -> (int * int))=
 fun clause ->
  let parens = ref 0 and braces = ref 0 in
  let tally tokext =
    match tokext.tok with
    | t when TH.is_opar t -> incr parens
    | TOBrace _ -> incr braces
    | t when TH.is_cpar t -> decr parens
    | TCBrace _ -> decr braces
    | _ -> ()
  in
  TV.iter_token_ifdef tally clause;
  (!parens, !braces)
297 | ||
298 | ||
299 | (* ------------------------------------------------------------------------- *) | |
(* Lookahead bound; not used in this part of the file.
 * NOTE(review): presumably the number of tokens inspected by the foreach
 * heuristic -- confirm at the use site. *)
let forLOOKAHEAD : int = 30
301 | ||
ae4735db | 302 | |
485bce71 | 303 | (* look if there is a '{' just after the closing ')', and handling the |
ae4735db C |
304 | * possibility to have nested expressions inside nested parenthesis |
305 | * | |
485bce71 C |
306 | * todo: use indentation instead of premier(statement) ? |
307 | *) | |
(* Decide whether the token list [xs] looks like the argument list of a
 * foreach-like macro, by looking at what follows the closing ')'.
 *
 * The inner scan returns (verdict, remaining tokens). A ')' followed by
 * '{', or by a token that can start a single-statement body (identifier,
 * if, while, for, switch, return), gives true; any other ')' gives false.
 * On a nested '(' the scan first consumes the nested group, ignoring its
 * verdict, then continues on what is left.
 *
 * todo: premier(statement) - suivant(funcall) *)
let is_really_foreach xs =
  let rec scan toks =
    match toks with
    | [] -> (false, [])
    | TCPar _ :: TOBrace _ :: rest -> (true, rest)
    | TCPar _
      :: (TIdent _ | Tif _ | Twhile _ | Tfor _ | Tswitch _ | Treturn _)
      :: rest ->
        (true, rest)
    | TCPar _ :: rest -> (false, rest)
    | TOPar _ :: rest ->
        let (_, after_group) = scan rest in
        scan after_group
    | _ :: rest -> scan rest
  in
  fst (scan xs)
332 | ||
333 | ||
334 | (* ------------------------------------------------------------------------- *) | |
(* Store [Some cnt] in the tag reference carried by an ifdef-directive
 * token. Raises [Impossible] when [x] is not one of the directive tokens
 * listed below. *)
let set_ifdef_token_parenthize_info cnt x =
  let tag =
    match x with
    | TIfdef (tag, _) | TIfdefelse (tag, _)
    | TIfdefelif (tag, _) | TEndif (tag, _)
    | TIfdefBool (_, tag, _) | TIfdefMisc (_, tag, _)
    | TIfdefVersion (_, tag, _) ->
        tag
    | _ -> raise Impossible
  in
  tag := Some cnt
485bce71 C |
349 | |
350 | ||
485bce71 | 351 | |
(* Global counter used to number ifdef groups; never reset in this block. *)
let ifdef_paren_cnt = ref 0

(* For every Ifdef / Ifdefbool group (recursively), bump [ifdef_paren_cnt]
 * and tag each of the group's directive tokens with the pair
 * (group number, number of directives in the group). The group is numbered
 * before its nested clauses are visited. *)
let rec set_ifdef_parenthize_info xs =
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdefbool (_, clauses, directives)
    | Ifdef (clauses, directives) ->
        incr ifdef_paren_cnt;
        let tag = (!ifdef_paren_cnt, List.length directives) in
        List.iter
          (fun d -> set_ifdef_token_parenthize_info tag d.tok)
          directives;
        List.iter set_ifdef_parenthize_info clauses
  in
  List.iter visit xs
369 | ||
370 | ||
978fd7e5 C |
371 | (*****************************************************************************) |
372 | (* The parsing hack for #define *) | |
373 | (*****************************************************************************) | |
374 | ||
ae4735db | 375 | (* To parse macro definitions I need to do some tricks |
978fd7e5 C |
376 | * as some information can be get only at the lexing level. For instance |
377 | * the space after the name of the macro in '#define foo (x)' is meaningful | |
378 | * but the grammar can not get this information. So define_ident below | |
379 | * look at such space and generate a special TOpardefine. In a similar | |
380 | * way macro definitions can contain some antislash and newlines | |
ae4735db C |
381 | * and the grammar need to know where the macro ends (which is |
382 | * a line-level and so low token-level information). Hence the | |
978fd7e5 | 383 | * function 'define_line' below and the TDefEol. |
ae4735db C |
384 | * |
385 | * update: TDefEol is handled in a special way at different places, | |
978fd7e5 C |
386 | * a little bit like EOF, especially for error recovery, so this |
387 | * is an important token that should not be retagged! | |
ae4735db C |
388 | * |
389 | * | |
390 | * ugly hack, a better solution perhaps would be to erase TDefEOL | |
391 | * from the Ast and list of tokens in parse_c. | |
392 | * | |
978fd7e5 | 393 | * note: I do a +1 somewhere, it's for the unparsing to correctly sync. |
ae4735db | 394 | * |
978fd7e5 C |
395 | * note: can't replace mark_end_define by simply a fakeInfo(). The reason |
396 | * is where is the \n TCommentSpace. Normally there is always a last token | |
397 | * to synchronize on, either EOF or the token of the next toplevel. | |
ae4735db | 398 | * In the case of the #define we got in list of token |
978fd7e5 C |
399 | * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will |
400 | * not synchronize on it and so we will not print the "\n". | |
401 | * A solution would be to put the TDefEOL before the "\n". | |
c491d8ee | 402 | * (jll: tried to do this, see the comment "Put end of line..." below) |
ae4735db C |
403 | * |
404 | * todo?: could put a ExpandedTok for that ? | |
978fd7e5 | 405 | *) |
(* Build the TDefEOL token that closes a #define / #undef line.
 * Its info is an OriginTok copied from [ii], with an empty string and a
 * character position one past that of [ii]; annotation and comment tags
 * are fresh references. *)
let mark_end_define ii =
  let pinfo =
    Ast_c.OriginTok
      { (Ast_c.parse_info_of_info ii) with
        Common.str = "";
        Common.charpos = Ast_c.pos_of_info ii + 1
      }
  in
  TDefEOL
    { Ast_c.pinfo = pinfo;
      cocci_tag = ref Ast_c.emptyAnnot;
      comments_tag = ref Ast_c.emptyComments;
    }
417 | ||
418 | (* put the TDefEOL at the good place *) | |
(* define_line_1: scanning outside of a directive. Tokens are accumulated
 * in reverse in [acc]. On TDefine / TUndef, switch to define_line_2 with
 * the directive's line. An escaped newline met here is suspicious: it is
 * reported and turned into a TCommentSpace. *)
let rec define_line_1 acc xs =
  match xs with
  | [] -> List.rev acc
  | ((TDefine ii | TUndef ii) as directive) :: rest ->
      define_line_2 (directive :: acc) (Ast_c.line_of_info ii) ii rest
  | TCppEscapedNewline ii :: rest ->
      pr2 ("SUSPICIOUS: a \\ character appears outside of a #define at");
      pr2 (Ast_c.strloc_of_info ii);
      define_line_1 (TCommentSpace ii :: acc) rest
  | tok :: rest -> define_line_1 (tok :: acc) rest

(* define_line_2: scanning inside a directive whose current logical line
 * is [line]; [lastinfo] is the info of the last token seen on it. The
 * TDefEOL built from [lastinfo] is inserted when the line changes or EOF
 * is met. An escaped newline becomes a TCommentSpace and extends the
 * directive to the next line. *)
and define_line_2 acc line lastinfo xs =
  match xs with
  | [] ->
      (* should not happen, EOF should be met before *)
      pr2 "PB: WEIRD";
      List.rev (mark_end_define lastinfo :: acc)
  | tok :: rest ->
      let tok_line = TH.line_of_tok tok in
      let tok_info = TH.info_of_tok tok in
      (match tok with
      | EOF ii ->
          define_line_1 (EOF ii :: mark_end_define lastinfo :: acc) rest
      | TCppEscapedNewline ii ->
          if (tok_line <> line) then pr2 "PB: WEIRD: not same line number";
          define_line_2 (TCommentSpace ii :: acc) (line + 1) tok_info rest
      | _ when tok_line =|= line ->
          define_line_2 (tok :: acc) line tok_info rest
      | _ ->
          (* Put the end-of-line token before the newline. A newline at
             least must be there because the line changed and because we
             saw a #define previously to get to this function at all, so
             [acc] cannot be empty here. *)
          (match acc with
          | newest :: older ->
              define_line_1
                (newest :: mark_end_define lastinfo :: older)
                xs
          | [] -> failwith "tl")
      )
467 | ||
(* Retag the name of the macro that follows a #define / #undef.
 *
 * Tokens are accumulated in reverse in [acc]; the result is the rewritten
 * token list. After TUndef or TDefine, a "space, identifier" sequence has
 * its TIdent turned into TIdentDefine. After TDefine, a '(' immediately
 * following the identifier is turned into TOParDefine. Anything else is
 * reported through [pr2] and scanning continues. *)
let rec define_ident acc xs =
  match xs with
  | [] -> List.rev acc
  | TUndef ii::xs ->
      let acc = TUndef ii :: acc in
      (match xs with
      | TCommentSpace i1::TIdent (s,i2)::xs ->
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          define_ident acc xs
      | _ ->
          (* bugfix: this is the #undef branch; the message used to say
           * "#define", which pointed at the wrong directive. *)
          pr2 "WEIRD: weird #undef body";
          define_ident acc xs
      )
  | TDefine ii::xs ->
      let acc = TDefine ii :: acc in
      (match xs with
      | TCommentSpace i1::TIdent (s,i2)::TOPar (i3)::xs ->
          (* Change also the kind of TIdent to avoid bad interaction
           * with other parsing_hack tricks. For instance if we keep TIdent
           * then the stringification algo can believe the TIdent is a
           * string-macro. So simpler to change the kind of the ident too.
           *)
          (* if the '(' sticks to the ident, then it's a macro-function.
           * Change the token to avoid ambiguity between
           * #define foo(x) and #define foo (x)
           *)
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          let acc = (TOParDefine i3) :: acc in
          define_ident acc xs

      | TCommentSpace i1::TIdent (s,i2)::xs ->
          let acc = (TCommentSpace i1) :: acc in
          let acc = (TIdentDefine (s,i2)) :: acc in
          define_ident acc xs

      (* bugfix: ident of macro (as well as params, cf below) can be tricky
       * note, do we need to subst in the body of the define ? no cos
       * here the issue is the name of the macro, as in #define inline,
       * so obviously the name of this macro will not be used in its
       * body (it would be a recursive macro, which is forbidden).
       *)
      | TCommentSpace i1::t::xs ->
          let s = TH.str_of_tok t in
          let ii = TH.info_of_tok t in
          if s ==~ Common.regexp_alpha
          then begin
            pr2 (spf "remapping: %s to an ident in macro name" s);
            let acc = (TCommentSpace i1) :: acc in
            let acc = (TIdentDefine (s,ii)) :: acc in
            define_ident acc xs
          end
          else begin
            (* NOTE(review): [xs] here is the tail after the space and [t],
             * so both tokens are left out of the result; behaviour kept
             * as is -- confirm this is intended. *)
            pr2 "WEIRD: weird #define body";
            define_ident acc xs
          end

      | _ ->
          pr2 "WEIRD: weird #define body";
          define_ident acc xs
      )
  | x::xs ->
      define_ident (x :: acc) xs
978fd7e5 C |
535 | |
536 | ||
ae4735db C |
537 | |
(* Two passes over the token list: first insert the TDefEOL markers
 * (define_line_1), then retag the macro names (define_ident). *)
let fix_tokens_define2 xs =
  let with_eol = define_line_1 [] xs in
  define_ident [] with_eol
540 | ||
(* Same as [fix_tokens_define2], run under [Common.profile_code] with the
 * label "C parsing.fix_define". *)
let fix_tokens_define a =
  let run () = fix_tokens_define2 a in
  Common.profile_code "C parsing.fix_define" run
ae4735db | 543 | |
978fd7e5 C |
544 | |
545 | ||
546 | ||
547 | ||
548 | (* ------------------------------------------------------------------------- *) | |
549 | (* Other parsing hacks related to cpp, Include/Define hacks *) | |
550 | (* ------------------------------------------------------------------------- *) | |
551 | ||
552 | (* Sometimes I prefer to generate a single token for a list of things in the | |
553 | * lexer so that if I have to passed them, like for passing TInclude then | |
ae4735db C |
554 | * it's easy. Also if I don't do a single token, then I need to |
555 | * parse the rest which may not need special stuff, like detecting | |
978fd7e5 C |
556 | * end of line which the parser is not really ready for. So for instance |
557 | * could I parse a #include <a/b/c/xxx.h> as 2 or more tokens ? just | |
ae4735db | 558 | * lex #include ? so then need recognize <a/b/c/xxx.h> as one token ? |
978fd7e5 C |
559 | * but this kind of token is valid only after a #include and the |
560 | * lexing and parsing rules are different for such tokens so not that | |
561 | * easy to parse such things in parser_c.mly. Hence the following hacks. | |
ae4735db | 562 | * |
978fd7e5 C |
563 | * less?: maybe could get rid of this like I get rid of some of fix_define. |
564 | *) | |
565 | ||
566 | (* helpers *) | |
567 | ||
568 | (* used to generate new token from existing one *) | |
(* Derive a new token info from [ii]: the string becomes [str], and both
 * the character position and the column are shifted by [posadd]. *)
let new_info posadd str ii =
  let shifted =
    { (Ast_c.parse_info_of_info ii) with
      charpos = Ast_c.pos_of_info ii + posadd;
      str = str;
      column = Ast_c.col_of_info ii + posadd;
    }
  in
  { Ast_c.pinfo = Ast_c.OriginTok shifted;
    (* must generate a new ref each time, otherwise share *)
    cocci_tag = ref Ast_c.emptyAnnot;
    comments_tag = ref Ast_c.emptyComments;
  }
580 | ||
581 | ||
(* Turn every token up to and including the next TDefEOL into a
 * TCommentCpp: CppPassingNormal for ordinary tokens, CppDirective for the
 * TDefEOL itself. Real comments are kept unchanged, and the tokens after
 * the TDefEOL are returned untouched.
 * Fails when the list holds no TDefEOL. *)
let rec comment_until_defeol xs =
  match xs with
  | [] ->
      (* job not done in Cpp_token_c.define_parse ? *)
      failwith "cant find end of define token TDefEOL"
  | (Parser_c.TDefEOL _ as eol) :: rest ->
      Parser_c.TCommentCpp (Token_c.CppDirective, TH.info_of_tok eol)
      :: rest
  | tok :: rest ->
      let tok' =
        (* bugfix: otherwise may lose a TComment token *)
        if TH.is_real_comment tok
        then tok
        else
          Parser_c.TCommentCpp
            (Token_c.CppPassingNormal (*good?*), TH.info_of_tok tok)
      in
      tok' :: comment_until_defeol rest
601 | ||
(* Return the tokens that follow the next TDefEOL in [xs]; the TDefEOL
 * itself is dropped too.
 *
 * Raises [Failure] when nothing is left after the search. The original
 * relied on [List.tl], which raised a bare [Failure "tl"]; the message now
 * matches the one used by [comment_until_defeol].
 * NOTE(review): assumes [Common.drop_until] returns the suffix starting at
 * the first matching element, and [] when nothing matches -- confirm. *)
let drop_until_defeol xs =
  let is_defeol = function Parser_c.TDefEOL _ -> true | _ -> false in
  match Common.drop_until is_defeol xs with
  | _eol :: rest -> rest
  | [] -> failwith "cant find end of define token TDefEOL"
605 | ||
606 | ||
607 | ||
608 | (* ------------------------------------------------------------------------- *) | |
609 | (* returns a pair (replaced token, list of next tokens) *) | |
610 | (* ------------------------------------------------------------------------- *) | |
611 | ||
(* Split an include directive into a TIncludeStart token, carrying the
 * [includes] text and the [inifdef] flag, and a one-element list holding
 * the TIncludeFilename token. The filename's info is positioned
 * [String.length includes] characters after [info]. *)
let tokens_include (info, includes, filename, inifdef) =
  let filename_info = new_info (String.length includes) filename info in
  let start_tok =
    Parser_c.TIncludeStart (Ast_c.rewrap_str includes info, inifdef)
  in
  (start_tok, [Parser_c.TIncludeFilename (filename, filename_info)])
617 | ||
618 | ||
619 | ||
620 | ||
485bce71 C |
621 | (*****************************************************************************) |
622 | (* CPP handling: macros, ifdefs, macros defs *) | |
623 | (*****************************************************************************) | |
624 | ||
0708f913 C |
625 | (* ------------------------------------------------------------------------- *) |
626 | (* special skip_start skip_end handling *) | |
627 | (* ------------------------------------------------------------------------- *) | |
628 | ||
629 | (* note: after this normally the token list should not contain any more the | |
630 | * TCommentSkipTagStart and End tokens. | |
631 | *) | |
(* Mark as comments (CppPassingExplicit) every token from a skip_start tag
 * up to and including the matching skip_end tag, then carry on after it.
 * Fails on a skip_start without a skip_end, and on a skip_end met outside
 * of a skip region. *)
let rec commentize_skip_start_to_end xs =
  match xs with
  | [] -> ()
  | ({tok = TCommentSkipTagStart _} as start) :: rest ->
      (try
        let is_end = function
          | {tok = TCommentSkipTagEnd _ } -> true
          | _ -> false
        in
        let (inside, stop, after) = Common.split_when is_end rest in
        List.iter
          (fun tok -> set_as_comment Token_c.CppPassingExplicit tok)
          (start :: stop :: inside);
        commentize_skip_start_to_end after
      with Not_found ->
        failwith "could not find end of skip_start special comment"
      )
  | {tok = TCommentSkipTagEnd _} :: _ ->
      failwith "found skip_end comment but no skip_start"
  | _ :: rest ->
      commentize_skip_start_to_end rest
ae4735db C |
658 | |
659 | ||
0708f913 C |
660 | |
661 | ||
34e49164 C |
662 | (* ------------------------------------------------------------------------- *) |
663 | (* ifdef keeping/passing *) | |
664 | (* ------------------------------------------------------------------------- *) | |
665 | ||
666 | (* #if 0, #if 1, #if LINUX_VERSION handling *) | |
(* Handle boolean ifdefs (#if 0, #if 1, #if LINUX_VERSION).
 *
 * For each Ifdefbool group the directive tokens are commented as
 * CppDirective. If the condition is positive, every clause but the first
 * is commented out. Otherwise the first clause is commented out, together
 * with every following clause except the last one. The clauses of an
 * Ifdefbool are not visited recursively; plain Ifdef groups are. *)
let rec find_ifdef_bool xs =
  let pass_clause clause =
    iter_token_ifdef (set_as_comment Token_c.CppPassingNormal) clause
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (clauses, _) -> List.iter find_ifdef_bool clauses
    | Ifdefbool (is_ifdef_positif, clauses, directives) ->
        msg_ifdef_bool_passing is_ifdef_positif;
        (match clauses with
        | [] -> raise Impossible
        | first :: others ->
            List.iter (set_as_comment Token_c.CppDirective) directives;
            if is_ifdef_positif
            then List.iter pass_clause others
            else begin
              pass_clause first;
              (match List.rev others with
              (* keep only last *)
              | _last :: earlier -> List.iter pass_clause earlier
              | [] -> (* not #else *) ()
              )
            end
        )
  in
  List.iter visit xs
696 | ||
697 | ||
698 | ||
(* Maximum number of elements in a clause for the group to be analysed by
 * find_ifdef_mid. *)
let thresholdIfdefSizeMid = 6

(* Infer ifdefs involving not-closed expressions/statements.
 *
 * An Ifdef group with at least two clauses is rewritten when:
 *  - every clause has at most [thresholdIfdefSizeMid] elements,
 *  - no clause contains a nested ifdef,
 *  - the first clause has unbalanced parentheses or braces, and
 *  - every clause has exactly the same (paren, brace) imbalance.
 * In that case the directives are commented as CppDirective and every
 * clause but the first as CppPassingCosWouldGetError.
 * Clauses are then visited recursively in all cases.
 *
 * bugfix: the imbalance test used to read
 *   cnt1 <> 0 || cnt2 <> 0 && for_all ...
 * which parses as cnt1 <> 0 || (cnt2 <> 0 && for_all ...), so the
 * "all clauses agree" check was skipped whenever the paren count was
 * non-zero. The disjunction is now parenthesized. *)
let rec find_ifdef_mid xs =
  xs +> List.iter (function
  | NotIfdefLine _ -> ()
  | Ifdef (xxs, info_ifdef_stmt) ->
      (match xxs with
      | [] -> raise Impossible
      | [_first] -> ()
      | _first::second::rest ->
          (* don't analyse big ifdef *)
          if xxs +> List.for_all
            (fun xs -> List.length xs <= thresholdIfdefSizeMid) &&
            (* don't want nested ifdef *)
            xxs +> List.for_all (fun xs ->
              xs +> List.for_all
                (function NotIfdefLine _ -> true | _ -> false)
            )
          then begin
            let counts = xxs +> List.map count_open_close_stuff_ifdef_clause in
            let cnt1, cnt2 = List.hd counts in
            if (cnt1 <> 0 || cnt2 <> 0) &&
               counts +> List.for_all (fun x -> x =*= (cnt1, cnt2))
            then begin
              msg_ifdef_mid_something();

              (* keep only first, treat the rest as comment *)
              info_ifdef_stmt +> List.iter (set_as_comment Token_c.CppDirective);
              (second::rest) +> List.iter
                (iter_token_ifdef (set_as_comment Token_c.CppPassingCosWouldGetError));
            end
          end
      );
      List.iter find_ifdef_mid xxs

  (* no need complex analysis for ifdefbool *)
  | Ifdefbool (_, xxs, _info_ifdef_stmt) ->
      List.iter find_ifdef_mid xxs
  )
747 | ||
748 | ||
(* Maximum number of extra elements allowed in each clause of a two-clause
 * ifdef for it to be treated as an alternate function header. *)
let thresholdFunheaderLimit = 4

(* Detect ifdefs that give alternate function headers: an Ifdef group whose
 * clauses start in column 0 and which is directly followed by a line
 * starting with '{' in column 0. The first alternative is kept; the
 * directives are commented as CppDirective and the other alternatives as
 * CppPassingCosWouldGetError. Three shapes are recognised (two clauses,
 * a nested ifdef in the second clause, three clauses); any other ifdef is
 * simply visited recursively. In the recognised shapes the rest of the
 * list is visited before the group itself is rewritten. *)
let rec find_ifdef_funheaders xs =
  let as_directive toks =
    List.iter (set_as_comment Token_c.CppDirective) toks
  in
  let pass toks =
    List.iter (set_as_comment Token_c.CppPassingCosWouldGetError) toks
  in
  match xs with
  | [] -> ()
  | NotIfdefLine _ :: rest -> find_ifdef_funheaders rest

  (* ifdef-funheader if ifdef with 2 lines and a '{' in next line *)
  | Ifdef
      ([(NotIfdefLine (({col = 0} as _xline1)::_line1))::ifdefblock1;
        (NotIfdefLine (({col = 0} as xline2)::line2))::ifdefblock2
       ], directives
      )
    ::NotIfdefLine (({tok = TOBrace _; col = 0})::_line3)
    ::rest
    when List.length ifdefblock1 <= thresholdFunheaderLimit &&
         List.length ifdefblock2 <= thresholdFunheaderLimit
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      as_directive directives;
      pass (xline2 :: line2);
      iter_token_ifdef
        (set_as_comment Token_c.CppPassingCosWouldGetError)
        ifdefblock2

  (* ifdef with nested ifdef *)
  | Ifdef
      ([[NotIfdefLine (({col = 0} as _xline1)::_line1)];
        [Ifdef
            ([[NotIfdefLine (({col = 0} as xline2)::line2)];
              [NotIfdefLine (({col = 0} as xline3)::line3)];
             ], inner_directives
            )
        ]
       ], directives
      )
    ::NotIfdefLine (({tok = TOBrace _; col = 0})::_line4)
    ::rest
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      as_directive directives;
      as_directive inner_directives;
      pass (xline2 :: xline3 :: (line2 @ line3))

  (* ifdef with elseif *)
  | Ifdef
      ([[NotIfdefLine (({col = 0} as _xline1)::_line1)];
        [NotIfdefLine (({col = 0} as xline2)::line2)];
        [NotIfdefLine (({col = 0} as xline3)::line3)];
       ], directives
      )
    ::NotIfdefLine (({tok = TOBrace _; col = 0})::_line4)
    ::rest
    ->
      find_ifdef_funheaders rest;

      msg_ifdef_funheaders ();
      as_directive directives;
      pass (xline2 :: xline3 :: (line2 @ line3))

  (* recurse *)
  | Ifdef (clauses, _)::rest
  | Ifdefbool (_, clauses, _)::rest ->
      List.iter find_ifdef_funheaders clauses;
      find_ifdef_funheaders rest
ae4735db | 819 | |
34e49164 C |
820 | |
821 | ||
(* For every Ifdef / Ifdefbool element of xs, visit the tokens of its
 * branches with iter_token_ifdef and set to true the boolean reference
 * carried by each TInclude token found there. Elements that are plain
 * NotIfdefLine are left alone.
 *
 * NOTE(review): whether includes in nested ifdefs are reached depends on
 * iter_token_ifdef, which is defined elsewhere -- to be confirmed.
 *)
let adjust_inifdef_include xs =
  let flag_include tokext =
    match tokext.tok with
    | Parser_c.TInclude (_, _, inifdef_ref, _) -> inifdef_ref := true
    | _ -> ()
  in
  let visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (branches, _)
    | Ifdefbool (_, branches, _) ->
        List.iter (iter_token_ifdef flag_include) branches
  in
  List.iter visit xs
834 | ||
835 | ||
836 | ||
34e49164 | 837 | |
485bce71 | 838 | |
34e49164 C |
839 | |
840 | ||
ae4735db C |
(* Detect an #ifdef whose first branch ends with a closing parenthesis,
 * i.e. a ')' located just before the #else. In that case only the first
 * branch is kept: the directive tokens are given to set_as_comment with
 * CppDirective and all tokens of the remaining branches with
 * CppPassingCosWouldGetError.
 *
 * Every branch of every conditional group is then visited recursively,
 * whether or not the heuristic fired. An Ifdef without any branch raises
 * Impossible.
 *)
let find_ifdef_cparen_else xs =
  (* true when the last element of the branch is a plain line whose last
   * token satisfies TH.is_cpar *)
  let ends_with_cpar branch =
    match branch with
    | [] -> false
    | _ ->
        (match Common.last branch with
         | NotIfdefLine [] -> false
         | NotIfdefLine toks -> TH.is_cpar (Common.last toks).tok
         | Ifdef _ | Ifdefbool _ -> false)
  in
  let rec walk groups = List.iter visit groups
  and visit = function
    | NotIfdefLine _ -> ()
    | Ifdef (branches, directives) ->
        (match branches with
         | [] -> raise Impossible
         | [_] -> ()
         | first :: others ->
             if ends_with_cpar first then begin
               msg_ifdef_cparen_else ();
               (* keep only the first branch, treat the rest as comment *)
               List.iter (set_as_comment Token_c.CppDirective) directives;
               List.iter
                 (iter_token_ifdef
                    (set_as_comment Token_c.CppPassingCosWouldGetError))
                 others
             end);
        List.iter walk branches
    (* no complex analysis needed for ifdefbool: just go down *)
    | Ifdefbool (_, branches, _) ->
        List.iter walk branches
  in
  walk xs
34e49164 C |
885 | |
886 | ||
708f4980 C |
887 | (* ------------------------------------------------------------------------- *) |
888 | (* cpp-builtin part2, macro, using standard.h or other defs *) | |
889 | (* ------------------------------------------------------------------------- *) | |
34e49164 | 890 | |
ae4735db | 891 | (* now in cpp_token_c.ml *) |
34e49164 C |
892 | |
893 | (* ------------------------------------------------------------------------- *) | |
894 | (* stringification *) | |
895 | (* ------------------------------------------------------------------------- *) | |
896 | ||
(* Stringification inside parentheses.
 *
 * For each argument of each Parenthised group: when the argument holds at
 * least one string token (TString or TMacroString) and consists only of
 * string tokens and identifiers, every identifier in it is retagged as a
 * TMacroString (after reporting it with msg_stringification). Otherwise
 * the argument is searched recursively. Tokens outside parentheses are
 * skipped.
 *)
let rec find_string_macro_paren xs =
  let is_string = function
    | PToken {tok = (TString _ | TMacroString _)} -> true
    | _ -> false
  in
  let is_string_or_ident = function
    | PToken {tok = (TString _ | TMacroString _ | TIdent _)} -> true
    | _ -> false
  in
  let retag_ident = function
    | PToken ({tok = TIdent (s, _)} as id) ->
        msg_stringification s;
        id.tok <- TMacroString (s, TH.info_of_tok id.tok)
    | _ -> ()
  in
  let visit_arg arg =
    if List.exists is_string arg && List.for_all is_string_or_ident arg
    then List.iter retag_ident arg
    else find_string_macro_paren arg
  in
  match xs with
  | [] -> ()
  | PToken _ :: rest ->
      find_string_macro_paren rest
  | Parenthised (args, _) :: rest ->
      List.iter visit_arg args;
      find_string_macro_paren rest
ae4735db | 921 | |
34e49164 C |
922 | |
923 | (* ------------------------------------------------------------------------- *) | |
924 | (* macro2 *) | |
925 | (* ------------------------------------------------------------------------- *) | |
926 | ||
(* Macro heuristics working on the parenthesised view of the tokens.
 *
 * The clauses are tried in order and the order matters (in particular for
 * the stringification clauses). Every clause must continue the traversal
 * on the remaining elements.
 *
 * Handled shapes:
 *  - Tattribute followed by a parenthesised group: both are handed to
 *    set_as_comment with CppAttr;
 *  - TattributeNoarg: handed to set_as_comment with CppAttr;
 *  - static/extern followed by a TMacroAttr: the latter becomes a
 *    TMacroAttrStorage;
 *  - an identifier next to a string (before or after it, with or without
 *    a parenthesised parameter list): the identifier becomes a
 *    TMacroString and the parameter list, if any, is handed to
 *    set_as_comment with CppMacro.
 *)
let rec find_macro_paren xs =
  (* apply set_as_comment with the given kind to all tokens of one group *)
  let hide_parens kind parens =
    iter_token_paren (set_as_comment kind) [parens] in
  let to_macro_string s id =
    id.tok <- TMacroString (s, TH.info_of_tok id.tok) in
  match xs with
  | [] -> ()

  (* attribute *)
  | PToken ({tok = Tattribute _} as id)
    :: (Parenthised _ as args)
    :: rest ->
      pr2_cpp ("MACRO: __attribute detected ");
      hide_parens Token_c.CppAttr args;
      set_as_comment Token_c.CppAttr id;
      find_macro_paren rest

  | PToken ({tok = TattributeNoarg _} as id) :: rest ->
      pr2_cpp ("MACRO: __attributenoarg detected ");
      set_as_comment Token_c.CppAttr id;
      find_macro_paren rest

(* disabled heuristics, kept for reference:

  (* attribute cpp, __xxx id *)
  | PToken ({tok = TIdent (s,i1)} as id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s ==~ regexp_annot
     ->
      msg_attribute s;
      id.tok <- TMacroAttr (s, i1);
      find_macro_paren ((PToken id2)::xs); (* recurse also on id2 ? *)

  (* attribute cpp, id __xxx *)
  | PToken ({tok = TIdent (s,i1)} as _id)
    ::PToken ({tok = TIdent (s2, i2)} as id2)
    ::xs when s2 ==~ regexp_annot && (not (s ==~ regexp_typedef))
     ->
      msg_attribute s2;
      id2.tok <- TMacroAttr (s2, i2);
      find_macro_paren xs

  | PToken ({tok = (Tstatic _ | Textern _)} as tok1)
    ::PToken ({tok = TIdent (s,i1)} as attr)
    ::xs when s ==~ regexp_annot
    ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s,i1);
      (* recurse, may have other storage attributes *)
      find_macro_paren (PToken (tok1)::xs)

*)

  (* storage attribute *)
  | PToken ({tok = (Tstatic _ | Textern _)} as storage)
    :: PToken ({tok = TMacroAttr (s, i1)} as attr)
    :: rest ->
      pr2_cpp ("storage attribute: " ^ s);
      attr.tok <- TMacroAttrStorage (s, i1);
      (* restart from the storage keyword: other storage attributes may
       * follow *)
      find_macro_paren (PToken storage :: rest)

  (* stringification; the order of the following clauses is important *)

  (* string macro with params, string before *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: (Parenthised _ as params)
    :: rest ->
      msg_stringification_params s;
      to_macro_string s id;
      hide_parens Token_c.CppMacro params;
      find_macro_paren rest

  (* string macro with params, string after *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: (Parenthised _ as params)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification_params s;
      to_macro_string s id;
      hide_parens Token_c.CppMacro params;
      find_macro_paren rest

  (* for the case where the string is not inside a funcall, but for
   * instance in an initializer *)

  (* string macro variable, string before *)
  | PToken {tok = (TString _ | TMacroString _)}
    :: PToken ({tok = TIdent (s, _)} as id)
    :: rest ->
      msg_stringification s;
      to_macro_string s id;
      find_macro_paren rest

  (* string macro variable, string after *)
  | PToken ({tok = TIdent (s, _)} as id)
    :: PToken {tok = (TString _ | TMacroString _)}
    :: rest ->
      msg_stringification s;
      to_macro_string s id;
      find_macro_paren rest

  (* nothing recognised here: move on, going inside parentheses *)
  | PToken _ :: rest ->
      find_macro_paren rest
  | Parenthised (args, _) :: rest ->
      List.iter find_macro_paren args;
      find_macro_paren rest
1050 | ||
1051 | ||
1052 | ||
1053 | ||
1054 | ||
(* Macro heuristics working on the line + parenthesised view of the tokens.
 *
 * The clauses are tried in order; every clause must continue the traversal
 * on the following lines. Three families of shapes are recognised:
 *  - declaration macros (static [const] FOO(...); and friends, or
 *    FOO(...); when the name matches regexp_declare): the identifier
 *    becomes a TMacroDecl;
 *  - a call-like line in column 0 without trailing ';': its closing
 *    parenthesis becomes a TCParEOL;
 *  - statement-like macros without ';' (with or without parameters): the
 *    identifier becomes a TMacroStmt.
 *)
let rec find_macro_lineparen xs =
  (* report and retag an identifier as a declaration macro *)
  let declare_macro s macro =
    msg_declare_macro s;
    let info = TH.info_of_tok macro.tok in
    macro.tok <- TMacroDecl (Ast_c.str_of_info info, info)
  in
  (* can this token start the line that follows a ';'-less macro call? *)
  let may_follow_call ctx tok =
    match tok with
    | TOBrace _ -> false (* otherwise would match funcdecl *)
    | TCBrace _ when ctx <> InFunction -> false
    | TPtVirg _ | TDotDot _ -> false
    | t when TH.is_binary_operator t -> false
    | _ -> true
  in
  (* tokens accepted on the next line even when it is less indented *)
  let opens_statement ctx tok =
    match tok with
    | TCBrace _ when ctx =*= InFunction -> true
    | Treturn _ | Tif _ | Telse _ -> true
    | _ -> false
  in
  match xs with
  | [] -> ()

  (* linuxext: ex: static [const] DEVICE_ATTR(); *)
  | Line [PToken {tok = Tstatic _};
          PToken ({tok = TIdent (s, _)} as macro);
          Parenthised _;
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      declare_macro s macro;
      find_macro_lineparen rest

  (* the static const case *)
  | Line [PToken {tok = Tstatic _};
          PToken ({tok = Tconst _} as const);
          PToken ({tok = TIdent (s, _)} as macro);
          Parenthised _;
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      declare_macro s macro;
      (* need retag this const, otherwise ambiguity in grammar
         21: shift/reduce conflict (shift 121, reduce 137) on Tconst
         decl2 : Tstatic . TMacroDecl TOPar argument_list TCPar ...
         decl2 : Tstatic . Tconst TMacroDecl TOPar argument_list TCPar ...
         storage_class_spec : Tstatic .  (137)
      *)
      const.tok <- TMacroDeclConst (TH.info_of_tok const.tok);
      find_macro_lineparen rest

  (* same but without trailing ';'
   *
   * The final ';' is not required because it can be on another line and,
   * because of the way mk_line is coded, it is not accessible from here
   * even when it is next to the ')'. *)
  | Line [PToken {tok = Tstatic _};
          PToken ({tok = TIdent (s, _)} as macro);
          Parenthised _]
    :: rest
    when s ==~ regexp_macro ->
      declare_macro s macro;
      find_macro_lineparen rest

  (* on multiple lines: 'static' alone, then the macro line *)
  | Line [PToken {tok = Tstatic _}]
    :: Line [PToken ({tok = TIdent (s, _)} as macro);
             Parenthised _;
             PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_macro ->
      declare_macro s macro;
      find_macro_lineparen rest

  (* linuxext: ex: DECLARE_BITMAP();
   *
   * regexp_declare is used here instead of regexp_macro because the line
   * can also be a FunCallMacro such as DEBUG(foo()); without a preceding
   * 'static' the only way to avoid false positives is to restrict the
   * rule to .*DECLARE.* macros.
   *
   * There is a grammar rule for that, so this case is only needed when
   * the parameters of the DECLARE_xxx are weird and can not be mapped on
   * an argument_list.
   *)
  | Line [PToken ({tok = TIdent (s, _)} as macro);
          Parenthised _;
          PToken {tok = TPtVirg _}]
    :: rest
    when s ==~ regexp_declare ->
      declare_macro s macro;
      find_macro_lineparen rest

  (* toplevel macros.
   * module_init(xxx)
   *
   * Could also transform the TIdent in a TMacroTop but that can give
   * false positives, so only the closing parenthesis is changed, which is
   * enough to solve the end-of-stream pb of ocamlyacc.
   *)
  | Line [PToken {tok = TIdent (s, _); col = col1; where = ctx};
          Parenthised (_, info_parens)]
    :: rest
    when col1 =|= 0 ->
      let next_line_ok =
        (* to reduce the number of false positives *)
        match rest with
        | Line (PToken ({col = col2} as other) :: _) :: _ ->
            TH.is_eof other.tok
            || (col2 =|= 0 && may_follow_call ctx other.tok)
        | _ -> false
      in
      if next_line_ok then begin
        msg_macro_toplevel_noptvirg s;
        (* just to avoid the end-of-stream pb of ocamlyacc *)
        let tcpar = Common.last info_parens in
        tcpar.tok <- TCParEOL (TH.info_of_tok tcpar.tok)
        (*macro.tok <- TMacroTop (s, TH.info_of_tok macro.tok);*)
      end;
      find_macro_lineparen rest

  (* macro with parameters
   * ex: DEBUG()
   *     return x;
   *)
  | Line [PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro);
          (Parenthised _ as call_parens)]
    :: (Line (PToken ({col = col2} as other) :: restline2) as line2)
    :: rest ->
      (* no restriction on the name here (s ==~ regexp_macro is disabled).
       * This can give a false positive for K&R functions if the function
       * name is in the same column as the first parameter declaration. *)
      let is_label =
        (* case of label, usually put in first column *)
        match other.tok, restline2 with
        | TIdent _, (PToken {tok = TDotDot _} :: _) -> true
        | _ -> false
      in
      let condition =
        (col1 =|= col2 && may_follow_call ctx other.tok)
        || (col2 <= col1 && (opens_statement ctx other.tok || is_label))
      in
      if condition then begin
        (* lines starting in column 0 are already taken by the toplevel
         * clause above, so nothing is done for them here *)
        if col1 =|= 0 then ()
        else begin
          msg_macro_noptvirg s;
          macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok);
          iter_token_paren (set_as_comment Token_c.CppMacro) [call_parens]
        end
      end;
      find_macro_lineparen (line2 :: rest)

  (* linuxext:? single macro
   * ex: LOCK
   *     foo();
   *     UNLOCK
   *)
  | Line [PToken ({tok = TIdent (s, _); col = col1; where = ctx} as macro)]
    :: (Line (PToken ({col = col2} as other) :: _) as line2)
    :: rest ->
      (* no restriction on the name here either *)
      let same_column_ok =
        col1 =|= col2 &&
        col1 <> 0 && (* otherwise can match typedef of fundecl *)
        (match other.tok with
         | TPtVirg _ | TOr _ -> false
         | TCBrace _ when ctx <> InFunction -> false
         | t when TH.is_binary_operator t -> false
         | _ -> true)
      in
      let condition =
        same_column_ok
        || (col2 <= col1 && opens_statement ctx other.tok)
      in
      if condition then begin
        msg_macro_noptvirg_single s;
        macro.tok <- TMacroStmt (s, TH.info_of_tok macro.tok)
      end;
      find_macro_lineparen (line2 :: rest)

  (* nothing recognised on this line *)
  | _ :: rest ->
      find_macro_lineparen rest
1339 | ||
1340 | ||
485bce71 C |
1341 | |
1342 | (* ------------------------------------------------------------------------- *) | |
1343 | (* define tobrace init *) | |
1344 | (* ------------------------------------------------------------------------- *) | |
1345 | ||
ae4735db C |
(* Recognise a #define whose body starts with a brace initializer and
 * retag that opening brace as TOBraceDefineInit.
 *
 * Two shapes are looked for, with or without a parenthesised parameter
 * list after the macro name; the decision is taken from the two tokens
 * that follow the brace. The content of parenthesised groups is not
 * searched. After a match the scan resumes after the two inspected
 * tokens.
 *)
let find_define_init_brace_paren xs =
  let tag_brace label name brace info =
    pr2_cpp (label ^ name);
    brace.tok <- TOBraceDefineInit info
  in
  let rec scan elems =
    match elems with
    | [] -> ()

    (* mainly for firefox *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (s, _)}
      :: PToken ({tok = TOBrace i1} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        (match tok2.tok, tok3.tok with
         | (TInt _ | TString _ | TIdent _), TComma _ ->
             tag_brace "found define initializer: " s tokbrace i1
         | _ -> ());
        scan rest

    (* mainly for linux, especially in sound/ *)
    | PToken {tok = TDefine _}
      :: PToken {tok = TIdentDefine (s, _)}
      :: Parenthised _
      :: PToken ({tok = TOBrace i1} as tokbrace)
      :: PToken tok2
      :: PToken tok3
      :: rest ->
        (match tok2.tok, tok3.tok with
         | (TInt _ | TIdent _), TComma _
         | TDot _, TIdent _ ->
             tag_brace "found define initializer with param: " s tokbrace i1
         | _ -> ());
        scan rest

    (* otherwise skip one element; going inside the parentheses is not
     * needed for this heuristic *)
    | PToken _ :: rest -> scan rest
    | Parenthised _ :: rest -> scan rest
  in
  scan xs
1409 | ||
1410 | ||
34e49164 C |
1411 | (* ------------------------------------------------------------------------- *) |
1412 | (* action *) | |
1413 | (* ------------------------------------------------------------------------- *) | |
1414 | ||
708f4980 | 1415 | (* obsolete now with macro expansion ? get some regression if comment. |
ae4735db | 1416 | * todo: if do bad decision here, then it can influence other phases |
708f4980 | 1417 | * and make it hard to parse. So maybe when have a parse error, should |
ae4735db | 1418 | * undo some of the guess those heuristics have done, and restore |
708f4980 C |
1419 | * the original token value. |
1420 | *) | |
1421 | ||
34e49164 C |
(* Higher-order macro detection: an identifier followed by a parenthesised
 * group whose arguments contain statement tokens.
 *
 * For such a pair, the rest of the list is handled first, then the
 * arguments are searched recursively, and finally find_actions_params
 * retags the arguments; msg_macro_higher_order is called when something
 * was retagged. Any other element is skipped (its content is not
 * visited).
 *)
let rec find_actions elems =
  match elems with
  | [] -> ()
  | PToken {tok = TIdent (s, _)} :: Parenthised (args, _) :: rest ->
      find_actions rest;
      List.iter find_actions args;
      if find_actions_params args
      then msg_macro_higher_order s
  | _ :: rest ->
      find_actions rest

(* For each argument containing at least one token satisfying
 * TH.is_statement, turn all its tokens into TAction, except EOF and EOM
 * tokens which are only reported. Returns true when at least one argument
 * was processed that way. All arguments are always examined.
 *)
and find_actions_params xxs =
  let tag_as_action x =
    if TH.is_eof x.tok
    then
      (* certainly because paren detection had a pb because of some
       * ifdef-exp. Similar additional checking to what is done in
       * set_as_comment. *)
      pr2 "PB: weird, I try to tag an EOF token as an action"
    else if TH.is_eom x.tok
    then
      (* cf tests-bis/no_cpar_macro.c *)
      pr2 "PB: weird, I try to tag an EOM token as an action"
    else
      x.tok <- TAction (TH.info_of_tok x.tok)
  in
  let modified = ref false in
  List.iter (fun arg ->
    let toks = tokens_of_paren arg in
    (* a former extra test, List.length toks > 1, was undone: good for
     * sparse, not good for linux *)
    if List.exists (fun x -> TH.is_statement x.tok) toks
    then begin
      iter_token_paren tag_as_action arg;
      modified := true
    end
  ) xxs;
  !modified
1465 | ||
1466 | ||
1467 | ||
1468 | (* ------------------------------------------------------------------------- *) | |
1469 | (* main fix cpp function *) | |
1470 | (* ------------------------------------------------------------------------- *) | |
1471 | ||
(* Keep only the tokens relevant to the macro heuristics: comment tokens
 * and cpp instructions are removed, with the exception of TDefine.
 *
 * The TDefine is kept on purpose: dropping it without also dropping the
 * body of the define could mislead the line heuristics, which would no
 * longer find the TDefine in column 0 but a TDefineIdent in a column > 0.
 *)
let filter_cpp_stuff xs =
  let keep x =
    let t = x.tok in
    if TH.is_comment t then false
    else
      match t with
      | Parser_c.TDefine _ -> true
      | _ -> not (TH.is_cpp_instruction t)
  in
  List.filter keep xs
34e49164 C |
1487 | |
(* Attach a virtual position to the ExpandedTok and FakeTok tokens of l.
 *
 * Tokens are returned unchanged up to and including the first OriginTok.
 * From there on, each ExpandedTok / FakeTok gets the pair
 * (parse info of the closest preceding OriginTok, offset), where the
 * offset starts at the string length of that OriginTok and grows by the
 * string length of each expanded or fake token met since. An
 * AbstractLineTok found after the first OriginTok makes the function fail
 * with Failure.
 *)
let insert_virtual_positions l =
  let width info = String.length (Ast_c.str_of_info info) in
  let with_pinfo tok pinfo =
    TH.visitor_info_of_tok (fun info -> Ast_c.rewrap_pinfo pinfo info) tok in
  (* tokens located after an OriginTok; anchor is its parse info *)
  let rec anchored anchor offset done_rev todo =
    match todo with
    | [] -> List.rev done_rev
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             anchored
               (Ast_c.parse_info_of_info info) (width info)
               (tok :: done_rev) rest
         | Ast_c.ExpandedTok (pi, _) ->
             let tok' =
               with_pinfo tok (Ast_c.ExpandedTok (pi, (anchor, offset))) in
             anchored anchor (offset + width info) (tok' :: done_rev) rest
         | Ast_c.FakeTok (s, _) ->
             let tok' =
               with_pinfo tok (Ast_c.FakeTok (s, (anchor, offset))) in
             anchored anchor (offset + width info) (tok' :: done_rev) rest
         | Ast_c.AbstractLineTok _ -> failwith "abstract not expected")
  in
  (* tokens located before the first OriginTok are left as they are *)
  let rec leading toks =
    match toks with
    | [] -> []
    | tok :: rest ->
        let info = TH.info_of_tok tok in
        (match Ast_c.pinfo_of_info info with
         | Ast_c.OriginTok _ ->
             tok ::
               anchored (Ast_c.parse_info_of_info info) (width info) [] rest
         | _ -> tok :: leading rest)
  in
  leading l
708f4980 | 1518 | |
485bce71 | 1519 | (* ------------------------------------------------------------------------- *) |
f59c9fb7 | 1520 | |
(* Main entry of the cpp heuristics: wrap the tokens in their extended
 * (mutable) form, run all the heuristics on them, and return the
 * resulting tokens with virtual positions inserted.
 *
 * The order of the phases is important: if the action heuristic ran
 * first, parentheses left open because of an ifdef could make it believe
 * it faces a higher order macro and eat too many tokens, hence the ifdef
 * phase comes first.
 *
 * The filtered views are recomputed before each phase because the
 * heuristics mutate the tokens, so more of them may have become comments
 * in the meantime.
 *)
let fix_tokens_cpp2 ~macro_defs tokens =
  let tokens2 = ref (Common.acc_map TV.mk_token_extended tokens) in

  (* views on the current state of the tokens *)
  let without_comments () =
    (* could filter also #define/#include *)
    List.filter (fun x -> not (TH.is_comment x.tok)) !tokens2 in
  let parenthised_view () =
    TV.mk_parenthised (filter_cpp_stuff !tokens2) in

  commentize_skip_start_to_end !tokens2;

  (* ifdef; is_comment also filters the TCommentCpp created in
   * commentize_skip_start_to_end *)
  let ifdef_grouped = TV.mk_ifdef (without_comments ()) in
  set_ifdef_parenthize_info ifdef_grouped;
  find_ifdef_funheaders ifdef_grouped;
  find_ifdef_bool ifdef_grouped;
  find_ifdef_mid ifdef_grouped;
  (* change order ? maybe the cparen_else heuristic makes some of the
   * funheaders heuristics irrelevant ? *)
  find_ifdef_cparen_else ifdef_grouped;
  adjust_inifdef_include ifdef_grouped;

  (* macro 1 *)
  let known_macros_view = parenthised_view () in
  Cpp_token_c.apply_macro_defs
    ~msg_apply_known_macro
    ~msg_apply_known_macro_hint
    macro_defs known_macros_view;
  (* because the before field is used by apply_macro_defs *)
  tokens2 := TV.rebuild_tokens_extented !tokens2;

  (* tagging contextual info (InFunc, InStruct, etc). Better to do that
   * after the ifdef-simplification phase. *)
  let brace_grouped = TV.mk_braceised (without_comments ()) in
  set_context_tag brace_grouped;

  (* macro *)
  let paren_grouped = parenthised_view () in
  let line_paren_grouped = TV.mk_line_parenthised paren_grouped in
  find_define_init_brace_paren paren_grouped;
  find_string_macro_paren paren_grouped;
  find_macro_lineparen line_paren_grouped;
  find_macro_paren paren_grouped;

  (* obsolete: actions ? not yet *)
  let actions_view = parenthised_view () in
  find_actions actions_view;

  insert_virtual_positions (Common.acc_map (fun x -> x.tok) !tokens2)
1599 | ||
(* Run fix_tokens_cpp2 under the exclusive profiling entry HACK. *)
let time_hack1 ~macro_defs a =
  let run () = fix_tokens_cpp2 ~macro_defs a in
  Common.profile_code_exclusif "HACK" run
34e49164 | 1602 | |
(* Profiled entry point of the cpp token fixing pass: delegates to
 * time_hack1 under the profiling entry C parsing.fix_cpp. *)
let fix_tokens_cpp ~macro_defs a =
  let run () = time_hack1 ~macro_defs a in
  Common.profile_code "C parsing.fix_cpp" run
34e49164 | 1605 | |
34e49164 | 1606 | |
34e49164 | 1607 | |
34e49164 C |
1608 | |
1609 | (*****************************************************************************) | |
1610 | (* Lexing with lookahead *) | |
1611 | (*****************************************************************************) | |
1612 | ||
(* Why use yet another parsing_hack technique? Are the fix_xxx functions,
 * which do some pre-processing on the full list of tokens, not enough?
 * No, because sometimes we need more contextual info, and even if
 * set_context() tries to give some contextual info, it is not completely
 * accurate, so the following code gives yet another alternative, yet another
 * chance to transform some tokens.
 *
 * todo?: maybe could try to get rid of this technique. Maybe a better
 * set_context() would make it possible to move this code to a fix_xx
 * technique.
 *
 * LALR(k) trick. We can do stuff by adding cases in lexer_c.mll, but
 * it is more general to do it via my LALR(k) tech. Because here we can
 * transform some tokens given some context information. So sometimes it
 * makes sense to transform a token in one context, sometimes not, and
 * lex cannot provide us this context information. Note that the order
 * in the pattern matching in lookahead is important. Do not cut/paste.
 *
 * Note that in next there are only "clean" tokens; there are no comment
 * or space tokens. This is done by the caller.
 *
 *)
1635 | ||
485bce71 C |
1636 | open Lexer_parser (* for the fields of lexer_hint type *) |
1637 | ||
(* Returns false when the first token of the list is a struct, union or
 * enum keyword; returns true otherwise (including for the empty list). *)
let not_struct_enum = function
  | [] -> true
  | tok :: _ ->
      (match tok with
       | Parser_c.Tstruct _ | Parser_c.Tunion _ | Parser_c.Tenum _ -> false
       | _ -> true)
34e49164 | 1641 | |
f59c9fb7 C |
(* Tells whether a token can play the role of a pointer marker:
 * TMul always does; TAnd does only when Flag.c_plus_plus is set. *)
let pointer tok =
  match tok with
  | TMul _ -> true
  | TAnd _ -> !Flag.c_plus_plus
  | _ -> false
485bce71 | 1646 | |
17ba0788 C |
(* True for an identifier token, or for any token accepted by [pointer]. *)
let ident_or_star tok =
  match tok with
  | TIdent _ -> true
  | _ -> pointer tok
1650 | ||
97111a47 C |
(* Heuristic used for K&R-style parameter lists: skip forward (with
 * drop_until) to the first close paren of [l] and answer true when such a
 * paren exists and is not immediately followed by a TOCro token.
 *
 * This function is inefficient, because it will look over a K&R header or
 * function prototype multiple times. At least when we see a comma and are
 * in a parameter list, we know we will eventually see a close paren, and it
 * should come fairly soon. *)
let k_and_r l =
  let is_close_paren = function TCPar _ -> true | _ -> false in
  match drop_until is_close_paren l with
  | TCPar _ :: after_paren ->
      (match after_paren with
       | TOCro _ :: _ -> false
       | _ -> true)
  | _ -> false
34e49164 | 1661 | |
97111a47 C |
(* (a)(b) is ambiguous, because (a) could be a function name or a cast.
 * At this point, we just see an ident for a; we do not know if it is e.g. a
 * local variable. This function checks at least whether b is the only
 * argument, i.e. there are no commas at top level.
 *
 * The scan tracks the parenthesis nesting depth, starting at 0. It returns
 * true when a close paren is met at depth 1, and false when a comma is met
 * at depth 1 or when the token list runs out. *)
let paren_before_comma l =
  let rec scan depth toks =
    match toks with
    | [] -> false
    | TComma _ :: tl -> if depth = 1 then false else scan depth tl
    | TCPar _ :: tl -> if depth = 1 then true else scan (depth - 1) tl
    | TOPar _ :: tl -> scan (depth + 1) tl
    | _ :: tl -> scan depth tl
  in
  scan 0 l
1675 | ||
(* Lookahead-based token rewriting.
 *
 * Arguments:
 *  - pass:   integer pass number; cpp directives are turned into comments
 *            when pass >= 2 (or, for ifdef-like tokens, when
 *            Flag_parsing_c.ifdef_directive_passing is set).
 *  - next:   the upcoming tokens; the head is the token being decided and
 *            the rest is the lookahead.
 *  - before: the tokens already seen; judging from the rules below
 *            (e.g. "( const xx)") the head is the token immediately
 *            preceding the current one.
 *
 * Returns the head of [next], possibly replaced by another token
 * (TypedefIdent, TKRParam, TCommentCpp, TMacroIterator, ...).
 * Raises Impossible when [next] is empty.
 *
 * Side effects: the typedef rules call LP.add_typedef_root (and rule 1 also
 * LP.disable_typedef), emit messages, and the ifdef rule increments counters
 * in Stat.
 *
 * The order of the match cases is significant: earlier rules shadow later
 * ones. Do not reorder them.
 *)
let lookahead2 ~pass next before =
  match (next, before) with

  (* c++ hacks *)
  (* yy xx(   and in function *)
  | TOPar i1::_,              TIdent(s,i2)::TypedefIdent _::_
      when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
      pr2_cpp("constructed_object: " ^s);
      TOParCplusplusInit i1
  | TypedefIdent(s,i)::TOPar i1::_,_
      when !Flag.c_plus_plus && (LP.current_context () = (LP.InFunction)) ->
      TIdent(s,i)

  (*-------------------------------------------------------------*)
  (* typedef inference, parse_typedef_fix3 *)
  (*-------------------------------------------------------------*)
  (* xx xx *)
  | (TIdent(s,i1)::TIdent(s2,i2)::_ , _) when not_struct_enum before && s =$= s2
      && ok_typedef s
      (* (take_safe 1 !passed_tok <> [TOPar]) ->  *)
    ->
      (* parse_typedef_fix3:
       *    acpi_object		acpi_object;
       * was badly parsed, because there was no time to call dt() in the
       * type_spec. The parser has internally already requested the next
       * token in order to be able to decide things.
       *  => special case in lexer_heuristic, again
       *)
      if !Flag_parsing_c.debug_typedef
      then pr2 ("TYPEDEF: disable typedef cos special case: " ^ s);

      LP.disable_typedef();

      msg_typedef s i1 1; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx yy *)
  | (TIdent (s, i1)::TIdent (s2, i2)::_  , _) when not_struct_enum before
      && ok_typedef s
        ->
         (* && not_annot s2 BUT lead to false positive*)

      msg_typedef s i1 2; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* xx inline *)
  | (TIdent (s, i1)::Tinline i2::_  , _) when not_struct_enum before
      && ok_typedef s
      ->
      msg_typedef s i1 3; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* [,(] xx [,)] AND param decl *)
  | (TIdent (s, i1)::(((TComma _|TCPar _)::_) as rest) ,
     (TComma _ |TOPar _)::_ )
    when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
      && k_and_r rest
      ->
      TKRParam(s,i1)

  | (TIdent (s, i1)::((TComma _|TCPar _)::_) , (TComma _ |TOPar _)::_ )
    when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
      && ok_typedef s
      ->
      msg_typedef s i1 4; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx* [,)] *)
  (* specialcase:  [,(] xx* [,)] *)
  | (TIdent (s, i1)::ptr::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
    when pointer ptr && not_struct_enum before
        (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      msg_typedef s i1 5; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* xx** [,)] *)
  (* specialcase:  [,(] xx** [,)] *)
  | (TIdent (s, i1)::TMul _::TMul _::(TComma _|TCPar _)::_ , (*(TComma _|TOPar _)::*)_ )
    when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
    ->
      msg_typedef s i1 6; LP.add_typedef_root s;
      TypedefIdent (s, i1)



  (* xx const *   USELESS because of next rule ? *)
  | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::TMul _::_ , _ )
      when not_struct_enum before
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
      ->

      msg_typedef s i1 7; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx const *)
  | (TIdent (s, i1)::(Tconst _|Tvolatile _|Trestrict _)::_ , _ )
      when not_struct_enum before
      && ok_typedef s
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      ->

      msg_typedef s i1 8; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* xx * const *)
  | (TIdent (s, i1)::ptr::(Tconst _ | Tvolatile _|Trestrict _)::_ , _ )
      when pointer ptr && not_struct_enum before
      && ok_typedef s
      ->
      (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)

      msg_typedef s i1 9; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* ( const xx)  *)
  | (TIdent (s, i1)::TCPar _::_,  (Tconst _ | Tvolatile _|Trestrict _)::TOPar _::_) when
      ok_typedef s  ->
      msg_typedef s i1 10; LP.add_typedef_root s;
      TypedefIdent (s, i1)



  (* ( xx ) [sizeof, ~] *)
  | (TIdent (s, i1)::TCPar _::(Tsizeof _|TTilde _)::_ ,     TOPar _::_ )
    when not_struct_enum before
      && ok_typedef s
    ->
      msg_typedef s i1 11; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* [(,] xx [   AND parameterdeclaration *)
  | (TIdent (s, i1)::TOCro _::_, (TComma _ |TOPar _)::_)
    when (LP.current_context() =*= LP.InParameter)
      && ok_typedef s
     ->
      msg_typedef s i1 12; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*------------------------------------------------------------*)
  (* if 'x*y' maybe an expr, maybe just a classic multiplication *)
  (* but if have a '=', or ','   I think not *)
  (*------------------------------------------------------------*)

  (* static xx * yy  *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::_ ,
     (Tregister _|Tstatic _  |Tvolatile _|Tconst _|Trestrict _)::_) when
      pointer ptr && ok_typedef s
        ->
      msg_typedef s i1 13; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  TODO  xx * yy ; AND in start of compound element  *)


  (*  xx * yy,      AND  in paramdecl *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ , _)
    when not_struct_enum before && (LP.current_context() =*= LP.InParameter)
      && pointer ptr && ok_typedef s
      ->

      msg_typedef s i1 14; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (*  xx * yy ;     AND in Toplevel, except when have = before  *)
  | (TIdent (s, i1)::TMul _::TIdent (s2, i2)::TPtVirg _::_ , TEq _::_) ->
      TIdent (s, i1)
  (* NOTE(review): unlike the neighbouring rules, this one does not check
   * ok_typedef s - confirm whether that is intentional. *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TPtVirg _::_ , _)
    when not_struct_enum before && pointer ptr &&
      (LP.is_top_or_struct (LP.current_context ()))
      ->
      msg_typedef s i1 15; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx * yy ,     AND in Toplevel  *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ , _)
    when not_struct_enum before && (LP.current_context () =*= LP.InTopLevel)
      && ok_typedef s && pointer ptr
      ->

      msg_typedef s i1 16; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx * yy (     AND in Toplevel  *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TOPar _::_ , _)
    when not_struct_enum before
      && (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s && pointer ptr
      ->
      msg_typedef s i1 17; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx * yy [ *)
  (* todo? enough ? cos in struct def we can have some expression ! *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TOCro _::_ , _)
    when not_struct_enum before &&
      (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s && pointer ptr
      ->
      msg_typedef s i1 18;  LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* u16: 10; in struct *)
  | (TIdent (s, i1)::TDotDot _::_ , (TOBrace _ | TPtVirg _)::_)
    when       (LP.is_top_or_struct (LP.current_context ()))
      && ok_typedef s
      ->
      msg_typedef s i1 19;  LP.add_typedef_root s;
      TypedefIdent (s, i1)


    (*  why need TOPar condition as stated in preceding rule ? really needed ? *)
    (*   YES cos at toplevel can have some expression !! for instance when *)
    (*   enter in the dimension of an array *)
    (*
      | (TIdent s::TMul::TIdent s2::_ , _)
      when (take_safe 1 !passed_tok <> [Tstruct] &&
      (take_safe 1 !passed_tok <> [Tenum]))
      &&
      !LP._lexer_hint = Some LP.Toplevel ->
      msg_typedef s 20; LP.add_typedef_root s;
      TypedefIdent s
     *)

  (*  xx * yy =  *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TEq _::_ , _)
    when not_struct_enum before
      && ok_typedef s && pointer ptr
      ->
      msg_typedef s i1 21; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (*  xx * yy)      AND in paramdecl *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TCPar _::_ , _)
    when not_struct_enum before && (LP.current_context () =*= LP.InParameter)
      && ok_typedef s && pointer ptr
        ->
      msg_typedef s i1 22; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (*  xx * yy; *) (* wrong ? *)
  (* The guard below used the deprecated boolean operator "&" in the
   * original; it is now "&&", which has the same precedence and the same
   * short-circuit semantics. *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TPtVirg _::_ ,
     (TOBrace _| TPtVirg _)::_)  when not_struct_enum before
      && ok_typedef s && pointer ptr
        ->
      msg_typedef s i1 23;  LP.add_typedef_root s;
      msg_maybe_dangereous_typedef s;
      TypedefIdent (s, i1)


  (*  xx * yy,  and ';' before xx *) (* wrong ? *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::TComma _::_ ,
     (TOBrace _| TPtVirg _)::_) when
      ok_typedef s && pointer ptr
    ->
      msg_typedef s i1 24; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (* xx_t * yy *)
  | (TIdent (s, i1)::ptr::TIdent (s2, i2)::_ , _)
      when s ==~ regexp_typedef && not_struct_enum before
        (* struct user_info_t sometimes *)
      && ok_typedef s && pointer ptr
        ->
      msg_typedef s i1 25; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx ** yy *)  (* wrong ? *)
  | (TIdent (s, i1)::TMul _::TMul _::TIdent (s2, i2)::_ , _)
    when not_struct_enum before
        (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
      ->
      msg_typedef s i1 26; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* xx *** yy *)
  | (TIdent (s, i1)::TMul _::TMul _::TMul _::TIdent (s2, i2)::_ , _)
    when not_struct_enum before
      && ok_typedef s
        (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      ->
      msg_typedef s i1 27; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (*  xx ** ) *)
  | (TIdent (s, i1)::TMul _::TMul _::TCPar _::_ , _)
    when not_struct_enum before
        (* && !LP._lexer_hint = Some LP.ParameterDeclaration *)
      && ok_typedef s
      ->
      msg_typedef s i1 28; LP.add_typedef_root s;
      TypedefIdent (s, i1)



  (* ----------------------------------- *)
  (* old: why not do like for other rules and start with TIdent ?
   * why do TOPar :: TIdent :: ..., _  and not TIdent :: ...,  TOPAr::_ ?
   * new: prefer now start with TIdent because otherwise the add_typedef_root
   * may have no effect if in second pass or if have disable the add_typedef.
   *)

  (*  (xx) yy *)
  | (TIdent (s, i1)::TCPar i2::(TIdent (_,i3)|TInt (_,i3))::after::_ ,
    (TOPar info)::x::_)
    when not (TH.is_stuff_taking_parenthized x) (* &&
      Ast_c.line_of_info i2 =|= Ast_c.line_of_info i3 - why useful?
      *)
      && ok_typedef s
      && not (ident_or_star after) (* possible K&R declaration *)
      ->
      msg_typedef s i1 29; LP.add_typedef_root s;
      (*TOPar info*)
      TypedefIdent (s, i1)


  (*  (xx) (    yy)
   * but false positif: typedef int (xxx_t)(...), so do specialisation below.
   *)
  (*
  | (TIdent (s, i1)::TCPar _::TOPar _::_ , (TOPar info)::x::_)
    when not (TH.is_stuff_taking_parenthized x)
      && ok_typedef s
        ->
      msg_typedef s 30; LP.add_typedef_root s;
      (* TOPar info *)
      TypedefIdent (s, i1)
  *)
  (* special case:  = (xx) (    yy) *)
  | (TIdent (s, i1)::TCPar _::((TOPar _::_) as rest) ,
    (TOPar info)::(TEq _ |TEqEq _)::_)
    when ok_typedef s && paren_before_comma rest
        ->
      msg_typedef s i1 31; LP.add_typedef_root s;
      (* TOPar info *)
      TypedefIdent (s, i1)


  (*  (xx * ) yy *)
  | (TIdent (s, i1)::ptr::TCPar _::TIdent (s2, i2)::_ , (TOPar info)::_)
    when ok_typedef s && pointer ptr
        ->
      msg_typedef s i1 32; LP.add_typedef_root s;
      (*TOPar info*)
      TypedefIdent (s,i1)


  (* (xx){ ... }  constructor *)
  | (TIdent (s, i1)::TCPar _::TOBrace _::_ , TOPar _::x::_)
      when (*s ==~ regexp_typedef && *) not (TH.is_stuff_taking_parenthized x)
      && ok_typedef s
        ->
      msg_typedef s i1 33; LP.add_typedef_root s;
      TypedefIdent (s, i1)


        (* can have sizeof on expression
           | (Tsizeof::TOPar::TIdent s::TCPar::_,   _) ->
           msg_typedef s; LP.add_typedef_root s;
           Tsizeof
         *)


  (* ----------------------------------- *)
  (* x ( *y )(params),  function pointer *)
  | (TIdent (s, i1)::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_,  _)
      when not_struct_enum before
      && ok_typedef s
        ->
      msg_typedef s i1 34; LP.add_typedef_root s;
      TypedefIdent (s, i1)

  (* x* ( *y )(params),  function pointer 2 *)
  | (TIdent (s, i1)::TMul _::TOPar _::TMul _::TIdent _::TCPar _::TOPar _::_,  _)
      when not_struct_enum before
      && ok_typedef s
        ->
      msg_typedef s i1 35; LP.add_typedef_root s;
      TypedefIdent (s, i1)


  (*-------------------------------------------------------------*)
  (* CPP *)
  (*-------------------------------------------------------------*)
  | ((TIfdef (_,ii) |TIfdefelse (_,ii) |TIfdefelif (_,ii) |TEndif (_,ii) |
      TIfdefBool (_,_,ii)|TIfdefMisc(_,_,ii)|TIfdefVersion(_,_,ii))
        as x)
    ::_, _
      ->
      (*
      if not !Flag_parsing_c.ifdef_to_if
      then TCommentCpp (Ast_c.CppDirective, ii)
      else
      *)
         (* not !LP._lexer_hint.toplevel *)
      if !Flag_parsing_c.ifdef_directive_passing
        || (pass >= 2)
      then begin

        if (LP.current_context () =*= LP.InInitializer)
        then begin
          pr2_cpp "In Initializer passing"; (* cheat: dont count in stat *)
          incr Stat.nIfdefInitializer;
        end else begin
          pr2_cpp("IFDEF: or related inside function. I treat it as comment");
          incr Stat.nIfdefPassing;
        end;
        TCommentCpp (Token_c.CppDirective, ii)
      end
      else x

  | (TUndef (ii) as x)::_, _
      ->
      if (pass >= 2)
      then begin
        pr2_cpp("UNDEF: I treat it as comment");
        TCommentCpp (Token_c.CppDirective, ii)
      end
      else x

  | (TCppDirectiveOther (ii) as x)::_, _
      ->
      if (pass >= 2)
      then begin
        pr2_cpp ("OTHER directive: I treat it as comment");
        TCommentCpp (Token_c.CppDirective, ii)
      end
      else x

   (* If ident contain a for_each, then certainly a macro. But to be
    * sure should look if there is a '{' after the ')', but it requires
    * to count the '('. Because this can be expensive, we do that only
    * when the token contains "for_each".
    *)
  | (TIdent (s, i1)::TOPar _::rest, _)
     when not (LP.current_context () =*= LP.InTopLevel)
      (* otherwise a function such as static void loopback_enable(int i) {
       * will be considered as a loop
       *)
        ->

      if s ==~ regexp_foreach &&
        is_really_foreach (Common.take_safe forLOOKAHEAD rest)

      then begin
        msg_foreach s;
        TMacroIterator (s, i1)
      end
      else TIdent (s, i1)

 (*-------------------------------------------------------------*)
 | v::xs, _ -> v
 | _ -> raise Impossible
2143 | ||
(* Profiled entry point for lookahead2: same arguments and result, with the
 * call recorded under the "C parsing.lookahead" entry of
 * Common.profile_code. *)
let lookahead ~pass a b =
  let run () = lookahead2 ~pass a b in
  Common.profile_code "C parsing.lookahead" run
34e49164 C |
2146 | |
2147 |