39dce63612057246b209209cc9d6b6e887c8cd50
[bpt/coccinelle.git] / parsing_c / cpp_token_c.ml
1 open Common
2
3 module TH = Token_helpers
4
5 open Parser_c
6 open Token_views_c
7
8 (*****************************************************************************)
9 (* Prelude *)
10 (*****************************************************************************)
11
12 (* cpp functions working at the token level. Cf cpp_ast_c for cpp functions
13 * working at the AST level (which is very unusual but makes sense in
14 * the coccinelle context for instance).
15 *
16 * Note that as I use a single lexer to work both at the C and cpp level
17 * there are some inconveniencies.
18 * For instance 'for' is a valid name for a macro parameter and macro
19 * body, but is interpreted in a special way by our single lexer, and
20 * so at some places where I expect a TIdent I need also to
21 * handle special cases and accept Tfor, Tif, etc at those places.
22 *
23 * There are multiple issues related to those keywords incorrect tokens.
24 * Those keywords can be:
25 * - (1) in the name of the macro as in #define inline
26 * - (2) in a parameter of the macro as in #define foo(char) char x;
27 * - (3) in an argument to a macro call as in IDENT(if);
28 * Case 1 is easy to fix in define_ident.
29 * Case 2 is easy to fix in define_parse where detect such toks in
30 * the parameter and then replace their occurence in the body in a Tident.
31 * Case 3 is only an issue when the expanded token is not really use
32 * as usual but use for instance in concatenation as in a ## if
33 * when expanded. In the case the grammar this time will not be happy
34 * so this is also easy to fix in cpp_engine.
35 *
36 *)
37
38 (*****************************************************************************)
39 (* Wrappers *)
40 (*****************************************************************************)
41 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
42
43 (*****************************************************************************)
44 (* Types *)
45 (*****************************************************************************)
46
47 (* ------------------------------------------------------------------------- *)
48 (* mimic standard.h *)
49 (* ------------------------------------------------------------------------- *)
50
51 type define_def = string * define_param * define_body
52 and define_param =
53 | NoParam
54 | Params of define_arg list
55 and define_arg = FixedArg of string | VariadicArg of string
56 and define_body =
57 | DefineBody of Parser_c.token list
58 | DefineHint of parsinghack_hint
59
60 and parsinghack_hint =
61 | HintIterator
62 | HintDeclarator
63 | HintMacroString
64 | HintMacroStatement
65 | HintAttribute
66 | HintMacroIdentBuilder
67
68
69 (*****************************************************************************)
70 (* Parsing and helpers of hints *)
71 (*****************************************************************************)
72
73 (* cf also data/test.h *)
74 let assoc_hint_string = [
75 "YACFE_ITERATOR" , HintIterator;
76 "YACFE_DECLARATOR" , HintDeclarator;
77 "YACFE_STRING" , HintMacroString;
78 "YACFE_STATEMENT" , HintMacroStatement;
79 "YACFE_ATTRIBUTE" , HintAttribute;
80 "YACFE_IDENT_BUILDER" , HintMacroIdentBuilder;
81
82 "MACROSTATEMENT" , HintMacroStatement; (* backward compatibility *)
83 ]
84
85
86 let (parsinghack_hint_of_string: string -> parsinghack_hint option) = fun s ->
87 Common.assoc_option s assoc_hint_string
88 let (string_of_parsinghack_hint: parsinghack_hint -> string) = fun hint ->
89 let assoc' = assoc_hint_string +> List.map (fun (a,b) -> (b,a) ) in
90 Common.assoc hint assoc'
91
92
93
94 let (is_parsinghack_hint: string -> bool) = fun s ->
95 parsinghack_hint_of_string s <> None
96
97 let (token_from_parsinghack_hint:
98 (string * Ast_c.info) -> parsinghack_hint -> Parser_c.token) =
99 fun (s,ii) hint ->
100 match hint with
101 | HintIterator ->
102 Parser_c.TMacroIterator (s, ii)
103 | HintDeclarator ->
104 Parser_c.TMacroDecl (s, ii)
105 | HintMacroString ->
106 Parser_c.TMacroString (s, ii)
107 | HintMacroStatement ->
108 Parser_c.TMacroStmt (s, ii)
109 | HintAttribute ->
110 Parser_c.TMacroAttr (s, ii)
111 | HintMacroIdentBuilder ->
112 Parser_c.TMacroIdentBuilder (s, ii)
113
114
115 (* used in extract_macros for example *)
116 let string_of_define_def (s, params, body) =
117
118 let s1 =
119 match params with
120 | NoParam ->
121 spf "#define %s " s
122 | Params xs ->
123 let xs = List.map (function FixedArg s -> s | VariadicArg s -> s) xs in
124 spf "#define %s(%s) " s (Common.join "," xs)
125 in
126 let s2 =
127 match body with
128 | DefineHint hint ->
129 string_of_parsinghack_hint hint
130 | DefineBody xs ->
131 Common.join " " (xs +> List.map Token_helpers.str_of_tok)
132 in
133 s1 ^ s2
134
135
136 (*****************************************************************************)
137 (* Expansion helpers *)
138 (*****************************************************************************)
139
140 (* In some cases we can have macros like IDENT(if) that expands to some
141 * 'int xxx_if(void)', but as the lexer will currently generate a Tif for
142 * the expanded code, that may not be accepted as a token after a ##
143 * in the grammar. Hence this function to remap some tokens. This is because
144 * we should not use a single lexer for both working at the C level and
145 * cpp level.
146 *
147 * update: it can also rename some TypedefIdent into TIdent, possibly
148 * because of bad interaction with add_typedef_root in parsing_hacks.
149 *)
150 let rec remap_keyword_tokens xs =
151 match xs with
152 | [] -> []
153 | [x] -> [x]
154 | x::y::xs ->
155 (match x, y with
156 | Parser_c.TCppConcatOp _, Parser_c.TIdent _ ->
157 x::y::remap_keyword_tokens xs
158 | Parser_c.TIdent _, Parser_c.TCppConcatOp _ ->
159 x::y::remap_keyword_tokens xs
160
161 | Parser_c.TCppConcatOp (i1), y ->
162
163 let s = TH.str_of_tok y in
164 let ii = TH.info_of_tok y in
165 if s ==~ Common.regexp_alpha
166 then begin
167 pr2 (spf "remapping: %s to an ident in expanded code" s);
168 x::(Parser_c.TIdent (s, ii))::remap_keyword_tokens xs
169 end
170 else
171 x::y::remap_keyword_tokens xs
172
173 | x, Parser_c.TCppConcatOp (i1) ->
174 let s = TH.str_of_tok x in
175 let ii = TH.info_of_tok x in
176 if s ==~ Common.regexp_alpha
177 then begin
178 pr2 (spf "remapping: %s to an ident in expanded code" s);
179 (Parser_c.TIdent (s, ii))::remap_keyword_tokens (y::xs)
180 end
181 else
182 x::y::remap_keyword_tokens xs
183
184 | _, _ ->
185 x::remap_keyword_tokens (y::xs)
186 )
187
188
189 (* works with agglomerate_concat_op_ident below *)
190 let rec get_ident_in_concat_op xs =
191 match xs with
192 | [] ->
193 pr2 "weird: ident after ## operator not found";
194 "", []
195 | [x] ->
196 (match x with
197 | Parser_c.TIdent (s, i1) -> s, []
198 | _ ->
199 pr2 "weird: ident after ## operator not found";
200 "", [x]
201 )
202 | x::y::xs ->
203 (match x, y with
204 | Parser_c.TIdent (s,i1), Parser_c.TCppConcatOp (i2) ->
205 let (s2, rest) = get_ident_in_concat_op xs in
206 s ^ s2, rest
207 | Parser_c.TIdent (s, i1), _ ->
208 s, (y::xs)
209 | _ ->
210 pr2 "weird: ident after ## operator not found";
211 "", x::y::xs
212 )
213
214 (* must be run after the expansion has been done for the parameter so
215 * that all idents are actually ident, not macro parameter names.
216 *)
217 let rec agglomerate_concat_op_ident xs =
218 match xs with
219 | [] -> []
220 | [x] -> [x]
221 | x::y::xs ->
222 (* can we have ## id, and so ## as first token ? yes
223 * but the semantic is different as it represents variadic
224 * names so this must be handled elsewhere.
225 *)
226 (match x, y with
227 | Parser_c.TIdent (s,i1), Parser_c.TCppConcatOp (i2) ->
228 let (all_str_ident, rest_toks) =
229 get_ident_in_concat_op xs
230 in
231 let new_s = s ^ all_str_ident in
232 let i1' = Ast_c.rewrap_str new_s i1 in
233 Parser_c.TIdent (new_s, i1')::agglomerate_concat_op_ident rest_toks
234 | Parser_c.TCppConcatOp _, _ ->
235 pr2 "weird, ## alone";
236 x::agglomerate_concat_op_ident (y::xs)
237 | _ ->
238 x::agglomerate_concat_op_ident (y::xs)
239
240 )
241
242
243
244 (* To expand the parameter of the macro. The env corresponds to the actual
245 * code that is binded to the parameters of the macro.
246 * Recurse ? fixpoint ? the expansion may also contain macro.
247 * Or to macro expansion in a strict manner, that is process first
248 * the parameters, expands macro in params, and then process enclosing
249 * macro call.
250 *
251 * note: do the concatenation job of a##b here ?
252 * normally this should be done in the grammar. Here just expand
253 * tokens. The only thing we handle here is we may have to remap
254 * some tokens.
255 *
256 * todo: handle stringification here ? if #n
257 *
258 * todo? but could parsing_hacks then pass over the remapped tokens,
259 * for instance transform some of the back into some TypedefIdent
260 * so cpp_engine may be fooled?
261 *)
262 let rec (cpp_engine:
263 ?evaluate_concatop:bool ->
264 (string , Parser_c.token list) assoc ->
265 Parser_c.token list -> Parser_c.token list) =
266 fun ?(evaluate_concatop=true) env xs ->
267 xs +> List.map (fun tok ->
268 (* expand only TIdent ? no cos the parameter of the macro
269 * can actually be some 'register' so may have to look for
270 * any tokens candidates for the expansion.
271 * Only subtelity is maybe dont expand the TDefineIdent.
272 *
273 * update: in fact now the caller (define_parse) will have done
274 * the job right and already replaced the macro parameter with a TIdent.
275 *)
276 match tok with
277 | TIdent (s,i1) when List.mem_assoc s env ->
278 Common.assoc s env
279 | x -> [x]
280 )
281 +> List.flatten
282 +> remap_keyword_tokens
283 +> (fun xs ->
284 if evaluate_concatop
285 then agglomerate_concat_op_ident xs
286 else xs
287 )
288
289
290
291 (* ------------------------------------------------------------------------- *)
292 (* apply macro, using standard.h or other defs *)
293 (* ------------------------------------------------------------------------- *)
294
295 (* Thanks to this function many stuff are not anymore hardcoded in ocaml code.
296 * At some point there were hardcoded in a standard.h file but now I
297 * can even generate them on the fly on demand when there is actually
298 * a parsing problem.
299 *
300 * No need to take care to not substitute the macro name itself
301 * that occurs in the macro definition because the macro name is
302 * after fix_token_define a TDefineIdent, no more a TIdent.
303 *)
304
305 let rec apply_macro_defs
306 ~msg_apply_known_macro
307 ~msg_apply_known_macro_hint
308 ?evaluate_concatop
309 ?(inplace_when_single=true)
310 defs xs =
311
312 let rec apply_macro_defs xs =
313 match xs with
314 | [] -> ()
315
316 (* old: "but could do more, could reuse same original token
317 * so that have in the Ast a Dbg, not a MACROSTATEMENT"
318 *
319 * | PToken ({tok = TIdent (s,i1)} as id)::xs
320 * when s = "MACROSTATEMENT" ->
321 *
322 * msg_macro_statement_hint s;
323 * id.tok <- TMacroStmt(TH.info_of_tok id.tok);
324 * find_macro_paren xs
325 *
326 * let msg_macro_statement_hint s =
327 * incr Stat.nMacroHint;
328 * ()
329 *
330 *)
331
332 (* recognized macro of standard.h (or other) *)
333 | PToken ({tok = TIdent (s,i1)} as id)::Parenthised (xxs,info_parens)::xs
334 when Hashtbl.mem defs s ->
335
336 msg_apply_known_macro s;
337 let (s, params, body) = Hashtbl.find defs s in
338
339 (match params with
340 | NoParam ->
341 pr2 ("WEIRD: macro without param used before parenthize: " ^ s);
342 (* ex: PRINTP("NCR53C400 card%s detected\n" ANDP(((struct ... *)
343
344 (match body with
345 | DefineBody bodymacro ->
346 set_as_comment (Token_c.CppMacro) id;
347 id.new_tokens_before <- bodymacro;
348 | DefineHint hint ->
349 msg_apply_known_macro_hint s;
350 id.tok <- token_from_parsinghack_hint (s,i1) hint;
351 )
352 | Params params ->
353 (match body with
354 | DefineBody bodymacro ->
355
356 (* bugfix: better to put this that before the match body,
357 * cos our macrostatement hint can have variable number of
358 * arguments and so it's ok if it does not match exactly
359 * the number of arguments. *)
360 let build_binder params xxs =
361 let rec loop = function
362 ([],[]) -> Some (function [] -> [] | _ -> failwith "bad")
363 | ([],l) -> None
364 | ([(VariadicArg s)],l) ->
365 Some (function l -> List.map (function a -> (s,a)) l)
366 | ((VariadicArg _)::_,l) -> None
367 | ((FixedArg _)::_,[]) -> None
368 | ((FixedArg s)::rest,x::xs) ->
369 (match loop (rest,xs) with
370 Some k ->
371 Some (function l -> (s,(List.hd l)) :: k (List.tl l))
372 | None -> None) in
373 loop (params, xxs) in
374 (match build_binder params xxs with
375 None ->
376 pr2_once
377 ("WEIRD: macro with wrong number of arguments: " ^ s);
378 (* old: id.new_tokens_before <- bodymacro; *)
379
380 (* update: if wrong number, then I just pass this macro *)
381 [Parenthised (xxs, info_parens)] +>
382 iter_token_paren (set_as_comment Token_c.CppMacro);
383 set_as_comment Token_c.CppMacro id
384 | Some bind ->
385
386 let xxs' = xxs +> List.map (fun x ->
387 (tokens_of_paren_ordered x) +> List.map (fun x ->
388 TH.visitor_info_of_tok Ast_c.make_expanded x.tok
389 )
390 ) in
391 id.new_tokens_before <-
392 (* !!! cpp expansion job here !!! *)
393 cpp_engine ?evaluate_concatop
394 (bind xxs') bodymacro;
395
396 (* important to do that after have apply the macro,
397 otherwise will pass as argument to the macro some
398 tokens that are all TCommentCpp
399 *)
400 [Parenthised (xxs, info_parens)] +>
401 iter_token_paren (set_as_comment Token_c.CppMacro);
402 set_as_comment Token_c.CppMacro id)
403
404 | DefineHint (HintMacroStatement as hint) ->
405 (* important to do that after have apply the macro, otherwise
406 * will pass as argument to the macro some tokens that
407 * are all TCommentCpp
408 *
409 * note: such macrostatement can have a variable number of
410 * arguments but here we don't care, we just pass all the
411 * parameters.
412 *)
413
414 (match xs with
415 | PToken ({tok = TPtVirg _} as id2)::_ ->
416 pr2_once
417 ("macro stmt with trailing ';', passing also ';' for: "^
418 s);
419 (* sometimes still want pass its params ... as in
420 * DEBUGPOLL(static unsigned int prev_mask = 0);
421 *)
422
423 msg_apply_known_macro_hint s;
424 id.tok <- token_from_parsinghack_hint (s,i1) hint;
425 [Parenthised (xxs, info_parens)] +>
426 iter_token_paren (set_as_comment Token_c.CppMacro);
427 set_as_comment Token_c.CppMacro id2;
428
429 | _ ->
430 msg_apply_known_macro_hint s;
431 id.tok <- token_from_parsinghack_hint (s,i1) hint;
432 [Parenthised (xxs, info_parens)] +>
433 iter_token_paren (set_as_comment Token_c.CppMacro);
434 )
435
436
437 | DefineHint hint ->
438 msg_apply_known_macro_hint s;
439 id.tok <- token_from_parsinghack_hint (s,i1) hint;
440 )
441 );
442 apply_macro_defs xs
443
444 | PToken ({tok = TIdent (s,i1)} as id)::xs
445 when Hashtbl.mem defs s ->
446
447 msg_apply_known_macro s;
448 let (_s, params, body) = Hashtbl.find defs s in
449
450 (match params with
451 | Params _ ->
452 pr2 ("WEIRD: macro with params but no parens found: " ^ s);
453 (* dont apply the macro, perhaps a redefinition *)
454 ()
455 | NoParam ->
456 (match body with
457 (* bugfix: we prefer not using this special case when we come
458 * from extract_macros context
459 *)
460 | DefineBody [newtok] when inplace_when_single ->
461 (* special case when 1-1 substitution, we reuse the token *)
462 id.tok <- (newtok +> TH.visitor_info_of_tok (fun _ ->
463 TH.info_of_tok id.tok))
464 | DefineBody bodymacro ->
465 set_as_comment Token_c.CppMacro id;
466 id.new_tokens_before <- bodymacro;
467 | DefineHint hint ->
468 msg_apply_known_macro_hint s;
469 id.tok <- token_from_parsinghack_hint (s,i1) hint;
470 )
471 );
472 apply_macro_defs xs
473
474
475
476
477 (* recurse *)
478 | (PToken x)::xs -> apply_macro_defs xs
479 | (Parenthised (xxs, info_parens))::xs ->
480 xxs +> List.iter apply_macro_defs;
481 apply_macro_defs xs
482 in
483 apply_macro_defs xs
484
485
486
487 (*****************************************************************************)
488 (* extracting define_def from a standard.h *)
489 (*****************************************************************************)
490 (* was the cpp-builtin, standard.h, part 0 *)
491
492 let macro_body_to_maybe_hint body =
493 match body with
494 | [] -> DefineBody body
495 | [TIdent (s,i1)] ->
496 (match parsinghack_hint_of_string s with
497 | Some hint -> DefineHint hint
498 | None -> DefineBody body
499 )
500 | xs -> DefineBody body
501
502 exception Bad_param
503
504 let rec (define_parse: Parser_c.token list -> (string * define_def) list) =
505 fun xs ->
506 match xs with
507 | [] -> []
508 | TDefine i1::TIdentDefine (s,i2)::TOParDefine i3::xs ->
509 (* note: the macro could be badly written and have no closing ')' for
510 * its param, which would make us go too far away, but I don't think
511 * it's important to handle such an error *)
512 let def =
513 try
514 let (tokparams, _, xs) =
515 xs +> Common.split_when (function TCPar _ -> true | _ -> false) in
516 let (body, _, xs) =
517 xs +> Common.split_when (function TDefEOL _ -> true | _ -> false) in
518 let params =
519 tokparams +> Common.map_filter (function
520 | TComma _ -> None
521 | TIdent (s, _) -> Some (FixedArg s)
522
523 (* TODO *)
524 | TDefParamVariadic (s, _) -> Some (VariadicArg s)
525 (* TODO *)
526 | TEllipsis _ -> Some (VariadicArg "...")
527
528 | x ->
529 (* bugfix: param of macros can be tricky *)
530 let s = TH.str_of_tok x in
531 if s ==~ Common.regexp_alpha
532 then begin
533 pr2 (spf "remapping: %s to a macro parameter" s);
534 Some (FixedArg s)
535 end
536 else
537 begin
538 pr2 (spf "bad character %s in macro parameter list" s);
539 raise Bad_param
540 end) in
541 (* bugfix: also substitute to ident in body so cpp_engine will
542 * have an easy job.
543 *)
544 let body = body +> List.map (fun tok ->
545 match tok with
546 | TIdent _ -> tok
547 | _ ->
548 let s = TH.str_of_tok tok in
549 let ii = TH.info_of_tok tok in
550 let params =
551 List.map
552 (function FixedArg s -> s | VariadicArg s -> s)
553 params in
554 if s ==~ Common.regexp_alpha && List.mem s params
555 then begin
556 pr2 (spf "remapping: %s to an ident in macro body" s);
557 TIdent (s, ii)
558 end
559 else tok) +>
560 List.map (TH.visitor_info_of_tok Ast_c.make_expanded) in
561 Some (s, (s, Params params, macro_body_to_maybe_hint body))
562 with Bad_param -> None in
563 (match def with
564 Some def -> def::define_parse xs
565 | None -> define_parse xs)
566
567 | TDefine i1::TIdentDefine (s,i2)::xs ->
568 let (body, _, xs) =
569 xs +> Common.split_when (function TDefEOL _ -> true | _ -> false) in
570 let body = body +> List.map
571 (TH.visitor_info_of_tok Ast_c.make_expanded) in
572 let def = (s, (s, NoParam, macro_body_to_maybe_hint body)) in
573 def::define_parse xs
574
575 (* cf tests-bis/define_plus.c *)
576 | TDefine i1::xs ->
577 let line = Ast_c.line_of_info i1 in
578 pr2 (spf "WEIRD: no ident in define at line %d" line);
579 define_parse xs
580
581 | x::xs -> define_parse xs
582
583
584
585 let extract_macros xs =
586 let cleaner = xs +> List.filter (fun x ->
587 not (TH.is_comment x)
588 ) in
589 define_parse cleaner
590
591