3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
18 module TH
= Token_helpers
23 (*****************************************************************************)
25 (*****************************************************************************)
27 (* cpp functions working at the token level. Cf cpp_ast_c for cpp functions
28 * working at the AST level (which is very unusual but makes sense in
29 * the coccinelle context for instance).
31 * Note that as I use a single lexer to work both at the C and cpp level
32 * there are some inconveniences.
33 * For instance 'for' is a valid name for a macro parameter and macro
34 * body, but is interpreted in a special way by our single lexer, and
35 * so at some places where I expect a TIdent I need also to
36 * handle special cases and accept Tfor, Tif, etc at those places.
38 * There are multiple issues related to those keywords producing incorrect tokens.
39 * Those keywords can be:
40 * - (1) in the name of the macro as in #define inline
41 * - (2) in a parameter of the macro as in #define foo(char) char x;
42 * - (3) in an argument to a macro call as in IDENT(if);
43 * Case 1 is easy to fix in define_ident.
44 * Case 2 is easy to fix in define_parse, where we detect such tokens in
45 * the parameters and then replace their occurrences in the body by a TIdent.
46 * Case 3 is only an issue when the expanded token is not really used
47 * as usual but is used, for instance, in a concatenation as in a ## if
48 * when expanded. In that case the grammar this time will not be happy,
49 * so this is also easy to fix in cpp_engine.
53 (*****************************************************************************)
55 (*****************************************************************************)
(* Logging helpers of this module. Both wrappers are built by
 * Common.mk_pr2_wrappers from the parsing verbosity flag; pr2_once is
 * presumably the print-only-once variant (to be confirmed in Common). *)
let (pr2, pr2_once) =
  Flag_parsing_c.verbose_parsing +> Common.mk_pr2_wrappers
58 (*****************************************************************************)
60 (*****************************************************************************)
62 (* ------------------------------------------------------------------------- *)
63 (* mimic standard.h *)
64 (* ------------------------------------------------------------------------- *)
(* A macro definition as manipulated in this module: a string (the macro
 * name, cf. string_of_define_def) together with a parameter description
 * and a body. The constructors visible here are a parameter list of
 * strings (Params), a body given as a list of Parser_c tokens
 * (DefineBody) and a body replaced by a parsing-hack hint (DefineHint).
 * parsinghack_hint enumerates the kinds of hints. *)
66 type define_def
= string * define_param
* define_body
69 | Params
of string list
71 | DefineBody
of Parser_c.token list
72 | DefineHint
of parsinghack_hint
74 and parsinghack_hint
=
80 | HintMacroIdentBuilder
83 (*****************************************************************************)
84 (* Parsing and helpers of hints *)
85 (*****************************************************************************)
87 (* cf also data/test.h *)
(* Association list from hint names to parsinghack_hint constructors.
 * It is searched by name in parsinghack_hint_of_string and, once flipped,
 * by constructor in string_of_parsinghack_hint. HintMacroStatement is
 * listed under two names; the MACROSTATEMENT entry is the backward
 * compatible spelling and comes last. *)
88 let assoc_hint_string = [
89 "YACFE_ITERATOR" , HintIterator
;
90 "YACFE_DECLARATOR" , HintDeclarator
;
91 "YACFE_STRING" , HintMacroString
;
92 "YACFE_STATEMENT" , HintMacroStatement
;
93 "YACFE_ATTRIBUTE" , HintAttribute
;
94 "YACFE_IDENT_BUILDER" , HintMacroIdentBuilder
;
96 "MACROSTATEMENT" , HintMacroStatement
; (* backward compatibility *)
(* Look up the hint registered under the given name in assoc_hint_string.
 * The lookup is delegated to Common.assoc_option, so the result is an
 * option: a name that is not in the table does not yield a hint. *)
let (parsinghack_hint_of_string: string -> parsinghack_hint option) =
  fun hint_name ->
    assoc_hint_string +> Common.assoc_option hint_name
(* Reverse direction of parsinghack_hint_of_string: give back a name under
 * which the hint is listed in assoc_hint_string. The table is flipped into
 * (hint, name) pairs, keeping its order, and searched with Common.assoc. *)
let (string_of_parsinghack_hint: parsinghack_hint -> string) = fun h ->
  let swap (name, hint) = (hint, name) in
  let hint_to_name = List.map swap assoc_hint_string in
  Common.assoc h hint_to_name
(* True exactly when parsinghack_hint_of_string finds a hint for the
 * given name. *)
let (is_parsinghack_hint: string -> bool) = fun name ->
  match parsinghack_hint_of_string name with
  | None -> false
  | Some _ -> true
(* Build the Parser_c token that replaces a macro identifier carrying a
 * hint. It takes the (string, info) pair of the identifier and the hint,
 * and returns one of the TMacro* tokens (TMacroIterator, TMacroDecl,
 * TMacroString, TMacroStmt, TMacroAttr, TMacroIdentBuilder) applied to
 * that same pair. Only some of the match arms are visible in this view. *)
111 let (token_from_parsinghack_hint
:
112 (string * Ast_c.info
) -> parsinghack_hint
-> Parser_c.token
) =
116 Parser_c.TMacroIterator
(s
, ii
)
118 Parser_c.TMacroDecl
(s
, ii
)
120 Parser_c.TMacroString
(s
, ii
)
121 | HintMacroStatement
->
122 Parser_c.TMacroStmt
(s
, ii
)
124 Parser_c.TMacroAttr
(s
, ii
)
125 | HintMacroIdentBuilder
->
126 Parser_c.TMacroIdentBuilder
(s
, ii
)
129 (* used in extract_macros for example *)
(* Render a define_def, given as the triple (name, params, body), as text.
 * Pieces visible here: a define prefix made of the name and the
 * parameters joined by a comma, the hint name obtained through
 * string_of_parsinghack_hint for a hint body, and the body tokens printed
 * with Token_helpers.str_of_tok and joined by single spaces. How these
 * pieces are assembled is not visible in this view. *)
130 let string_of_define_def (s
, params
, body
) =
137 spf
"#define %s(%s) " s
(Common.join
"," xs
)
142 string_of_parsinghack_hint hint
144 Common.join
" " (xs
+> List.map
Token_helpers.str_of_tok
)
149 (*****************************************************************************)
150 (* Expansion helpers *)
151 (*****************************************************************************)
153 (* In some cases we can have macros like IDENT(if) that expands to some
154 * 'int xxx_if(void)', but as the lexer will currently generate a Tif for
155 * the expanded code, that may not be accepted as a token after a ##
156 * in the grammar. Hence this function to remap some tokens. This is because
157 * we should not use a single lexer for both working at the C level and
160 * update: it can also rename some TypedefIdent into TIdent, possibly
161 * because of bad interaction with add_typedef_root in parsing_hacks.
(* Walk a token list looking at consecutive pairs (x, y) around the cpp
 * concatenation operator (TCppConcatOp).
 * - When the neighbour of the operator is already a TIdent, both tokens
 *   are kept unchanged.
 * - When the token after the operator is something else and its text
 *   matches Common.regexp_alpha, it is replaced by a TIdent built from the
 *   same text and info, and a message is logged through pr2.
 * - The same remapping applies to a token placed before the operator; in
 *   that case the operator itself is given back to the recursive call.
 * - Every other token is copied as is. *)
163 let rec remap_keyword_tokens xs
=
169 | Parser_c.TCppConcatOp _
, Parser_c.TIdent _
->
170 x
::y
::remap_keyword_tokens xs
171 | Parser_c.TIdent _
, Parser_c.TCppConcatOp _
->
172 x
::y
::remap_keyword_tokens xs
174 | Parser_c.TCppConcatOp
(i1
), y
->
176 let s = TH.str_of_tok y
in
177 let ii = TH.info_of_tok y
in
178 if s ==~
Common.regexp_alpha
180 pr2 (spf
"remapping: %s to an ident in expanded code" s);
181 x
::(Parser_c.TIdent
(s, ii))::remap_keyword_tokens xs
184 x
::y
::remap_keyword_tokens xs
186 | x
, Parser_c.TCppConcatOp
(i1
) ->
187 let s = TH.str_of_tok x
in
188 let ii = TH.info_of_tok x
in
189 if s ==~
Common.regexp_alpha
191 pr2 (spf
"remapping: %s to an ident in expanded code" s);
192 (Parser_c.TIdent
(s, ii))::remap_keyword_tokens (y
::xs
)
195 x
::y
::remap_keyword_tokens xs
198 x
::remap_keyword_tokens (y
::xs
)
202 (* works with agglomerate_concat_op_ident below *)
(* Helper of agglomerate_concat_op_ident, called on the tokens that follow
 * a concatenation operator. From the visible cases it returns a pair made
 * of a string and the tokens left to process: a lone TIdent gives its
 * string and an empty rest, and a TIdent followed by another
 * concatenation operator triggers a recursive call on the remaining
 * tokens. When no identifier is found a message is logged through pr2.
 * NOTE(review): the values returned in the branches that log are not
 * visible in this view. *)
203 let rec get_ident_in_concat_op xs
=
206 pr2 "weird: ident after ## operator not found";
210 | Parser_c.TIdent
(s, i1
) -> s, []
212 pr2 "weird: ident after ## operator not found";
217 | Parser_c.TIdent
(s,i1
), Parser_c.TCppConcatOp
(i2
) ->
218 let (s2, rest
) = get_ident_in_concat_op xs
in
220 | Parser_c.TIdent
(s, i1
), _
->
223 pr2 "weird: ident after ## operator not found";
227 (* must be run after the expansion has been done for the parameter so
228 * that all idents are actually ident, not macro parameter names.
(* Evaluate the concatenation operator on identifiers: a TIdent followed
 * by a TCppConcatOp is merged with the identifier text returned by
 * get_ident_in_concat_op into a single TIdent. The new text is the
 * concatenation of both strings and the info of the first identifier is
 * rewrapped with it through Ast_c.rewrap_str. Processing then continues
 * on the tokens that get_ident_in_concat_op left over.
 * A concatenation operator that is not preceded by an identifier is
 * logged through pr2 and kept; all other tokens are copied unchanged. *)
230 let rec agglomerate_concat_op_ident xs
=
235 (* can we have ## id, and so ## as first token ? yes
236 * but the semantic is different as it represents variadic
237 * names so this must be handled elsewhere.
240 | Parser_c.TIdent
(s,i1
), Parser_c.TCppConcatOp
(i2
) ->
241 let (all_str_ident
, rest_toks
) =
242 get_ident_in_concat_op xs
244 let new_s = s ^ all_str_ident
in
245 let i1'
= Ast_c.rewrap_str
new_s i1 in
246 Parser_c.TIdent
(new_s, i1'
)::agglomerate_concat_op_ident rest_toks
247 | Parser_c.TCppConcatOp _
, _
->
248 pr2 "weird, ## alone";
249 x
::agglomerate_concat_op_ident (y
::xs
)
251 x
::agglomerate_concat_op_ident (y
::xs
)
257 (* To expand the parameter of the macro. The env corresponds to the actual
258 * code that is bound to the parameters of the macro.
259 * Recurse ? fixpoint ? the expansion may also contain macro.
260 * Or to macro expansion in a strict manner, that is process first
261 * the parameters, expands macro in params, and then process enclosing
264 * note: do the concatenation job of a##b here ?
265 * normally this should be done in the grammar. Here just expand
266 * tokens. The only thing we handle here is we may have to remap
269 * todo: handle stringification here ? if #n
271 * todo? but could parsing_hacks then pass over the remapped tokens,
272 * for instance transform some of them back into some TypedefIdent
273 * so cpp_engine may be fooled?
276 ?evaluate_concatop
:bool ->
277 (string , Parser_c.token list
) assoc ->
278 Parser_c.token list
-> Parser_c.token list
) =
279 fun ?
(evaluate_concatop
=true) env xs
->
280 xs
+> List.map
(fun tok
->
281 (* expand only TIdent ? no cos the parameter of the macro
282 * can actually be some 'register' so may have to look for
283 * any tokens candidates for the expansion.
284 * Only subtelity is maybe dont expand the TDefineIdent.
286 * update: in fact now the caller (define_parse) will have done
287 * the job right and already replaced the macro parameter with a TIdent.
290 | TIdent
(s,i1) when List.mem_assoc
s env
-> Common.assoc s env
294 +> remap_keyword_tokens
297 then agglomerate_concat_op_ident xs
303 (* ------------------------------------------------------------------------- *)
304 (* apply macro, using standard.h or other defs *)
305 (* ------------------------------------------------------------------------- *)
307 (* Thanks to this function many things are no longer hardcoded in OCaml code.
308 * At some point they were hardcoded in a standard.h file but now I
309 * can even generate them on the fly on demand when there is actually
312 * No need to take care to not substitute the macro name itself
313 * that occurs in the macro definition because the macro name is
314 * after fix_token_define a TDefineIdent, no more a TIdent.
(* Apply known macro definitions to a stream of parenthesised tokens,
 * rewriting the tokens in place (their fields tok and new_tokens_before
 * are assigned). Labelled arguments visible here:
 * ~msg_apply_known_macro and ~msg_apply_known_macro_hint, callbacks
 * invoked with the macro name, and ?inplace_when_single (default true).
 * Definitions are looked up by name in the hash table defs.
 *
 * Cases visible in the inner recursive function:
 * - identifier followed by a parenthesised argument list and known in
 *   defs: for a DefineBody, the number of arguments is compared with the
 *   number of parameters. On a mismatch the call is logged and simply
 *   passed as a CppMacro comment; otherwise the body is expanded by
 *   cpp_engine over the zipped (parameter, argument tokens) pairs and put
 *   in new_tokens_before, and only afterwards are the call tokens turned
 *   into comments. For a hint, the identifier token is replaced through
 *   token_from_parsinghack_hint; a macro statement hint also passes its
 *   arguments, and a trailing semicolon, as comments.
 * - identifier alone and known in defs: a single-token body replaces the
 *   token in place when inplace_when_single holds, keeping the info of
 *   the original token; other bodies go to new_tokens_before; hints
 *   replace the token.
 * - any other token is skipped, and parenthesised groups are visited
 *   recursively. *)
317 let rec apply_macro_defs
318 ~msg_apply_known_macro
319 ~msg_apply_known_macro_hint
321 ?
(inplace_when_single
=true)
323 let rec apply_macro_defs xs
=
327 (* old: "but could do more, could reuse same original token
328 * so that have in the Ast a Dbg, not a MACROSTATEMENT"
330 * | PToken ({tok = TIdent (s,i1)} as id)::xs
331 * when s = "MACROSTATEMENT" ->
333 * msg_macro_statement_hint s;
334 * id.tok <- TMacroStmt(TH.info_of_tok id.tok);
335 * find_macro_paren xs
337 * let msg_macro_statement_hint s =
338 * incr Stat.nMacroHint;
343 (* recognized macro of standard.h (or other) *)
344 | PToken
({tok
= TIdent
(s,i1)} as id
)::Parenthised
(xxs
,info_parens
)::xs
345 when Hashtbl.mem defs
s ->
347 msg_apply_known_macro
s;
348 let (s, params
, body
) = Hashtbl.find defs
s in
352 pr2 ("WEIRD: macro without param used before parenthize: " ^
s);
353 (* ex: PRINTP("NCR53C400 card%s detected\n" ANDP(((struct ... *)
356 | DefineBody bodymacro
->
357 set_as_comment
(Token_c.CppMacro
) id
;
358 id
.new_tokens_before
<- bodymacro
;
360 msg_apply_known_macro_hint
s;
361 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
365 | DefineBody bodymacro
->
367 (* bugfix: better to put this that before the match body,
368 * cos our macrostatement hint can have variable number of
369 * arguments and so it's ok if it does not match exactly
370 * the number of arguments. *)
371 if List.length params
!= List.length xxs
373 pr2_once
("WEIRD: macro with wrong number of arguments: " ^
s);
374 (* old: id.new_tokens_before <- bodymacro; *)
376 (* update: if wrong number, then I just pass this macro *)
377 [Parenthised
(xxs
, info_parens
)] +>
378 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
379 set_as_comment
Token_c.CppMacro id
;
385 let xxs'
= xxs +> List.map
(fun x
->
386 (tokens_of_paren_ordered x
) +> List.map
(fun x
->
387 TH.visitor_info_of_tok
Ast_c.make_expanded x
.tok
390 id
.new_tokens_before
<-
391 (* !!! cpp expansion job here !!! *)
392 cpp_engine ?evaluate_concatop
393 (Common.zip params
xxs'
) bodymacro
;
395 (* important to do that after have apply the macro, otherwise
396 * will pass as argument to the macro some tokens that
397 * are all TCommentCpp
399 [Parenthised
(xxs, info_parens
)] +>
400 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
401 set_as_comment
Token_c.CppMacro id
;
403 | DefineHint
(HintMacroStatement
as hint
) ->
404 (* important to do that after have apply the macro, otherwise
405 * will pass as argument to the macro some tokens that
406 * are all TCommentCpp
408 * note: such macrostatement can have a variable number of
409 * arguments but here we don't care, we just pass all the
414 | PToken
({tok
= TPtVirg _
} as id2
)::_
->
416 ("macro stmt with trailing ';', passing also ';' for: "^
418 (* sometimes still want pass its params ... as in
419 * DEBUGPOLL(static unsigned int prev_mask = 0);
422 msg_apply_known_macro_hint
s;
423 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
424 [Parenthised
(xxs, info_parens
)] +>
425 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
426 set_as_comment
Token_c.CppMacro id2
;
429 msg_apply_known_macro_hint
s;
430 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
431 [Parenthised
(xxs, info_parens
)] +>
432 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
437 msg_apply_known_macro_hint
s;
438 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
443 | PToken
({tok
= TIdent
(s,i1)} as id
)::xs
444 when Hashtbl.mem defs
s ->
446 msg_apply_known_macro
s;
447 let (_s
, params
, body
) = Hashtbl.find defs
s in
451 pr2 ("WEIRD: macro with params but no parens found: " ^
s);
452 (* dont apply the macro, perhaps a redefinition *)
456 (* bugfix: we prefer not using this special case when we come
457 * from extract_macros context
459 | DefineBody
[newtok
] when inplace_when_single
->
460 (* special case when 1-1 substitution, we reuse the token *)
461 id
.tok
<- (newtok
+> TH.visitor_info_of_tok
(fun _
->
462 TH.info_of_tok id
.tok
))
463 | DefineBody bodymacro
->
464 set_as_comment
Token_c.CppMacro id
;
465 id
.new_tokens_before
<- bodymacro
;
467 msg_apply_known_macro_hint
s;
468 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
477 | (PToken x
)::xs
-> apply_macro_defs xs
478 | (Parenthised
(xxs, info_parens
))::xs
->
479 xxs +> List.iter
apply_macro_defs;
486 (*****************************************************************************)
487 (* extracting define_def from a standard.h *)
488 (*****************************************************************************)
489 (* was the cpp-builtin, standard.h, part 0 *)
(* Classify a macro body as either a plain body or a hint.
 * An empty body stays a DefineBody. In the case that binds a string s
 * (its pattern is not visible in this view), the string is looked up with
 * parsinghack_hint_of_string: a known hint name gives DefineHint, any
 * other name keeps the body as DefineBody. Every remaining shape of body
 * is also kept as DefineBody. *)
491 let macro_body_to_maybe_hint body
=
493 | [] -> DefineBody body
495 (match parsinghack_hint_of_string
s with
496 | Some hint
-> DefineHint hint
497 | None
-> DefineBody body
499 | xs
-> DefineBody body
(* Extract the macro definitions found in a token list and return them as
 * (name, define_def) pairs.
 * - TDefine, TIdentDefine, TOParDefine: a macro with parameters. The
 *   parameter tokens run up to the first TCPar and the remaining tokens
 *   are split again at TDefEOL. Parameters are collected as strings
 *   (TIdent, TDefParamVariadic, and three dots for TEllipsis); other
 *   tokens whose text matches Common.regexp_alpha are logged as remapped
 *   to a macro parameter, and anything else is logged as a bad character.
 *   When Bad_param is caught the definition is dropped. Body tokens whose
 *   text is alphabetic and is one of the parameter names are logged as
 *   remapped to an ident, and body tokens are marked with
 *   Ast_c.make_expanded.
 * - TDefine, TIdentDefine without an opening parenthesis: a NoParam
 *   macro whose body runs up to TDefEOL.
 * - a define without identifier is logged as WEIRD with its line number.
 * - any other token is skipped.
 * In both macro cases the body goes through macro_body_to_maybe_hint. *)
503 let rec (define_parse
: Parser_c.token list
-> (string * define_def
) list
) =
507 | TDefine
i1::TIdentDefine
(s,i2
)::TOParDefine i3
::xs
->
508 (* note: the macro could be badly written and have no closing ')' for
509 * its param, which would make us go too far away, but I don't think
510 * it's important to handle such an error *)
513 let (tokparams
, _
, xs
) =
514 xs
+> Common.split_when
(function TCPar _
-> true | _
-> false) in
516 xs
+> Common.split_when
(function TDefEOL _
-> true | _
-> false) in
518 tokparams
+> Common.map_filter
(function
520 | TIdent
(s, _
) -> Some
s
523 | TDefParamVariadic
(s, _
) -> Some
s
525 | TEllipsis _
-> Some
"..."
528 (* bugfix: param of macros can be tricky *)
529 let s = TH.str_of_tok x
in
530 if s ==~
Common.regexp_alpha
532 pr2 (spf
"remapping: %s to a macro parameter" s);
537 pr2 (spf
"bad character %s in macro parameter list" s);
540 (* bugfix: also substitute to ident in body so cpp_engine will
543 let body = body +> List.map
(fun tok
->
547 let s = TH.str_of_tok tok
in
548 let ii = TH.info_of_tok tok
in
549 if s ==~
Common.regexp_alpha
&& List.mem
s params
551 pr2 (spf
"remapping: %s to an ident in macro body" s);
555 List.map
(TH.visitor_info_of_tok
Ast_c.make_expanded
) in
556 Some
(s, (s, Params
params, macro_body_to_maybe_hint body))
557 with Bad_param
-> None
in
559 Some
def -> def::define_parse xs
560 | None
-> define_parse xs
)
562 | TDefine
i1::TIdentDefine
(s,i2
)::xs
->
564 xs
+> Common.split_when
(function TDefEOL _
-> true | _
-> false) in
565 let body = body +> List.map
566 (TH.visitor_info_of_tok
Ast_c.make_expanded
) in
567 let def = (s, (s, NoParam
, macro_body_to_maybe_hint body)) in
570 (* cf tests-bis/define_plus.c *)
572 let line = Ast_c.line_of_info
i1 in
573 pr2 (spf
"WEIRD: no ident in define at line %d" line);
576 | x
::xs
-> define_parse xs
580 let extract_macros xs
=
581 let cleaner = xs
+> List.filter
(fun x
->
582 not
(TH.is_comment x
)