3 module TH
= Token_helpers
8 (*****************************************************************************)
10 (*****************************************************************************)
12 (* cpp functions working at the token level. Cf cpp_ast_c for cpp functions
13 * working at the AST level (which is very unusual but makes sense in
14 * the coccinelle context for instance).
16 * Note that as I use a single lexer to work both at the C and cpp level
17 * there are some inconveniences.
18 * For instance 'for' is a valid name for a macro parameter and macro
19 * body, but is interpreted in a special way by our single lexer, and
20 * so at some places where I expect a TIdent I need also to
21 * handle special cases and accept Tfor, Tif, etc at those places.
23 * There are multiple issues related to those keywords being lexed as the wrong tokens.
24 * Those keywords can be:
25 * - (1) in the name of the macro as in #define inline
26 * - (2) in a parameter of the macro as in #define foo(char) char x;
27 * - (3) in an argument to a macro call as in IDENT(if);
28 * Case 1 is easy to fix in define_ident.
29 * Case 2 is easy to fix in define_parse, where we detect such tokens in
30 * the parameters and then replace their occurrences in the body with a TIdent.
31 * Case 3 is only an issue when the expanded token is not really used
32 * as usual but is used, for instance, in a concatenation as in a ## if
33 * when expanded. In that case the grammar will not be happy this time,
34 * so this is also easy to fix in cpp_engine.
38 (*****************************************************************************)
40 (*****************************************************************************)
(* Logging helpers for this module: both are obtained from a single call to
 * Common.mk_pr2_wrappers applied to Flag_parsing_c.verbose_parsing.
 * pr2 is used below with a single string argument.
 * NOTE(review): presumably output is only emitted when verbose_parsing is
 * set -- confirm against Common.mk_pr2_wrappers. *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
43 (*****************************************************************************)
45 (*****************************************************************************)
47 (* ------------------------------------------------------------------------- *)
48 (* mimic standard.h *)
49 (* ------------------------------------------------------------------------- *)
51 type define_def
= string * define_param
* define_body
54 | Params
of define_arg list
55 and define_arg
= FixedArg
of string | VariadicArg
of string
57 | DefineBody
of Parser_c.token list
58 | DefineHint
of parsinghack_hint
60 and parsinghack_hint
=
66 | HintMacroIdentBuilder
69 (*****************************************************************************)
70 (* Parsing and helpers of hints *)
71 (*****************************************************************************)
73 (* cf also data/test.h *)
74 let assoc_hint_string = [
75 "YACFE_ITERATOR" , HintIterator
;
76 "YACFE_DECLARATOR" , HintDeclarator
;
77 "YACFE_STRING" , HintMacroString
;
78 "YACFE_STATEMENT" , HintMacroStatement
;
79 "YACFE_ATTRIBUTE" , HintAttribute
;
80 "YACFE_IDENT_BUILDER" , HintMacroIdentBuilder
;
82 "MACROSTATEMENT" , HintMacroStatement
; (* backward compatibility *)
(* Look up the parsing-hack hint registered under the name [s] in
 * assoc_hint_string. The lookup is delegated to Common.assoc_option, so the
 * result is an option (per the signature).
 * NOTE(review): presumably None means "no hint with that name" -- confirm
 * against Common.assoc_option. *)
let parsinghack_hint_of_string (s : string) : parsinghack_hint option =
  Common.assoc_option s assoc_hint_string
(* Reverse lookup in assoc_hint_string: swap every (name, hint) pair into
 * (hint, name) and ask Common.assoc for the name bound to [hint].
 * The swapped table is rebuilt on every call.
 * Several names can share one hint (both "YACFE_STATEMENT" and
 * "MACROSTATEMENT" map to HintMacroStatement), so which name comes back
 * depends on table order.
 * NOTE(review): presumably the first entry wins and a missing hint raises;
 * confirm against Common.assoc. *)
let string_of_parsinghack_hint (hint : parsinghack_hint) : string =
  let hint_to_name =
    List.map (fun (name, h) -> (h, name)) assoc_hint_string
  in
  Common.assoc hint hint_to_name
(* True exactly when parsinghack_hint_of_string finds a hint for [s]. *)
let is_parsinghack_hint (s : string) : bool =
  match parsinghack_hint_of_string s with
  | Some _ -> true
  | None -> false
97 let (token_from_parsinghack_hint
:
98 (string * Ast_c.info
) -> parsinghack_hint
-> Parser_c.token
) =
102 Parser_c.TMacroIterator
(s
, ii
)
104 Parser_c.TMacroDecl
(s
, ii
)
106 Parser_c.TMacroString
(s
, ii
)
107 | HintMacroStatement
->
108 Parser_c.TMacroStmt
(s
, ii
)
110 Parser_c.TMacroAttr
(s
, ii
)
111 | HintMacroIdentBuilder
->
112 Parser_c.TMacroIdentBuilder
(s
, ii
)
115 (* used in extract_macros for example *)
116 let string_of_define_def (s
, params
, body
) =
123 let xs = List.map
(function FixedArg s
-> s
| VariadicArg s
-> s
) xs in
124 spf
"#define %s(%s) " s
(Common.join
"," xs)
129 string_of_parsinghack_hint hint
131 Common.join
" " (xs +> List.map
Token_helpers.str_of_tok
)
136 (*****************************************************************************)
137 (* Expansion helpers *)
138 (*****************************************************************************)
140 (* In some cases we can have macros like IDENT(if) that expands to some
141 * 'int xxx_if(void)', but as the lexer will currently generate a Tif for
142 * the expanded code, that may not be accepted as a token after a ##
143 * in the grammar. Hence this function to remap some tokens. This is because
144 * we should not use a single lexer for both working at the C level and
147 * update: it can also rename some TypedefIdent into TIdent, possibly
148 * because of bad interaction with add_typedef_root in parsing_hacks.
150 let rec remap_keyword_tokens xs =
156 | Parser_c.TCppConcatOp _
, Parser_c.TIdent _
->
157 x
::y
::remap_keyword_tokens xs
158 | Parser_c.TIdent _
, Parser_c.TCppConcatOp _
->
159 x
::y
::remap_keyword_tokens xs
161 | Parser_c.TCppConcatOp
(i1
), y
->
162 let s = TH.str_of_tok y
in
163 let ii = TH.info_of_tok y
in
164 if s ==~
Common.regexp_alpha
166 pr2 (spf
"remapping: %s to an ident in expanded code" s);
167 x
::(Parser_c.TIdent
(s, ii))::remap_keyword_tokens xs
170 x
::y
::remap_keyword_tokens xs
172 | x
, Parser_c.TCppConcatOp
(i1
) ->
173 let s = TH.str_of_tok x
in
174 let ii = TH.info_of_tok x
in
175 if s ==~
Common.regexp_alpha
177 pr2 (spf
"remapping: %s to an ident in expanded code" s);
178 (Parser_c.TIdent
(s, ii))::remap_keyword_tokens (y
::xs)
181 x
::y
::remap_keyword_tokens xs
184 x
::remap_keyword_tokens (y
::xs)
188 (* works with agglomerate_concat_op_ident below *)
189 let rec get_ident_in_concat_op xs =
192 pr2 "weird: ident after ## operator not found";
196 | Parser_c.TIdent
(s, i1
) -> s, []
198 pr2 "weird: ident after ## operator not found";
203 | Parser_c.TIdent
(s,i1
), Parser_c.TCppConcatOp
(i2
) ->
204 let (s2, rest
) = get_ident_in_concat_op xs in
206 | Parser_c.TIdent
(s, i1
), _
->
209 pr2 "weird: ident after ## operator not found";
213 (* must be run after the expansion has been done for the parameter so
214 * that all idents are actually ident, not macro parameter names.
216 let rec agglomerate_concat_op_ident xs =
221 (* can we have ## id, and so ## as first token ? yes
222 * but the semantic is different as it represents variadic
223 * names so this must be handled elsewhere.
226 | Parser_c.TIdent
(s,i1
), Parser_c.TCppConcatOp
(i2
) ->
227 let (all_str_ident
, rest_toks
) =
228 get_ident_in_concat_op xs
230 let new_s = s ^ all_str_ident
in
231 let i1'
= Ast_c.rewrap_str
new_s i1 in
232 Parser_c.TIdent
(new_s, i1'
)::agglomerate_concat_op_ident rest_toks
233 | Parser_c.TCppConcatOp _
, _
->
234 pr2 "weird, ## alone";
235 x
::agglomerate_concat_op_ident (y
::xs)
237 x
::agglomerate_concat_op_ident (y
::xs)
243 (* To expand the parameter of the macro. The env corresponds to the actual
244 * code that is bound to the parameters of the macro.
245 * Recurse ? fixpoint ? the expansion may also contain macro.
246 * Or to macro expansion in a strict manner, that is process first
247 * the parameters, expands macro in params, and then process enclosing
250 * note: do the concatenation job of a##b here ?
251 * normally this should be done in the grammar. Here just expand
252 * tokens. The only thing we handle here is we may have to remap
255 * todo: handle stringification here ? if #n
257 * todo? but could parsing_hacks then pass over the remapped tokens,
258 * for instance transform some of them back into some TypedefIdent
259 * so cpp_engine may be fooled?
262 ?evaluate_concatop
:bool ->
263 (string , Parser_c.token list
) assoc ->
264 Parser_c.token list
-> Parser_c.token list
) =
265 fun ?
(evaluate_concatop
=true) env
xs ->
266 xs +> List.map
(fun tok
->
267 (* expand only TIdent ? no cos the parameter of the macro
268 * can actually be some 'register' so may have to look for
269 * any tokens candidates for the expansion.
270 * The only subtlety is that we should maybe not expand the TDefineIdent.
272 * update: in fact now the caller (define_parse) will have done
273 * the job right and already replaced the macro parameter with a TIdent.
276 | TIdent
(s,i1) when List.mem_assoc
s env
->
281 +> remap_keyword_tokens
284 then agglomerate_concat_op_ident xs
290 (* ------------------------------------------------------------------------- *)
291 (* apply macro, using standard.h or other defs *)
292 (* ------------------------------------------------------------------------- *)
294 (* Thanks to this function many things are no longer hardcoded in the OCaml code.
295 * At some point they were hardcoded in a standard.h file but now I
296 * can even generate them on the fly on demand when there is actually
299 * No need to take care to not substitute the macro name itself
300 * that occurs in the macro definition because the macro name is
301 * after fix_token_define a TDefineIdent, no more a TIdent.
304 let rec apply_macro_defs
305 ~msg_apply_known_macro
306 ~msg_apply_known_macro_hint
308 ?
(inplace_when_single
=true)
311 let rec apply_macro_defs xs =
315 (* old: "but could do more, could reuse same original token
316 * so that have in the Ast a Dbg, not a MACROSTATEMENT"
318 * | PToken ({tok = TIdent (s,i1)} as id)::xs
319 * when s = "MACROSTATEMENT" ->
321 * msg_macro_statement_hint s;
322 * id.tok <- TMacroStmt(TH.info_of_tok id.tok);
323 * find_macro_paren xs
325 * let msg_macro_statement_hint s =
326 * incr Stat.nMacroHint;
331 (* recognized macro of standard.h (or other) *)
332 | PToken
({tok
= TIdent
(s,i1)} as id
)::Parenthised
(xxs
,info_parens
)::xs
333 when Hashtbl.mem defs
s ->
335 msg_apply_known_macro
s;
336 let (s, params
, body
) = Hashtbl.find defs
s in
340 pr2 ("WEIRD: macro without param used before parenthize: " ^
s);
341 (* ex: PRINTP("NCR53C400 card%s detected\n" ANDP(((struct ... *)
344 | DefineBody bodymacro
->
345 set_as_comment
(Token_c.CppMacro
) id
;
346 id
.new_tokens_before
<- bodymacro
;
348 msg_apply_known_macro_hint
s;
349 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
353 | DefineBody bodymacro
->
355 (* bugfix: better to put this that before the match body,
356 * cos our macrostatement hint can have variable number of
357 * arguments and so it's ok if it does not match exactly
358 * the number of arguments. *)
359 let build_binder params xxs
=
360 let rec loop = function
361 ([],[]) -> Some
(function [] -> [] | _
-> failwith
"bad")
362 | ([],[[]]) -> Some
(function [[]] -> [] | _
-> failwith
"bad")
364 | ([(VariadicArg
s)],l
) ->
365 Some
(function l
-> List.map
(function a
-> (s,a
)) l
)
366 | ((VariadicArg _
)::_
,l
) -> None
367 | ((FixedArg _
)::_
,[]) -> None
368 | ((FixedArg
s)::rest
,x
::xs) ->
369 (match loop (rest
,xs) with
371 Some
(function l
-> (s,(List.hd l
)) :: k
(List.tl l
))
373 loop (params
, xxs
) in
374 (match build_binder params xxs
with
377 ("WEIRD: macro with wrong number of arguments: " ^
s);
378 (* old: id.new_tokens_before <- bodymacro; *)
380 (* update: if wrong number, then I just pass this macro *)
381 [Parenthised
(xxs
, info_parens
)] +>
382 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
383 set_as_comment
Token_c.CppMacro id
386 let xxs'
= xxs +> List.map
(fun x
->
387 (tokens_of_paren_ordered x
) +> List.map
(fun x
->
388 TH.visitor_info_of_tok
Ast_c.make_expanded x
.tok
391 id
.new_tokens_before
<-
392 (* !!! cpp expansion job here !!! *)
393 cpp_engine ?evaluate_concatop
394 (bind
xxs'
) bodymacro
;
396 (* important to do that after have apply the macro,
397 otherwise will pass as argument to the macro some
398 tokens that are all TCommentCpp
400 [Parenthised
(xxs, info_parens
)] +>
401 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
402 set_as_comment
Token_c.CppMacro id
)
404 | DefineHint
(HintMacroStatement
as hint
) ->
405 (* important to do that after have apply the macro, otherwise
406 * will pass as argument to the macro some tokens that
407 * are all TCommentCpp
409 * note: such macrostatement can have a variable number of
410 * arguments but here we don't care, we just pass all the
415 | PToken
({tok
= TPtVirg _
} as id2
)::_
->
417 ("macro stmt with trailing ';', passing also ';' for: "^
419 (* sometimes still want pass its params ... as in
420 * DEBUGPOLL(static unsigned int prev_mask = 0);
423 msg_apply_known_macro_hint
s;
424 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
425 [Parenthised
(xxs, info_parens
)] +>
426 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
427 set_as_comment
Token_c.CppMacro id2
;
430 msg_apply_known_macro_hint
s;
431 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
432 [Parenthised
(xxs, info_parens
)] +>
433 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
438 msg_apply_known_macro_hint
s;
439 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
444 | PToken
({tok
= TIdent
(s,i1)} as id
)::xs
445 when Hashtbl.mem defs
s ->
447 msg_apply_known_macro
s;
448 let (_s
, params
, body
) = Hashtbl.find defs
s in
452 pr2 ("WEIRD: macro with params but no parens found: " ^
s);
453 (* dont apply the macro, perhaps a redefinition *)
457 (* bugfix: we prefer not using this special case when we come
458 * from extract_macros context
460 | DefineBody
[newtok
] when inplace_when_single
->
461 (* special case when 1-1 substitution, we reuse the token *)
462 id
.tok
<- (newtok
+> TH.visitor_info_of_tok
(fun _
->
463 TH.info_of_tok id
.tok
))
464 | DefineBody bodymacro
->
465 set_as_comment
Token_c.CppMacro id
;
466 id
.new_tokens_before
<- bodymacro
;
468 msg_apply_known_macro_hint
s;
469 id
.tok
<- token_from_parsinghack_hint
(s,i1) hint
;
478 | (PToken x
)::xs -> apply_macro_defs xs
479 | (Parenthised
(xxs, info_parens
))::xs ->
480 xxs +> List.iter
apply_macro_defs;
487 (*****************************************************************************)
488 (* extracting define_def from a standard.h *)
489 (*****************************************************************************)
490 (* was the cpp-builtin, standard.h, part 0 *)
492 let macro_body_to_maybe_hint body
=
494 | [] -> DefineBody body
496 (match parsinghack_hint_of_string
s with
497 | Some hint
-> DefineHint hint
498 | None
-> DefineBody body
500 | xs -> DefineBody body
504 let rec (define_parse
: Parser_c.token list
-> (string * define_def
) list
) =
508 | TDefine
i1::TIdentDefine
(s,i2
)::TOParDefine i3
::xs ->
509 (* note: the macro could be badly written and have no closing ')' for
510 * its param, which would make us go too far away, but I don't think
511 * it's important to handle such an error *)
514 let (tokparams
, _
, xs) =
515 xs +> Common.split_when
(function TCPar _
-> true | _
-> false) in
517 xs +> Common.split_when
(function TDefEOL _
-> true | _
-> false) in
519 tokparams
+> Common.map_filter
(function
521 | TIdent
(s, _
) -> Some
(FixedArg
s)
524 | TDefParamVariadic
(s, _
) -> Some
(VariadicArg
s)
526 | TEllipsis _
-> Some
(VariadicArg
"...")
529 (* bugfix: param of macros can be tricky *)
530 let s = TH.str_of_tok x
in
531 if s ==~
Common.regexp_alpha
533 pr2 (spf
"remapping: %s to a macro parameter" s);
538 pr2 (spf
"bad character %s in macro parameter list" s);
541 (* bugfix: also substitute to ident in body so cpp_engine will
544 let body = body +> List.map
(fun tok
->
548 let s = TH.str_of_tok tok
in
549 let ii = TH.info_of_tok tok
in
552 (function FixedArg
s -> s | VariadicArg
s -> s)
554 if s ==~
Common.regexp_alpha
&& List.mem
s params
556 pr2 (spf
"remapping: %s to an ident in macro body" s);
560 List.map
(TH.visitor_info_of_tok
Ast_c.make_expanded
) in
561 Some
(s, (s, Params
params, macro_body_to_maybe_hint body))
562 with Bad_param
-> None
in
564 Some
def -> def::define_parse
xs
565 | None
-> define_parse
xs)
567 | TDefine
i1::TIdentDefine
(s,i2
)::xs ->
569 xs +> Common.split_when
(function TDefEOL _
-> true | _
-> false) in
570 let body = body +> List.map
571 (TH.visitor_info_of_tok
Ast_c.make_expanded
) in
572 let def = (s, (s, NoParam
, macro_body_to_maybe_hint body)) in
575 (* cf tests-bis/define_plus.c *)
577 let line = Ast_c.line_of_info
i1 in
578 pr2 (spf
"WEIRD: no ident in define at line %d" line);
581 | x
::xs -> define_parse
xs
585 let extract_macros xs =
586 let cleaner = xs +> List.filter
(fun x
->
587 not
(TH.is_comment x
)