3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
17 module TH
= Token_helpers
22 (*****************************************************************************)
24 (*****************************************************************************)
26 (* cpp functions working at the token level. Cf cpp_ast_c for cpp functions
27 * working at the AST level (which is very unusual but makes sense in
28 * the coccinelle context for instance).
30 * Note that as I use a single lexer to work both at the C and cpp level
31 * there are some inconveniences.
32 * For instance 'for' is a valid name for a macro parameter and macro
33 * body, but is interpreted in a special way by our single lexer, and
34 * so at some places where I expect a TIdent I need also to
35 * handle special cases and accept Tfor, Tif, etc at those places.
37 * There are multiple issues related to those keywords being lexed as the wrong tokens.
38 * Those keywords can be:
39 * - (1) in the name of the macro as in #define inline
40 * - (2) in a parameter of the macro as in #define foo(char) char x;
41 * - (3) in an argument to a macro call as in IDENT(if);
42 * Case 1 is easy to fix in define_ident.
43 * Case 2 is easy to fix in define_parse, where we detect such tokens in
44 * the parameters and then replace their occurrences in the body by a TIdent.
45 * Case 3 is only an issue when the expanded token is not really used
46 * as usual but is used, for instance, in a concatenation such as a ## if
47 * when expanded. In that case the grammar will not be happy,
48 * but this is also easy to fix in cpp_engine.
52 (*****************************************************************************)
54 (*****************************************************************************)
(* Printing helpers for this module: both functions are produced by
 * Common.mk_pr2_wrappers, parameterised by Flag_parsing_c.verbose_parsing. *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
57 (*****************************************************************************)
59 (*****************************************************************************)
62 (* ------------------------------------------------------------------------- *)
63 (* mimic standard.h *)
64 (* ------------------------------------------------------------------------- *)
(* A macro definition: a triple made of a string (the macro name when the
 * value is built by define_parse), the macro parameters and the macro body. *)
66 type define_def
= string * define_param
* define_body
(* macro parameters, given as a list of strings *)
69 | Params
of string list
(* body kept as a list of tokens *)
71 | DefineBody
of Parser_c.token list
(* body replaced by a parsing hint *)
72 | DefineHint
of parsinghack_hint
(* kinds of hints; assoc_hint_string associates macro-body names with them *)
74 and parsinghack_hint
=
80 | HintMacroIdentBuilder
84 (* cf also data/test.h *)
(* Association list from hint names (strings) to parsinghack_hint
 * constructors; it is the table searched by parsinghack_hint_of_string. *)
85 let assoc_hint_string = [
86 "YACFE_ITERATOR" , HintIterator
;
87 "YACFE_DECLARATOR" , HintDeclarator
;
88 "YACFE_STRING" , HintMacroString
;
89 "YACFE_STATEMENT" , HintMacroStatement
;
90 "YACFE_ATTRIBUTE" , HintAttribute
;
91 "YACFE_IDENT_BUILDER" , HintMacroIdentBuilder
;
93 "MACROSTATEMENT" , HintMacroStatement
; (* backward compatibility: older spelling, same hint as YACFE_STATEMENT *)
(* [parsinghack_hint_of_string name] looks [name] up in [assoc_hint_string]
 * through Common.assoc_option and returns the associated hint, if any. *)
let parsinghack_hint_of_string (name : string) : parsinghack_hint option =
  Common.assoc_option name assoc_hint_string
(* [is_parsinghack_hint name] is true exactly when
 * [parsinghack_hint_of_string name] finds a hint for [name]. *)
let is_parsinghack_hint (name : string) : bool =
  match parsinghack_hint_of_string name with
  | Some _ -> true
  | None -> false
(* Builds the Parser_c macro token that corresponds to a parsing hint.
 * Takes a (string, info) pair and a hint; every result constructor below is
 * applied to that same (s, ii) pair, only the constructor changes. *)
103 let (token_from_parsinghack_hint
:
104 (string * Ast_c.info
) -> parsinghack_hint
-> Parser_c.token
) =
108 Parser_c.TMacroIterator
(s
, ii
)
110 Parser_c.TMacroDecl
(s
, ii
)
112 Parser_c.TMacroString
(s
, ii
)
(* statement-like macro *)
113 | HintMacroStatement
->
114 Parser_c.TMacroStmt
(s
, ii
)
116 Parser_c.TMacroAttr
(s
, ii
)
(* identifier-building macro *)
117 | HintMacroIdentBuilder
->
118 Parser_c.TMacroIdentBuilder
(s
, ii
)
125 (*****************************************************************************)
126 (* Expansion helpers *)
127 (*****************************************************************************)
129 (* In some cases we can have macros like IDENT(if) that expand to some
130 * 'int xxx_if(void)', but as the lexer will currently generate a Tif for
131 * the expanded code, that may not be accepted as a token after a ##
132 * in the grammar. Hence this function to remap some tokens. This is because
133 * we should not use a single lexer for both working at the C level and
136 * update: it can also rename some TypedefIdent into TIdent, possibly
137 * because of bad interaction with add_typedef_root in parsing_hacks.
(* Walks a token list looking at pairs of adjacent tokens (x, y) and returns
 * a list in which a token next to a TCppConcatOp is turned into a TIdent
 * when its string matches Common.regexp_alpha. The new TIdent keeps the
 * string and the info of the token it replaces, and each remapping is
 * reported through pr2. All other tokens are returned unchanged. *)
139 let rec remap_keyword_tokens xs
=
(* '##' followed by an identifier: nothing to remap, skip both tokens *)
145 | Parser_c.TCppConcatOp _
, Parser_c.TIdent _
->
146 x
::y
::remap_keyword_tokens xs
(* identifier followed by '##': nothing to remap, skip both tokens *)
147 | Parser_c.TIdent _
, Parser_c.TCppConcatOp _
->
148 x
::y
::remap_keyword_tokens xs
(* '##' followed by some other token: remap that token if it looks
 * alphabetic, otherwise keep the pair as is *)
150 | Parser_c.TCppConcatOp
(i1
), y
->
152 let s = TH.str_of_tok y
in
153 let ii = TH.info_of_tok y
in
154 if s ==~
Common.regexp_alpha
156 pr2 (spf
"remaping: %s to an ident in expanded code" s);
157 x
::(Parser_c.TIdent
(s, ii))::remap_keyword_tokens xs
160 x
::y
::remap_keyword_tokens xs
(* some other token followed by '##': same remapping on the left operand;
 * when remapped, the '##' (y) is pushed back so it is examined again
 * together with the token that follows it *)
162 | x
, Parser_c.TCppConcatOp
(i1
) ->
163 let s = TH.str_of_tok x
in
164 let ii = TH.info_of_tok x
in
165 if s ==~
Common.regexp_alpha
167 pr2 (spf
"remaping: %s to an ident in expanded code" s);
168 (Parser_c.TIdent
(s, ii))::remap_keyword_tokens (y
::xs
)
171 x
::y
::remap_keyword_tokens xs
(* remaining case: keep x and continue from y *)
174 x
::remap_keyword_tokens (y
::xs
)
179 (* To expand the parameter of the macro. The env corresponds to the actual
180 * code that is bound to the parameters of the macro.
181 * Recurse ? fixpoint ? the expansion may also contain macro.
182 * Or to macro expansion in a strict manner, that is process first
183 * the parameters, expands macro in params, and then process enclosing
186 * note: do the concatenation job of a##b here ?
187 * normally this should be done in the grammar. Here just expand
188 * tokens. The only thing we handle here is we may have to remap
191 * todo: handle stringification here ? if #n
193 * todo? but could parsing_hacks then pass over the remapped tokens,
194 * for instance transform some of them back into some TypedefIdent
195 * so cpp_engine may be fooled?
197 let rec (cpp_engine
: (string , Parser_c.token list
) assoc
->
198 Parser_c.token list
-> Parser_c.token list
) =
200 xs
+> List.map
(fun tok
->
201 (* expand only TIdent ? no cos the parameter of the macro
202 * can actually be some 'register' so may have to look for
203 * any tokens candidates for the expansion.
204 * The only subtlety is that we should maybe not expand the TDefineIdent.
205 * update: in fact now the caller (define_parse) will have done
206 * the job right and already replaced the macro parameter with a TIdent.
209 | TIdent
(s,i1
) when List.mem_assoc
s env
-> Common.assoc
s env
213 +> remap_keyword_tokens
217 (* ------------------------------------------------------------------------- *)
218 (* apply macro, using standard.h or other defs *)
219 (* ------------------------------------------------------------------------- *)
221 (* Thanks to this function many things are no longer hardcoded in OCaml code.
222 * At some point they were hardcoded in a standard.h file but now I
223 * can even generate them on the fly on demand when there is actually
226 * No need to take care to not substitute the macro name itself
227 * that occurs in the macro definition because the macro name is
228 * after fix_token_define a TDefineIdent, no more a TIdent.
231 let rec apply_macro_defs
232 ~msg_apply_known_macro
233 ~msg_apply_known_macro_hint
235 let rec apply_macro_defs xs
=
239 (* old: "but could do more, could reuse same original token
240 * so that have in the Ast a Dbg, not a MACROSTATEMENT"
242 * | PToken ({tok = TIdent (s,i1)} as id)::xs
243 * when s = "MACROSTATEMENT" ->
245 * msg_macro_statement_hint s;
246 * id.tok <- TMacroStmt(TH.info_of_tok id.tok);
247 * find_macro_paren xs
249 * let msg_macro_statement_hint s =
250 * incr Stat.nMacroHint;
255 (* recognized macro of standard.h (or other) *)
256 | PToken
({tok
= TIdent
(s,i1
)} as id
)::Parenthised
(xxs
,info_parens
)::xs
257 when Hashtbl.mem defs
s ->
259 msg_apply_known_macro
s;
260 let (s, params
, body
) = Hashtbl.find defs
s in
264 pr2 ("WEIRD: macro without param used before parenthize: " ^
s);
265 (* ex: PRINTP("NCR53C400 card%s detected\n" ANDP(((struct ... *)
268 | DefineBody bodymacro
->
269 set_as_comment
(Token_c.CppMacro
) id
;
270 id
.new_tokens_before
<- bodymacro
;
272 msg_apply_known_macro_hint
s;
273 id
.tok
<- token_from_parsinghack_hint
(s,i1
) hint
;
277 | DefineBody bodymacro
->
279 (* bugfix: better to put this that before the match body,
280 * cos our macrostatement hint can have variable number of
281 * arguments and so it's ok if it does not match exactly
282 * the number of arguments. *)
283 if List.length params
!= List.length xxs
285 pr2_once
("WEIRD: macro with wrong number of arguments: " ^
s);
286 (* old: id.new_tokens_before <- bodymacro; *)
288 (* update: if wrong number, then I just pass this macro *)
289 [Parenthised
(xxs
, info_parens
)] +>
290 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
291 set_as_comment
Token_c.CppMacro id
;
297 let xxs'
= xxs +> List.map
(fun x
->
298 (tokens_of_paren_ordered x
) +> List.map
(fun x
->
299 TH.visitor_info_of_tok
Ast_c.make_expanded x
.tok
302 id
.new_tokens_before
<-
303 (* !!! cpp expansion job here !!! *)
304 cpp_engine
(Common.zip params
xxs'
) bodymacro
;
306 (* important to do that after have apply the macro, otherwise
307 * will pass as argument to the macro some tokens that
308 * are all TCommentCpp
310 [Parenthised
(xxs, info_parens
)] +>
311 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
312 set_as_comment
Token_c.CppMacro id
;
314 | DefineHint
(HintMacroStatement
as hint
) ->
315 (* important to do that after have apply the macro, otherwise
316 * will pass as argument to the macro some tokens that
317 * are all TCommentCpp
319 * note: such macrostatement can have a variable number of
320 * arguments but here we don't care, we just pass all the
325 | PToken
({tok
= TPtVirg _
} as id2
)::_
->
327 ("macro stmt with trailing ';', passing also ';' for: "^
329 (* sometimes still want pass its params ... as in
330 * DEBUGPOLL(static unsigned int prev_mask = 0);
333 msg_apply_known_macro_hint
s;
334 id
.tok
<- token_from_parsinghack_hint
(s,i1
) hint
;
335 [Parenthised
(xxs, info_parens
)] +>
336 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
337 set_as_comment
Token_c.CppMacro id2
;
340 msg_apply_known_macro_hint
s;
341 id
.tok
<- token_from_parsinghack_hint
(s,i1
) hint
;
342 [Parenthised
(xxs, info_parens
)] +>
343 iter_token_paren
(set_as_comment
Token_c.CppMacro
);
348 msg_apply_known_macro_hint
s;
349 id
.tok
<- token_from_parsinghack_hint
(s,i1
) hint
;
354 | PToken
({tok
= TIdent
(s,i1
)} as id
)::xs
355 when Hashtbl.mem defs
s ->
357 msg_apply_known_macro
s;
358 let (_s
, params
, body
) = Hashtbl.find defs
s in
362 pr2 ("WEIRD: macro with params but no parens found: " ^
s);
363 (* dont apply the macro, perhaps a redefinition *)
367 | DefineBody
[newtok
] ->
368 (* special case when 1-1 substitution, we reuse the token *)
369 id
.tok
<- (newtok
+> TH.visitor_info_of_tok
(fun _
->
370 TH.info_of_tok id
.tok
))
371 | DefineBody bodymacro
->
372 set_as_comment
Token_c.CppMacro id
;
373 id
.new_tokens_before
<- bodymacro
;
375 msg_apply_known_macro_hint
s;
376 id
.tok
<- token_from_parsinghack_hint
(s,i1
) hint
;
385 | (PToken x
)::xs
-> apply_macro_defs xs
386 | (Parenthised
(xxs, info_parens
))::xs
->
387 xxs +> List.iter
apply_macro_defs;
395 (*****************************************************************************)
396 (* The parsing hack for #define *)
397 (*****************************************************************************)
399 (* To parse macro definitions I need to do some tricks
400 * as some information can be obtained only at the lexing level. For instance
401 * the space after the name of the macro in '#define foo (x)' is meaningful
402 * but the grammar cannot get this information. So define_ident below
403 * looks at such a space and generates a special TOParDefine. In a similar
404 * way macro definitions can contain some backslashes and newlines
405 * and the grammar needs to know where the macro ends (which is
406 * a line-level and so low token-level information). Hence the
407 * function 'define_line' below and the TDefEOL.
409 * update: TDefEOL is handled in a special way at different places,
410 * a little bit like EOF, especially for error recovery, so this
411 * is an important token that should not be retagged!
414 * ugly hack, a better solution perhaps would be to erase TDefEOL
415 * from the Ast and list of tokens in parse_c.
417 * note: I do a +1 somewhere, it's for the unparsing to correctly sync.
419 * note: can't replace mark_end_define by simply a fakeInfo(). The reason
420 * is where is the \n TCommentSpace. Normally there is always a last token
421 * to synchronize on, either EOF or the token of the next toplevel.
422 * In the case of the #define we got in list of token
423 * [TCommentSpace "\n"; TDefEOL] but if TDefEOL is a fakeinfo then we will
424 * not synchronize on it and so we will not print the "\n".
425 * A solution would be to put the TDefEOL before the "\n".
427 * todo?: could put a ExpandedTok for that ?
(* Builds the end-of-define marker from the info [ii] of the last token seen:
 * the parse info of [ii] is copied into an OriginTok with its charpos moved
 * one past the position of [ii], and the tag fields are fresh references.
 * define_line_1 / define_line_2 cons the result onto their accumulator. *)
429 let mark_end_define ii =
431 { Ast_c.pinfo
= Ast_c.OriginTok
{ (Ast_c.parse_info_of_info
ii) with
(* the +1 places the marker right after [ii] *)
433 Common.charpos
= Ast_c.pos_of_info
ii + 1
(* fresh, empty annotations for the new info *)
435 cocci_tag
= ref Ast_c.emptyAnnot
;
436 comments_tag
= ref Ast_c.emptyComments
;
441 (* put the TDefEOL at the good place *)
(* Two mutually recursive passes over the token list; [acc] accumulates the
 * output tokens in reverse order.
 * define_line_1 handles tokens that are outside a #define; when it hands
 * over to define_line_2 it passes the line number and the info of the
 * token that triggered the switch (presumably a TDefine - the pattern is
 * not visible here, only the re-consed TDefine is). *)
442 let rec define_line_1 acc xs
=
446 let line = Ast_c.line_of_info
ii in
447 let acc = (TDefine
ii) :: acc in
448 define_line_2
acc line ii xs
(* an escaped newline outside a #define is reported and turned into a
 * TCommentSpace *)
449 | TCppEscapedNewline
ii::xs
->
450 pr2 "WEIRD: a \\ outside a #define";
451 let acc = (TCommentSpace
ii) :: acc in
(* any other token is copied unchanged *)
453 | x
::xs
-> define_line_1 (x
::acc) xs
(* define_line_2 handles tokens inside a #define that started on [line];
 * [lastinfo] is the info of the last token seen, from which the
 * end-of-define marker is built. *)
455 and define_line_2
acc line lastinfo xs
=
458 (* should not happen: EOF should be met before the end of the list *)
460 List.rev
(mark_end_define lastinfo
::acc)
462 let line'
= TH.line_of_tok x
in
463 let info = TH.info_of_tok x
in
(* the end marker is inserted before the EOF token *)
467 let acc = (mark_end_define lastinfo
) :: acc in
468 let acc = (EOF
ii) :: acc in
(* an escaped newline continues the define on the next line: it becomes a
 * TCommentSpace and the expected line number is incremented *)
470 | TCppEscapedNewline
ii ->
471 if (line'
<> line) then pr2 "PB: WEIRD: not same line number";
472 let acc = (TCommentSpace
ii) :: acc in
473 define_line_2
acc (line+1) info xs
(* depending on a test not visible here: either stay in the define, or
 * close it with the end marker and reprocess x outside the define *)
476 then define_line_2
(x
::acc) line info xs
477 else define_line_1 (mark_end_define lastinfo
::acc) (x
::xs
)
480 let rec define_ident acc xs
=
484 let acc = TDefine
ii :: acc in
486 | TCommentSpace i1
::TIdent
(s,i2
)::TOPar
(i3
)::xs
->
487 (* Change also the kind of TIdent to avoid bad interaction
488 * with other parsing_hack tricks. For instance if we keep TIdent then
489 * the stringification algo can believe the TIdent is a string-macro.
490 * So simpler to change the kind of the ident too.
492 (* if TOParDefine sticked to the ident, then
493 * it's a macro-function. Change token to avoid ambiguity
494 * between #define foo(x) and #define foo (x)
496 let acc = (TCommentSpace i1
) :: acc in
497 let acc = (TIdentDefine
(s,i2
)) :: acc in
498 let acc = (TOParDefine i3
) :: acc in
501 | TCommentSpace i1
::TIdent
(s,i2
)::xs
->
502 let acc = (TCommentSpace i1
) :: acc in
503 let acc = (TIdentDefine
(s,i2
)) :: acc in
506 (* bugfix: ident of macro (as well as params, cf below) can be tricky
507 * note, do we need to subst in the body of the define ? no cos
508 * here the issue is the name of the macro, as in #define inline,
509 * so obviously the name of this macro will not be used in its
510 * body (it would be a recursive macro, which is forbidden).
513 | TCommentSpace i1
::t
::xs
->
515 let s = TH.str_of_tok t
in
516 let ii = TH.info_of_tok t
in
517 if s ==~
Common.regexp_alpha
519 pr2 (spf
"remaping: %s to an ident in macro name" s);
520 let acc = (TCommentSpace i1
) :: acc in
521 let acc = (TIdentDefine
(s,ii)) :: acc in
525 pr2 "WEIRD: weird #define body";
530 pr2 "WEIRD: weird #define body";
534 let acc = x
:: acc in
(* Runs the two #define passes in sequence: define_line_1 first (starting
 * with an empty accumulator), then define_ident on its result. *)
let fix_tokens_define2 toks =
  let after_line_pass = define_line_1 [] toks in
  define_ident [] after_line_pass
(* Same result as fix_tokens_define2, but the call goes through
 * Common.profile_code under the key "C parsing.fix_define". *)
let fix_tokens_define a =
  let run () = fix_tokens_define2 a in
  Common.profile_code "C parsing.fix_define" run
547 (*****************************************************************************)
548 (* for the cpp-builtin, standard.h, part 0 *)
549 (*****************************************************************************)
(* Classifies a macro body as either a plain DefineBody or a DefineHint.
 * The body becomes a DefineHint only when parsinghack_hint_of_string
 * recognises the string [s]; in every other case the body is returned
 * unchanged inside DefineBody. *)
551 let macro_body_to_maybe_hint body
=
(* empty body: nothing to look up *)
553 | [] -> DefineBody body
(* NOTE(review): [s] is bound by a pattern not visible here - presumably the
 * body is a single identifier token; confirm *)
555 (match parsinghack_hint_of_string
s with
556 | Some hint
-> DefineHint hint
557 | None
-> DefineBody body
(* any other body is kept as is *)
559 | xs
-> DefineBody body
562 let rec define_parse xs
=
565 | TDefine i1
::TIdentDefine
(s,i2
)::TOParDefine i3
::xs
->
566 (* note: the macro could be badly written and have no closing ')' for
567 * its param, which would make us go too far away, but I don't think
568 * it's important to handle such an error *)
569 let (tokparams
, _
, xs
) =
570 xs
+> Common.split_when
(function TCPar _
-> true | _
-> false) in
572 xs
+> Common.split_when
(function TDefEOL _
-> true | _
-> false) in
574 tokparams
+> Common.map_filter
(function
576 | TIdent
(s, _
) -> Some
s
579 | TDefParamVariadic
(s, _
) -> Some
s
581 | TEllipsis _
-> Some
"..."
584 (* bugfix: param of macros can be tricky *)
585 let s = TH.str_of_tok x
in
586 if s ==~
Common.regexp_alpha
588 pr2 (spf
"remaping: %s to a macro parameter" s);
594 (* bugfix: also substitute to ident in body so cpp_engine will
597 let body = body +> List.map
(fun tok
->
601 let s = TH.str_of_tok tok
in
602 let ii = TH.info_of_tok tok
in
603 if s ==~
Common.regexp_alpha
&& List.mem
s params
605 pr2 (spf
"remaping: %s to an ident in macro body" s);
609 ) +> List.map
(TH.visitor_info_of_tok
Ast_c.make_expanded
) in
610 let def = (s, (s, Params
params, macro_body_to_maybe_hint body)) in
613 | TDefine i1
::TIdentDefine
(s,i2
)::xs
->
615 xs
+> Common.split_when
(function TDefEOL _
-> true | _
-> false) in
616 let body = body +> List.map
617 (TH.visitor_info_of_tok
Ast_c.make_expanded
) in
618 let def = (s, (s, NoParam
, macro_body_to_maybe_hint body)) in
621 (* cf tests-bis/define_plus.c *)
623 let line = Ast_c.line_of_info i1
in
624 pr2 (spf
"WEIRD: no ident in define at line %d" line);
627 | x
::xs
-> define_parse xs
631 let extract_cpp_define xs
=
632 let cleaner = xs
+> List.filter
(fun x
->
633 not
(TH.is_comment x
)