permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / token_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2009 University of Urbana Champaign
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16
17 open Common
18
19 (*****************************************************************************)
20 (* Prelude *)
21 (*****************************************************************************)
22
23 (* This file may seem redundant with the tokens generated by Yacc
24 * from parser.mly in parser_c.mli. The problem is that we need for
25 * many reasons to remember in the ast_c the tokens involved in this
26 * ast, not just the string, especially for the comment and cpp_passed
27 * tokens which, as it happens, were not in the ast at all. So,
28 * to avoid recursive mutual dependencies, we provide this file
29 * so that ast_c does not need to depend on yacc which depends on
30 * ast_c, etc.
31 *
32 * Also, ocamlyacc imposes some stupid constraints on the way we can define
33 * the token type. ocamlyacc forces us to define a token type that
34 * can't be a pair of a sum type; it must be directly a sum type.
35 * We don't have this constraint here.
36 *
37 * Also, some yacc tokens are not used in the grammar because they are filtered
38 * in some intermediate phases. But they still must be declared because
39 * ocamllex may generate them, or some intermediate phase may also
40 * generate them (like some functions in parsing_hacks.ml).
41 * Here we don't have this problem again so we can have a clearer token type.
42 *
43 *
44 *)
45
46 (*****************************************************************************)
47 (* Cpp constructs put in comments in lexer or parsing_hack *)
48 (*****************************************************************************)
49
50 (* history: was in ast_c.ml before:
51 * This type is not in the Ast but is associated with the TCommentCpp
52 * token. I put this enum here because parser_c.mly needs it. I could have put
53 * it also in lexer_parser.
54 *
55 * update: now in token_c.ml, and actually right now we want those tokens
56 * to be in the ast so that in the matching/transforming of C code, we
57 * can detect if some metavariables match code which has some
58 * cpp_passed tokens next to them (and so where we should issue a warning).
59 *)
(* Kind of cpp-related text that was passed over; this is the payload
 * of the TCommentCpp token tag. *)
type cppcommentkind =
  | CppDirective
  (* ifdef - a multipart directive, refined by the ifdef type below *)
  | CppIfDirective of ifdef
  | CppAttr
  | CppMacro
  (* ifdef 0, cplusplus, etc *)
  | CppPassingNormal
  (* expr passing *)
  | CppPassingCosWouldGetError
  (* skip_start/end tag *)
  | CppPassingExplicit

(* Declared here, next to cppcommentkind, to avoid circularity with
 * Parser_c. *)
and ifdef =
  | IfDef
  | IfDef0
  | Else
  | Endif
  | Other
71
72 (*****************************************************************************)
73 (* Types *)
74 (*****************************************************************************)
75
76 (*
77 * TODO? Do we want to handle also non OriginTok-like tokens here ?
78 * Right now we use this file to be able to later store in the
79 * ast some information about comments and passed cpp tokens, to
80 * improve our matching/transforming and unparsing in coccinelle.
81 * So we should be concerned really only with origin tok, so right
82 * now I use a simple Common.parse_info, not the more complex
83 * Ast_c.parse_info, or even more complex Ast_c.info.
84 * Also right now I defined only the token_tags of comment-like
85 * tokens.
86 *)
87
(* Location data carried by each token: the simple Common.parse_info. *)
type info = Common.parse_info

(* Constructor names are kept consistent with those in parser_c.mli. *)
type token_tag =
  | TCommentSpace
  | TCommentNewline
  | TComment
  (* the tokens passed over because of our limited handling of cpp *)
  | TCommentCpp of cppcommentkind
  (*| TUnknown ? *)

(* A token is a tag paired with its info. *)
type token = token_tag * info

(* If more kinds of tokens get included later, the current TCommentXxx
 * tags may have to move into their own type, with a generic
 * TCommentLike of comment_like_token constructor in token_tag. Another
 * option is to have is_xxx predicates as in token_helpers; that is not
 * very pretty, but it is required when some tokens can belong to
 * multiple categories.
 *
 * This is supposed to cover all the tokens that are not otherwise
 * represented in the ast via regular constructors and info.
 *)
type comment_like_token = token
116
117
118
119 (*****************************************************************************)
120 (* Getters *)
121 (*****************************************************************************)
122
(* Returns the info component of a token. This is simpler than in
 * token_helpers because the token type here is a plain pair, free of
 * the ocamlyacc constraints on how to define the token type. *)
let info_of_token (_, info) = info
126
127
128
129 (*****************************************************************************)
130 (*****************************************************************************)
131 (* remaining tokens
132
133 could define a type token_class = Comment | Ident | Operator | ...
134
135 | TInt of (string * Ast_c.info)
136 | TFloat of ((string * Ast_c.floatType) * Ast_c.info)
137 | TChar of ((string * Ast_c.isWchar) * Ast_c.info)
138 | TString of ((string * Ast_c.isWchar) * Ast_c.info)
139
140 | TIdent of (string * Ast_c.info)
141 | TypedefIdent of (string * Ast_c.info)
142
143 | TOPar of (Ast_c.info)
144 | TCPar of (Ast_c.info)
145 | TOBrace of (Ast_c.info)
146 | TCBrace of (Ast_c.info)
147 | TOCro of (Ast_c.info)
148 | TCCro of (Ast_c.info)
149 | TDot of (Ast_c.info)
150 | TComma of (Ast_c.info)
151 | TPtrOp of (Ast_c.info)
152 | TInc of (Ast_c.info)
153 | TDec of (Ast_c.info)
154 | TAssign of (Ast_c.assignOp * Ast_c.info)
155 | TEq of (Ast_c.info)
156 | TWhy of (Ast_c.info)
157 | TTilde of (Ast_c.info)
158 | TBang of (Ast_c.info)
159 | TEllipsis of (Ast_c.info)
160 | TDotDot of (Ast_c.info)
161 | TPtVirg of (Ast_c.info)
162 | TOrLog of (Ast_c.info)
163 | TAndLog of (Ast_c.info)
164 | TOr of (Ast_c.info)
165 | TXor of (Ast_c.info)
166 | TAnd of (Ast_c.info)
167 | TEqEq of (Ast_c.info)
168 | TNotEq of (Ast_c.info)
169 | TInf of (Ast_c.info)
170 | TSup of (Ast_c.info)
171 | TInfEq of (Ast_c.info)
172 | TSupEq of (Ast_c.info)
173 | TShl of (Ast_c.info)
174 | TShr of (Ast_c.info)
175 | TPlus of (Ast_c.info)
176 | TMinus of (Ast_c.info)
177 | TMul of (Ast_c.info)
178 | TDiv of (Ast_c.info)
179 | TMod of (Ast_c.info)
180 | TMin of (Ast_c.info)
181 | TMax of (Ast_c.info)
182 | Tchar of (Ast_c.info)
183 | Tshort of (Ast_c.info)
184 | Tint of (Ast_c.info)
185 | Tdouble of (Ast_c.info)
186 | Tfloat of (Ast_c.info)
187 | Tlong of (Ast_c.info)
188 | Tunsigned of (Ast_c.info)
189 | Tsigned of (Ast_c.info)
190 | Tvoid of (Ast_c.info)
191 | Tauto of (Ast_c.info)
192 | Tregister of (Ast_c.info)
193 | Textern of (Ast_c.info)
194 | Tstatic of (Ast_c.info)
195 | Ttypedef of (Ast_c.info)
196 | Tconst of (Ast_c.info)
197 | Tvolatile of (Ast_c.info)
198 | Tstruct of (Ast_c.info)
199 | Tunion of (Ast_c.info)
200 | Tenum of (Ast_c.info)
201 | Tbreak of (Ast_c.info)
202 | Telse of (Ast_c.info)
203 | Tswitch of (Ast_c.info)
204 | Tcase of (Ast_c.info)
205 | Tcontinue of (Ast_c.info)
206 | Tfor of (Ast_c.info)
207 | Tdo of (Ast_c.info)
208 | Tif of (Ast_c.info)
209 | Twhile of (Ast_c.info)
210 | Treturn of (Ast_c.info)
211 | Tgoto of (Ast_c.info)
212 | Tdefault of (Ast_c.info)
213 | Tsizeof of (Ast_c.info)
214 | Trestrict of (Ast_c.info)
215 | Tasm of (Ast_c.info)
216 | Tattribute of (Ast_c.info)
217 | Tinline of (Ast_c.info)
218 | Ttypeof of (Ast_c.info)
219
220 | TDefine of (Ast_c.info)
221 | TDefParamVariadic of ((string * Ast_c.info))
222
223 | TCppEscapedNewline of (Ast_c.info)
224
225 | TOParDefine of (Ast_c.info)
226 | TOBraceDefineInit of (Ast_c.info)
227 | TIdentDefine of ((string * Ast_c.info))
228 | TDefEOL of (Ast_c.info)
229 | TInclude of ((string * string * bool ref * Ast_c.info))
230 | TIncludeStart of ((Ast_c.info * bool ref))
231 | TIncludeFilename of ((string * Ast_c.info))
232 | TIfdef of (((int * int) option ref * Ast_c.info))
233 | TIfdefelse of (((int * int) option ref * Ast_c.info))
234 | TIfdefelif of (((int * int) option ref * Ast_c.info))
235 | TEndif of (((int * int) option ref * Ast_c.info))
236 | TIfdefBool of ((bool * (int * int) option ref * Ast_c.info))
237 | TIfdefMisc of ((bool * (int * int) option ref * Ast_c.info))
238 | TIfdefVersion of ((bool * (int * int) option ref * Ast_c.info))
239 | TUndef of (string * Ast_c.info)
240 | TCppDirectiveOther of (Ast_c.info)
241
242 | TMacroAttr of ((string * Ast_c.info))
243 | TMacroStmt of ((string * Ast_c.info))
244 | TMacroString of ((string * Ast_c.info))
245 | TMacroDecl of ((string * Ast_c.info))
246 | TMacroDeclConst of (Ast_c.info)
247 | TMacroStructDecl of ((string * Ast_c.info))
248 | TMacroIterator of ((string * Ast_c.info))
249 | TMacroAttrStorage of ((string * Ast_c.info))
250
251 | TCommentSkipTagStart of (Ast_c.info)
252 | TCommentSkipTagEnd of (Ast_c.info)
253
254 | TCParEOL of (Ast_c.info)
255 | TAction of (Ast_c.info)
256
257 | TCommentMisc xxx
258
259 | EOF of (Ast_c.info)
260 *)
261
262
263 (*****************************************************************************)
264 (* Helpers *)
265 (*****************************************************************************)