Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 | 4 | * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau |
34e49164 C |
5 | * |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
34e49164 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
15 | open Common | |
16 | ||
17 | (*****************************************************************************) | |
18 | (* The AST C related types *) | |
19 | (*****************************************************************************) | |
b1b2de81 C |
20 | (* |
21 | * Some things are tagged 'semantic', which means that they are computed | |
ae4735db C |
22 | * after parsing. |
23 | * | |
24 | * This means that some elements in this AST are present only if | |
b1b2de81 C |
25 | * some annotation/transformation has been done on the original AST returned |
26 | * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc. | |
27 | *) | |
28 | ||
29 | ||
30 | (* ------------------------------------------------------------------------- *) | |
31 | (* Token/info *) | |
32 | (* ------------------------------------------------------------------------- *) | |
34e49164 | 33 | |
ae4735db C |
34 | (* To allow some transformations over the AST, we keep as much information |
35 | * as possible in the AST such as the tokens content and their locations. | |
485bce71 C |
36 | * Those info are called 'info' (how original) and can be tagged. |
37 | * For instance one tag may say that the unparser should remove this token. | |
ae4735db | 38 | * |
485bce71 | 39 | * Update: Now I use a ref! in those 'info' so take care. |
0708f913 | 40 | * That means that modifications of the info of tokens can have |
ae4735db | 41 | * an effect on the info stored in the ast (which is sometimes |
0708f913 | 42 | * convenient, cf unparse_c.ml or comment_annotater_c.ml) |
ae4735db C |
43 | * |
44 | * convention: I often use 'ii' for the name of a list of info. | |
45 | * | |
46 | * Sometimes we want to add something at the beginning or at the end | |
485bce71 C |
47 | * of a construct. For 'function' and 'decl' we want to add something |
48 | * to their left and for 'if', 'while', 'for' and so on to their right. | |
49 | * We want some kinds of "virtual placeholders" that represent the start or | |
50 | * end of a construct. We use fakeInfo for that purpose. | |
51 | * To identify those cases I have added a fakestart/fakeend comment. | |
ae4735db | 52 | * |
485bce71 | 53 | * cocci: Each token will be decorated in the future by the mcodekind |
34e49164 C |
54 | * of cocci. It is the job of the pretty printer to look at this |
55 | * information and decide to print or not the token (and also the | |
56 | * pending '+' associated sometimes with the token). | |
ae4735db | 57 | * |
34e49164 C |
58 | * The first time that we parse the original C file, the mcodekind is |
59 | * empty, or more precisely all is tagged as a CONTEXT with NOTHING | |
60 | * associated. This is what I call a "clean" expr/statement/.... | |
ae4735db | 61 | * |
34e49164 C |
62 | * Each token will also be decorated in the future with an environment, |
63 | * because the pending '+' may contain metavariables that refer to some | |
64 | * C code. | |
ae4735db | 65 | * |
34e49164 C |
66 | *) |
67 | ||
3a314143 | 68 | (* for unparser: *) |
34e49164 | 69 | |
91eba41f | 70 | type posl = int * int (* (line, column) pair; for MetaPosValList, i.e. for position variables *) |
708f4980 | 71 | (* with sexp *) |
485bce71 C |
72 | |
73 | (* The virtual position is set in Parsing_hacks.insert_virtual_positions. *) | |
34e49164 | 74 | type virtual_position = Common.parse_info * int (* character offset; NOTE(review): presumably relative to the paired parse_info - confirm *) |
708f4980 | 75 | (* with sexp *) |
485bce71 | 76 | |
ae4735db | 77 | type parse_info = (* where a token's info comes from; one constructor per origin *) |
34e49164 C |
78 | (* Present both in the AST and in the list of tokens. *) |
79 | | OriginTok of Common.parse_info | |
80 | (* Present only in the AST and generated after parsing. Used mainly | |
81 | * by Julia, to add stuff at virtual places: beginning of func or decl. *) | |
82 | | FakeTok of string * virtual_position | |
83 | (* Present both in the AST and in the list of tokens. *) | |
84 | | ExpandedTok of Common.parse_info * virtual_position | |
0708f913 | 85 | |
34e49164 C |
86 | (* Present neither in the AST nor in the list of tokens, |
87 | * but only in the '+' of the mcode of some tokens. This kind of token | |
88 | * exists so that '=' can be used to compare big AST portions. | |
89 | *) | |
90 | | AbstractLineTok of Common.parse_info (* local to the abstracted thing *) | |
708f4980 | 91 | (* with sexp *) |
34e49164 | 92 | |
ae4735db | 93 | type info = { |
34e49164 | 94 | pinfo : parse_info; |
b1b2de81 C |
95 | |
96 | (* this cocci_tag can be changed, which is how we can express some program | |
ae4735db | 97 | * transformations by tagging the tokens involved in this transformation. |
485bce71 | 98 | *) |
951c7801 | 99 | cocci_tag: (Ast_cocci.mcodekind * metavars_binding list) option ref; |
0708f913 | 100 | (* set in comment_annotater_c.ml *) |
485bce71 | 101 | comments_tag: comments_around ref; |
b1b2de81 | 102 | |
abad11c5 C |
103 | (* annotations on the token (mutable) *) |
104 | mutable annots_tag: Token_annot.annots | |
105 | ||
34e49164 C |
106 | (* todo? token_info : sometimes useful to know what token it was *) |
107 | } | |
108 | and il = info list | |
109 | ||
110 | (* wrap2 is like wrap, except that I use it often for separator such | |
111 | * as ','. In that case the info is associated to the argument that | |
ae4735db C |
112 | * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. |
113 | * | |
114 | * wrap3 is like wrap, except that I use it in case sometimes it | |
708f4980 C |
115 | * will be empty because the info will be included in a nested |
116 | * entity (e.g. for Ident in expr because it's inlined in the name) | |
117 | * so user should never assume List.length wrap3 > 0. | |
118 | *) | |
34e49164 C |
119 | and 'a wrap = 'a * il |
120 | and 'a wrap2 = 'a * il | |
708f4980 | 121 | and 'a wrap3 = 'a * il (* * evotype*) |
34e49164 | 122 | |
b1b2de81 C |
123 | (* ------------------------------------------------------------------------- *) |
124 | (* Name *) | |
125 | (* ------------------------------------------------------------------------- *) | |
126 | ||
127 | (* was called 'ident' before, but 'name' is I think better | |
951c7801 | 128 | * as concatenated strings can be used not only for identifiers and for |
b1b2de81 | 129 | * declarators, but also for fields, for labels, etc. |
951c7801 | 130 | * |
708f4980 C |
131 | * Note: because the info is now embedded in the name, the info for |
132 | * expressions like Ident, or types like TypeName, is no longer | |
133 | * stored in the expression or type. Hence if you assume this, | |
134 | * which was true before, you are now wrong. So never write code like | |
135 | * let (unwrape,_), ii = e and use 'ii' believing it contains | |
136 | * the ii local to e. If you want to do that, use the appropriate | |
137 | * wrapper get_local_ii_of_expr_inlining_ii_of_name. | |
b1b2de81 | 138 | *) |
951c7801 | 139 | and name = |
b1b2de81 C |
140 | | RegularName of string wrap |
141 | | CppConcatenatedName of (string wrap) wrap2 (* the ## separators *) list | |
142 | (* normally only used inside list of things, as in parameters or arguments | |
143 | * in which case, cf cpp-manual, it has a special meaning *) | |
144 | | CppVariadicName of string wrap (* ## s *) | |
951c7801 | 145 | | CppIdentBuilder of string wrap (* s ( ) *) * |
b1b2de81 C |
146 | ((string wrap) wrap2 list) (* arguments *) |
147 | ||
148 | ||
34e49164 C |
149 | (* ------------------------------------------------------------------------- *) |
150 | (* C Type *) | |
151 | (* ------------------------------------------------------------------------- *) | |
152 | (* Could have more precise types in fullType, in expression, etc, but | |
153 | * it requires doing too many things in parsing, such as checking for | |
154 | * conflicting structnames, computing values, etc. Better to separate | |
91eba41f | 155 | * concerns. So I put '=>' to mean what we would really like. In fact |
34e49164 C |
156 | * what we would really like is to define another fullType, expression, etc |
157 | * from scratch, because many of these things are just sugar. | |
ae4735db | 158 | * |
34e49164 C |
159 | * invariant: Array and FunctionType also have a typeQualifier, but it |
160 | * makes no sense for them. I put this to factorise some code. If you look in | |
91eba41f | 161 | * the grammar, you see that we can never specify const for the array |
ae4735db | 162 | * itself (but we can do it for a pointer) or for a function; we always |
91eba41f | 163 | * have in the action rule of the grammar a { (nQ, FunctionType ...) }. |
ae4735db C |
164 | * |
165 | * | |
34e49164 C |
166 | * Because of ExprStatement, we can have more 'new scope' events, but |
167 | * rare I think. For instance with 'array of constExpression' there can | |
168 | * be an exprStatement and a new (local) struct defined. Same for | |
169 | * Constructor. | |
ae4735db | 170 | * |
b1b2de81 | 171 | *) |
34e49164 C |
172 | |
173 | ||
174 | and fullType = typeQualifier * typeC | |
708f4980 | 175 | and typeC = typeCbis wrap (* todo reput wrap3 *) |
34e49164 | 176 | |
b1b2de81 | 177 | and typeCbis = |
f59c9fb7 | 178 | NoType (* for c++ only *) |
34e49164 C |
179 | | BaseType of baseType |
180 | ||
181 | | Pointer of fullType | |
182 | | Array of constExpression option * fullType | |
183 | | FunctionType of functionType | |
184 | ||
ae4735db | 185 | | Enum of string option * enumType |
34e49164 C |
186 | | StructUnion of structUnion * string option * structType (* new scope *) |
187 | ||
188 | | EnumName of string | |
ae4735db | 189 | | StructUnionName of structUnion * string |
34e49164 | 190 | |
b1b2de81 | 191 | | TypeName of name * fullType option (* semantic: filled later *) |
ae4735db | 192 | |
3a314143 | 193 | | ParenType of fullType (* for unparser: *) |
34e49164 | 194 | |
ae4735db C |
195 | (* gccext: TypeOfType below may seem useless; why declare a |
196 | * __typeof__(int) x; ? | |
b1b2de81 | 197 | * When used with macros, it works around a problem of C, which |
34e49164 | 198 | * is that type declaration can be spread around the ident. Indeed it |
ae4735db C |
199 | * may be difficult to have a macro such as |
200 | * '#define macro(type, ident) type ident;' | |
201 | * because when you want to do a | |
202 | * macro(char[256], x), | |
203 | * then it will generate invalid code, but with a | |
204 | * '#define macro(type, ident) __typeof(type) ident;' | |
205 | * it will work. | |
b1b2de81 | 206 | *) |
ae4735db C |
207 | | TypeOfExpr of expression |
208 | | TypeOfType of fullType | |
485bce71 C |
209 | |
210 | (* cppext: IfdefType TODO *) | |
ae4735db C |
211 | |
212 | (* -------------------------------------- *) | |
213 | and baseType = Void | |
214 | | IntType of intType | |
34e49164 | 215 | | FloatType of floatType |
1eddfd50 C |
216 | | SizeType |
217 | | SSizeType | |
218 | | PtrDiffType | |
34e49164 | 219 | |
ae4735db | 220 | (* stdC: type section |
34e49164 | 221 | * add a | SizeT ? |
ae4735db | 222 | * note: char and signed char are semantically different!! |
34e49164 C |
223 | *) |
224 | and intType = CChar (* obsolete? | CWchar *) | |
225 | | Si of signed | |
226 | ||
227 | and signed = sign * base | |
228 | and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *) | |
229 | and sign = Signed | UnSigned | |
230 | ||
231 | and floatType = CFloat | CDouble | CLongDouble | |
232 | ||
233 | ||
ae4735db | 234 | (* -------------------------------------- *) |
34e49164 | 235 | and structUnion = Struct | Union |
ae4735db C |
236 | and structType = field list |
237 | and field = | |
485bce71 | 238 | | DeclarationField of field_declaration |
b1b2de81 | 239 | (* gccext: *) |
708f4980 | 240 | | EmptyField of info |
b1b2de81 | 241 | |
485bce71 | 242 | (* cppext: *) |
ae4735db | 243 | | MacroDeclField of (string * argument wrap2 list) |
708f4980 | 244 | wrap (* optional ';'*) |
485bce71 C |
245 | |
246 | (* cppext: *) | |
247 | | CppDirectiveStruct of cpp_directive | |
248 | | IfdefStruct of ifdef_directive (* * field list list *) | |
249 | ||
34e49164 C |
250 | |
251 | (* before unparser, I didn't have a FieldDeclList but just a Field. *) | |
ae4735db | 252 | and field_declaration = |
485bce71 | 253 | | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *) |
34e49164 C |
254 | |
255 | (* At first I thought that a bitfield could be only Signed/Unsigned. | |
256 | * But it seems that gcc allows char i:4. C rule must say that you | |
ae4735db | 257 | * can cast into int so enum too, ... |
34e49164 | 258 | *) |
ae4735db | 259 | and fieldkind = |
b1b2de81 | 260 | | Simple of name option * fullType |
ae4735db | 261 | | BitField of name option * fullType * |
b1b2de81 | 262 | info (* : *) * constExpression |
ae4735db | 263 | (* fullType => BitFieldInt | BitFieldUnsigned *) |
34e49164 C |
264 | |
265 | ||
ae4735db | 266 | (* -------------------------------------- *) |
c491d8ee | 267 | and enumType = oneEnumType wrap2 (* , *) list |
34e49164 C |
268 | (* => string * int list *) |
269 | ||
c491d8ee | 270 | and oneEnumType = name * (info (* = *) * constExpression) option |
34e49164 | 271 | |
ae4735db | 272 | (* -------------------------------------- *) |
34e49164 C |
273 | (* return * (params * has "...") *) |
274 | and functionType = fullType * (parameterType wrap2 list * bool wrap) | |
ae4735db | 275 | and parameterType = |
b1b2de81 C |
276 | { p_namei: name option; |
277 | p_register: bool wrap; | |
278 | p_type: fullType; | |
279 | } | |
280 | (* => (bool (register) * fullType) list * bool *) | |
34e49164 C |
281 | |
282 | ||
ae4735db | 283 | and typeQualifier = typeQualifierbis wrap |
34e49164 C |
284 | and typeQualifierbis = {const: bool; volatile: bool} |
285 | ||
485bce71 C |
286 | (* gccext: cppext: *) |
287 | and attribute = attributebis wrap | |
288 | and attributebis = | |
951c7801 | 289 | | Attribute of string |
34e49164 C |
290 | |
291 | (* ------------------------------------------------------------------------- *) | |
292 | (* C expression *) | |
293 | (* ------------------------------------------------------------------------- *) | |
708f4980 | 294 | and expression = (expressionbis * exp_info ref (* semantic: *)) wrap3 |
485bce71 | 295 | and exp_info = exp_type option * test |
0708f913 | 296 | and exp_type = fullType (* Type_c.completed_and_simplified *) * local |
8babbc8f C |
297 | and local = LocalVar of parse_info | StaticLocalVar of parse_info |
298 | | NotLocalVar (* cocci: *) | |
485bce71 C |
299 | and test = Test | NotTest (* cocci: *) |
300 | ||
951c7801 | 301 | and expressionbis = |
34e49164 C |
302 | |
303 | (* Ident can be a enumeration constant, a simple variable, a name of a func. | |
304 | * With cppext, Ident can also be the name of a macro. Sparse says | |
b1b2de81 C |
305 | * "an identifier with a meaning is a symbol" *) |
306 | | Ident of name (* todo? more semantic info such as LocalFunc *) | |
307 | ||
951c7801 | 308 | | Constant of constant |
34e49164 | 309 | | FunCall of expression * argument wrap2 (* , *) list |
b1b2de81 | 310 | (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *) |
34e49164 C |
311 | | CondExpr of expression * expression option * expression |
312 | ||
313 | (* should be considered as statements, bad C language *) | |
faf9a90c C |
314 | | Sequence of expression * expression |
315 | | Assignment of expression * assignOp * expression | |
34e49164 | 316 | |
91eba41f C |
317 | |
318 | | Postfix of expression * fixOp | |
319 | | Infix of expression * fixOp | |
320 | ||
951c7801 C |
321 | | Unary of expression * unaryOp |
322 | | Binary of expression * binaryOp * expression | |
34e49164 | 323 | |
91eba41f C |
324 | | ArrayAccess of expression * expression |
325 | ||
326 | (* field ident access *) | |
b1b2de81 C |
327 | | RecordAccess of expression * name |
328 | | RecordPtAccess of expression * name | |
34e49164 C |
329 | (* redundant normally, could replace it by DeRef RecordAccess *) |
330 | ||
ae4735db C |
331 | | SizeOfExpr of expression |
332 | | SizeOfType of fullType | |
333 | | Cast of fullType * expression | |
34e49164 | 334 | |
ae4735db C |
335 | (* gccext: *) |
336 | | StatementExpr of compound wrap (* ( ) new scope *) | |
7fe62b65 | 337 | | Constructor of fullType * initialiser |
34e49164 | 338 | |
3a314143 | 339 | (* for unparser: *) |
ae4735db | 340 | | ParenExpr of expression |
34e49164 | 341 | |
f59c9fb7 C |
342 | (* for C++: *) |
343 | | New of argument | |
4dfbc1c2 | 344 | | Delete of expression |
f59c9fb7 | 345 | |
485bce71 C |
346 | (* cppext: IfdefExpr TODO *) |
347 | ||
4dfbc1c2 | 348 | (* cppext: normally just expression *) |
708f4980 | 349 | and argument = (expression, weird_argument) Common.either |
ae4735db | 350 | and weird_argument = |
34e49164 C |
351 | | ArgType of parameterType |
352 | | ArgAction of action_macro | |
ae4735db | 353 | and action_macro = |
485bce71 | 354 | (* todo: ArgStatement of statement, possibly have ghost token *) |
ae4735db | 355 | | ActMisc of il |
34e49164 C |
356 | |
357 | ||
358 | (* I put string for Int and Float because int would not be enough: | |
359 | * OCaml ints are 31 bits (63 on 64-bit hosts). So simpler to use string. Same reason to have | |
360 | * string instead of int list for the String case. | |
ae4735db | 361 | * |
b1b2de81 | 362 | * note: -2 is not a constant, it is the unary operator '-' |
34e49164 C |
363 | * applied to constant 2. So the string must represent a positive |
364 | * integer only. *) | |
365 | ||
ae4735db | 366 | and constant = |
0708f913 C |
367 | | String of (string * isWchar) |
368 | | MultiString of string list (* can contain MacroString, todo: more info *) | |
34e49164 | 369 | | Char of (string * isWchar) (* normally it is equivalent to Int *) |
708f4980 | 370 | | Int of (string * intType) |
34e49164 C |
371 | | Float of (string * floatType) |
372 | ||
373 | and isWchar = IsWchar | IsChar | |
374 | ||
ae4735db C |
375 | |
376 | and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not | |
485bce71 | 377 | | GetRefLabel (* gccext: GetRefLabel, via &&label notation *) |
34e49164 C |
378 | and assignOp = SimpleAssign | OpAssign of arithOp |
379 | and fixOp = Dec | Inc | |
380 | ||
381 | and binaryOp = Arith of arithOp | Logical of logicalOp | |
382 | ||
ae4735db | 383 | and arithOp = |
34e49164 | 384 | | Plus | Minus | Mul | Div | Mod |
ae4735db | 385 | | DecLeft | DecRight |
34e49164 C |
386 | | And | Or | Xor |
387 | ||
ae4735db C |
388 | and logicalOp = |
389 | | Inf | Sup | InfEq | SupEq | |
390 | | Eq | NotEq | |
34e49164 C |
391 | | AndLog | OrLog |
392 | ||
393 | and constExpression = expression (* => int *) | |
394 | ||
34e49164 C |
395 | (* ------------------------------------------------------------------------- *) |
396 | (* C statement *) | |
397 | (* ------------------------------------------------------------------------- *) | |
398 | (* note that assignment is not a statement but an expression; | |
399 | * wonderful C language. | |
ae4735db | 400 | * |
34e49164 | 401 | * note: I use 'and' for type definition cos gccext allow statement as |
ae4735db C |
402 | * expression, so need mutual recursive type definition. |
403 | * | |
b1b2de81 | 404 | *) |
34e49164 | 405 | |
708f4980 | 406 | and statement = statementbis wrap3 |
ae4735db | 407 | and statementbis = |
34e49164 C |
408 | | Labeled of labeled |
409 | | Compound of compound (* new scope *) | |
410 | | ExprStatement of exprStatement | |
411 | | Selection of selection (* have fakeend *) | |
412 | | Iteration of iteration (* have fakeend *) | |
413 | | Jump of jump | |
414 | ||
415 | (* simplify cocci: only at the beginning of a compound normally *) | |
ae4735db | 416 | | Decl of declaration |
34e49164 C |
417 | |
418 | (* gccext: *) | |
419 | | Asm of asmbody | |
420 | | NestedFunc of definition | |
421 | ||
422 | (* cppext: *) | |
423 | | MacroStmt | |
ae4735db | 424 | |
34e49164 C |
425 | |
426 | ||
b1b2de81 | 427 | and labeled = Label of name * statement |
ae4735db | 428 | | Case of expression * statement |
34e49164 C |
429 | | CaseRange of expression * expression * statement (* gccext: *) |
430 | | Default of statement | |
431 | ||
ae4735db C |
432 | (* cppext: |
433 | * old: compound = (declaration list * statement list) | |
434 | * old: (declaration, statement) either list | |
34e49164 | 435 | * Simplify cocci to just have statement list, by integrating Decl in stmt. |
ae4735db | 436 | * |
485bce71 | 437 | * update: now introduce also the _sequencable to allow ifdef in the middle. |
b1b2de81 C |
438 | * Indeed, I now allow ifdefs in the ast but they must be only between |
439 | * "sequencable" elements. They can be put in a type only if this type | |
ae4735db C |
440 | * is used in a list, like at the toplevel, used in a 'toplevel list', |
441 | * or inside a compound, used in a 'statement list'. I must not allow | |
442 | * ifdef anywhere. For instance I can not make ifdef a statement | |
b1b2de81 C |
443 | * cos some instruction like If accept only one statement and the |
444 | * ifdef directive must not take the place of a legitimate instruction. | |
445 | * We have a similar phenomenon in SmPL, where we have the notion | |
ae4735db | 446 | * of statement and sequencable statement too. Once you have |
b1b2de81 C |
447 | * such a type of sequencable thing, then s/xx list/xx_sequencable list/ |
448 | * and introduce the ifdef. | |
ae4735db | 449 | * |
b1b2de81 C |
450 | * update: those ifdefs are either passed, or present in the AST but in |
451 | * a flat form. To structure those flat ifdefs you have to run | |
452 | * a transformation that will put in a tree the statements inside | |
453 | * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference | |
454 | * between an IfdefStmt (flat) and an IfdefStmt2 (tree structured). | |
ae4735db | 455 | * |
34e49164 | 456 | *) |
ae4735db | 457 | and compound = statement_sequencable list |
485bce71 C |
458 | |
459 | (* cppext: easier to put at statement_list level than statement level *) | |
ae4735db | 460 | and statement_sequencable = |
485bce71 | 461 | | StmtElem of statement |
b1b2de81 | 462 | |
ae4735db | 463 | (* cppext: *) |
485bce71 | 464 | | CppDirectiveStmt of cpp_directive |
ae4735db | 465 | | IfdefStmt of ifdef_directive |
485bce71 C |
466 | |
467 | (* this will be built in cpp_ast_c from the previous flat IfdefStmt *) | |
468 | | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list | |
34e49164 C |
469 | |
470 | and exprStatement = expression option | |
471 | ||
755320b0 C |
472 | and declOrExpr = ForDecl of declaration | ForExp of expression option wrap |
473 | ||
ae4735db | 474 | (* for Switch, need to check that all elements in the compound start |
34e49164 C |
475 | * with a case:, otherwise unreachable code. |
476 | *) | |
ae4735db | 477 | and selection = |
34e49164 | 478 | | If of expression * statement * statement |
ae4735db | 479 | | Switch of expression * statement |
485bce71 | 480 | |
34e49164 | 481 | |
ae4735db | 482 | and iteration = |
34e49164 C |
483 | | While of expression * statement |
484 | | DoWhile of statement * expression | |
755320b0 | 485 | | For of declOrExpr * exprStatement wrap * exprStatement wrap * |
34e49164 | 486 | statement |
485bce71 | 487 | (* cppext: *) |
34e49164 C |
488 | | MacroIteration of string * argument wrap2 list * statement |
489 | ||
b1b2de81 | 490 | and jump = Goto of name |
ae4735db | 491 | | Continue | Break |
34e49164 C |
492 | | Return | ReturnExpr of expression |
493 | | GotoComputed of expression (* gccext: goto *exp ';' *) | |
494 | ||
495 | ||
496 | (* gccext: *) | |
497 | and asmbody = il (* string list *) * colon wrap (* : *) list | |
498 | and colon = Colon of colon_option wrap2 list | |
499 | and colon_option = colon_option_bis wrap | |
500 | and colon_option_bis = ColonMisc | ColonExpr of expression | |
501 | ||
502 | ||
503 | (* ------------------------------------------------------------------------- *) | |
504 | (* Declaration *) | |
505 | (* ------------------------------------------------------------------------- *) | |
ae4735db | 506 | (* (string * ...) option cos can have empty declaration or struct tag |
34e49164 | 507 | * declaration. |
ae4735db C |
508 | * |
509 | * Before I had a Typedef constructor, but why make this special case and not | |
510 | * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2 | |
485bce71 | 511 | * declarations ? So I try to generalise and not have Typedef either. This |
34e49164 | 512 | * requires more work in parsing. Better to separate concern. |
ae4735db | 513 | * |
34e49164 C |
514 | * Before the need for unparser, I didn't have a DeclList but just a Decl. |
515 | * | |
516 | * I am not sure what it means to declare a prototype inline, but gcc | |
ae4735db | 517 | * accepts it. |
34e49164 C |
518 | *) |
519 | ||
ae4735db | 520 | and declaration = |
34e49164 C |
521 | | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *) |
522 | (* cppext: *) | |
5427db06 C |
523 | (* bool is true if there is a ; at the end *) |
524 | | MacroDecl of (string * argument wrap2 list * bool) wrap (* fakestart *) | |
17ba0788 C |
525 | | MacroDeclInit of |
526 | (string * argument wrap2 list * initialiser) wrap (* fakestart *) | |
34e49164 | 527 | |
ae4735db | 528 | and onedecl = |
4dfbc1c2 | 529 | { v_namei: (name * v_init) option; |
485bce71 | 530 | v_type: fullType; |
ae4735db | 531 | (* semantic: set in type annotated and used in cocci_vs_c |
978fd7e5 C |
532 | * when we transform some initialisation into an assignment |
533 | *) | |
534 | v_type_bis: fullType (* Type_c.completed_and_simplified *) option ref; | |
485bce71 C |
535 | v_storage: storage; |
536 | v_local: local_decl; (* cocci: *) | |
537 | v_attr: attribute list; (* gccext: *) | |
538 | } | |
4dfbc1c2 C |
539 | and v_init = |
540 | NoInit | ValInit of info * initialiser | |
541 | | ConstrInit of argument wrap2 (* , *) list wrap | |
485bce71 | 542 | and storage = storagebis * bool (* gccext: inline or not *) |
34e49164 C |
543 | and storagebis = NoSto | StoTypedef | Sto of storageClass |
544 | and storageClass = Auto | Static | Register | Extern | |
545 | ||
b1b2de81 C |
546 | and local_decl = LocalDecl | NotLocalDecl |
547 | ||
978fd7e5 C |
548 | (* fullType is the type used if the type should be converted to |
549 | an assignment. It can be adjusted in the type annotation | |
550 | phase when typedef information is available *) | |
34e49164 | 551 | and initialiser = initialiserbis wrap |
ae4735db C |
552 | and initialiserbis = |
553 | | InitExpr of expression | |
554 | | InitList of initialiser wrap2 (* , *) list | |
34e49164 C |
555 | (* gccext: *) |
556 | | InitDesignators of designator list * initialiser | |
557 | | InitFieldOld of string * initialiser | |
558 | | InitIndexOld of expression * initialiser | |
559 | ||
560 | (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *) | |
ae4735db C |
561 | and designator = designatorbis wrap |
562 | and designatorbis = | |
563 | | DesignatorField of string | |
34e49164 C |
564 | | DesignatorIndex of expression |
565 | | DesignatorRange of expression * expression | |
ae4735db | 566 | |
34e49164 C |
567 | (* ------------------------------------------------------------------------- *) |
568 | (* Function definition *) | |
569 | (* ------------------------------------------------------------------------- *) | |
ae4735db C |
570 | (* Normally we should define another type functionType2 because there |
571 | * are more restrictions on what can define a function than a pointer | |
34e49164 | 572 | * function. For instance a function declaration can omit the name of the |
b1b2de81 | 573 | * parameter whereas a function definition can not. But, in some cases such |
ae4735db | 574 | * as 'f(void) {', there is no name either, so I simplified and reused the |
34e49164 | 575 | * same functionType type for both declaration and function definition. |
ae4735db | 576 | * |
b1b2de81 C |
577 | * Also old style C does not have type in the parameter, so again simpler |
578 | * to abuse the functionType and allow missing type. | |
34e49164 | 579 | *) |
b1b2de81 | 580 | and definition = definitionbis wrap (* ( ) { } fakestart sto *) |
ae4735db | 581 | and definitionbis = |
b1b2de81 | 582 | { f_name: name; |
708f4980 | 583 | f_type: functionType; (* less? a functionType2 ? *) |
485bce71 C |
584 | f_storage: storage; |
585 | f_body: compound; | |
586 | f_attr: attribute list; (* gccext: *) | |
91eba41f | 587 | f_old_c_style: declaration list option; |
485bce71 C |
588 | } |
589 | (* cppext: IfdefFunHeader TODO *) | |
34e49164 C |
590 | |
591 | (* ------------------------------------------------------------------------- *) | |
485bce71 | 592 | (* cppext: cpp directives, #ifdef, #define and #include body *) |
34e49164 | 593 | (* ------------------------------------------------------------------------- *) |
485bce71 | 594 | and cpp_directive = |
ae4735db C |
595 | | Define of define |
596 | | Include of includ | |
ae4735db | 597 | | PragmaAndCo of il |
b1b2de81 | 598 | (*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *) |
485bce71 | 599 | |
708f4980 | 600 | and define = string wrap (* #define s eol *) * (define_kind * define_val) |
34e49164 C |
601 | and define_kind = |
602 | | DefineVar | |
485bce71 | 603 | | DefineFunc of ((string wrap) wrap2 list) wrap (* () *) |
3a314143 | 604 | | Undef |
ae4735db | 605 | and define_val = |
b1b2de81 | 606 | (* most common case; e.g. to define int constant *) |
ae4735db | 607 | | DefineExpr of expression |
91eba41f | 608 | |
34e49164 C |
609 | | DefineStmt of statement |
610 | | DefineType of fullType | |
485bce71 | 611 | | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *) |
91eba41f | 612 | |
34e49164 | 613 | | DefineFunction of definition |
485bce71 | 614 | | DefineInit of initialiser (* in practice only { } with possible ',' *) |
b1b2de81 | 615 | |
abad11c5 | 616 | | DefineMulti of statement list |
485bce71 | 617 | |
34e49164 C |
618 | | DefineText of string wrap |
619 | | DefineEmpty | |
620 | ||
485bce71 | 621 | | DefineTodo |
34e49164 C |
622 | |
623 | ||
485bce71 | 624 | |
ae4735db | 625 | and includ = |
485bce71 C |
626 | { i_include: inc_file wrap; (* #include s *) |
627 | (* cocci: computed in ? *) | |
628 | i_rel_pos: include_rel_pos option ref; | |
629 | (* cocci: cf -test incl *) | |
ae4735db | 630 | i_is_in_ifdef: bool; |
485bce71 C |
631 | (* cf cpp_ast_c.ml. set to None at parsing time. *) |
632 | i_content: (Common.filename (* full path *) * program) option; | |
633 | } | |
ae4735db | 634 | and inc_file = |
34e49164 C |
635 | | Local of inc_elem list |
636 | | NonLocal of inc_elem list | |
0708f913 | 637 | | Weird of string (* ex: #include SYSTEM_H *) |
34e49164 C |
638 | and inc_elem = string |
639 | ||
485bce71 | 640 | (* cocci: to tag the first of #include <xx/> and last of #include <yy/> |
ae4735db | 641 | * |
485bce71 C |
642 | * The first_of and last_of store the list of prefixes that was |
643 | * introduced by the include. On #include <a/b/x>, if the include was | |
644 | * the first in the file, it would give in first_of the following | |
ae4735db C |
645 | * prefixes a/b/x; a/b/; a/ ; <empty> |
646 | * | |
485bce71 C |
647 | * This is set after parsing, in cocci.ml, in update_rel_pos. |
648 | *) | |
ae4735db | 649 | and include_rel_pos = { |
485bce71 C |
650 | first_of : string list list; |
651 | last_of : string list list; | |
34e49164 C |
652 | } |
653 | ||
485bce71 C |
654 | |
655 | ||
b1b2de81 C |
656 | (* todo? to specialize if someone need more info *) |
657 | and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *) | |
658 | | IfdefDirective of (ifdefkind * matching_tag) wrap | |
ae4735db | 659 | and ifdefkind = |
b1b2de81 C |
660 | | Ifdef (* todo? of string ? of formula_cpp ? *) |
661 | | IfdefElseif (* same *) | |
662 | | IfdefElse (* same *) | |
ae4735db C |
663 | | IfdefEndif |
664 | (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally uses |
b1b2de81 | 665 | * a global so it means if you parse the same file twice you may get |
ae4735db | 666 | * different id. I try now to avoid this pb by resetting it each |
b1b2de81 C |
667 | * time I parse a file. |
668 | *) | |
ae4735db | 669 | and matching_tag = |
b1b2de81 C |
670 | IfdefTag of (int (* tag *) * int (* total with this tag *)) |
671 | ||
672 | ||
485bce71 C |
673 | |
674 | ||
675 | ||
34e49164 C |
676 | (* ------------------------------------------------------------------------- *) |
677 | (* The toplevels elements *) | |
678 | (* ------------------------------------------------------------------------- *) | |
679 | and toplevel = | |
680 | | Declaration of declaration | |
681 | | Definition of definition | |
ae4735db | 682 | |
34e49164 | 683 | (* cppext: *) |
485bce71 C |
684 | | CppTop of cpp_directive |
685 | | IfdefTop of ifdef_directive (* * toplevel list *) | |
686 | ||
34e49164 | 687 | (* cppext: *) |
ae4735db C |
688 | | MacroTop of string * argument wrap2 list * il |
689 | ||
34e49164 C |
690 | | EmptyDef of il (* gccext: allow redundant ';' *) |
691 | | NotParsedCorrectly of il | |
692 | ||
34e49164 C |
693 | | FinalDef of info (* EOF *) |
694 | ||
695 | (* ------------------------------------------------------------------------- *) | |
696 | and program = toplevel list | |
697 | ||
34e49164 C |
698 | (*****************************************************************************) |
699 | (* Cocci Bindings *) | |
700 | (*****************************************************************************) | |
ae4735db C |
701 | (* Was previously in pattern.ml, but because of the transformer, |
702 | * we need to decorate each token with some cocci code AND the environment | |
34e49164 C |
703 | * for this cocci code. |
704 | *) | |
705 | and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc | |
ae4735db | 706 | and metavar_binding_kind = |
5636bb2c C |
707 | | MetaIdVal of string * |
708 | Ast_cocci.meta_name list (* negative constraints *) | |
34e49164 C |
709 | | MetaFuncVal of string |
710 | | MetaLocalFuncVal of string | |
711 | ||
5636bb2c C |
712 | | MetaExprVal of expression (* a "clean expr" *) * |
713 | (*subterm constraints, currently exprs*) | |
714 | Ast_cocci.meta_name list | |
34e49164 C |
715 | | MetaExprListVal of argument wrap2 list |
716 | | MetaParamVal of parameterType | |
717 | | MetaParamListVal of parameterType wrap2 list | |
718 | ||
719 | | MetaTypeVal of fullType | |
113803cf | 720 | | MetaInitVal of initialiser |
8f657093 | 721 | | MetaInitListVal of initialiser wrap2 list |
413ffc02 C |
722 | | MetaDeclVal of declaration |
723 | | MetaFieldVal of field | |
190f1acf | 724 | | MetaFieldListVal of field list |
34e49164 C |
725 | | MetaStmtVal of statement |
726 | ||
727 | (* Could also be in Lib_engine.metavars_binding2 with the ParenVal, | |
728 | * because don't need to have the value for a position in the env of | |
729 | * a '+'. But ParenVal or LabelVal are used only by CTL, they are not | |
730 | * variables accessible via SmPL whereas the position can be one day | |
731 | * so I think it's better to put MetaPosVal here *) | |
732 | | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *) | |
485bce71 C |
733 | | MetaPosValList of |
734 | (Common.filename * string (*element*) * posl * posl) list (* min, max *) | |
34e49164 C |
735 | | MetaListlenVal of int |
736 | ||
737 | ||
738 | (*****************************************************************************) | |
739 | (* C comments *) | |
740 | (*****************************************************************************) | |
741 | ||
ae4735db | 742 | (* convention: I often use "m" for comments as I can not use "c" |
485bce71 | 743 | * (already use for c stuff) and "com" is too long. |
34e49164 C |
744 | *) |
745 | ||
0708f913 C |
746 | (* this type will be associated to each token. |
747 | *) | |
34e49164 | 748 | and comments_around = { |
0708f913 C |
749 | mbefore: Token_c.comment_like_token list; |
750 | mafter: Token_c.comment_like_token list; | |
708f4980 C |
751 | |
752 | (* less: could remove ? do something simpler than CComment for | |
753 | * coccinelle, cf above. *) | |
754 | mbefore2: comment_and_relative_pos list; | |
755 | mafter2: comment_and_relative_pos list; | |
756 | } | |
34e49164 C |
757 | and comment_and_relative_pos = { |
758 | ||
759 | minfo: Common.parse_info; | |
760 | (* the int represents the number of lines of difference between the |
761 | * current token and the comment. When on same line, this number is 0. | |
762 | * When previous line, -1. In some way the after/before in previous | |
763 | * record is useless because the sign of the integer can help |
764 | * tell the difference too, but I keep it that way. |
765 | *) | |
766 | mpos: int; | |
767 | (* todo? | |
ae4735db | 768 | * cppbetween: bool; touse? if false positive |
34e49164 C |
769 | * is_alone_in_line: bool; (*for labels, to avoid false positive*) |
770 | *) | |
708f4980 | 771 | } |
34e49164 C |
772 | |
773 | and comment = Common.parse_info | |
774 | and com = comment list ref | |
34e49164 | 775 | |
708f4980 | 776 | (* with sexp *) |
34e49164 C |
777 | |
778 | ||
779 | (*****************************************************************************) | |
780 | (* Some constructors *) | |
781 | (*****************************************************************************) | |
(* Qualifier with neither [const] nor [volatile] set, paired with an
 * empty list. *)
let nullQualif =
  ( { const = false;
      volatile = false },
    [] )

(* Short alias for [nullQualif]. *)
let nQ = nullQualif
34e49164 C |
784 | |
(* The signed [int] base type. *)
let defaultInt =
  let signed_int = Si (Signed, CInt) in
  BaseType (IntType signed_int)

(* Fresh reference holding "no type known yet, not a test". A new ref is
 * allocated on every call. *)
let noType () =
  let unknown = (None, NotTest) in
  ref unknown

(* Expression statement carrying no expression, with an empty token list. *)
let noInstr = (ExprStatement None, [])

(* Always [None]. *)
let noTypedefDef () = None
790 | ||
(* Metavariable environment with no binding. *)
let emptyMetavarsBinding : metavars_binding = []
793 | ||
(* Default cocci annotation: a CONTEXT mcode with no position and nothing
 * attached, together with an empty list of environments. *)
let emptyAnnotCocci =
  let context = Ast_cocci.CONTEXT (Ast_cocci.NoPos, Ast_cocci.NOTHING) in
  let no_envs : metavars_binding list = [] in
  (context, no_envs)
34e49164 | 797 | |
(* Absence of any cocci annotation. *)
let emptyAnnot : (Ast_cocci.mcodekind * metavars_binding list) option =
  None
708f4980 C |
800 | |
(* compatibility mode: read the annotation stored in [aref], falling back
 * to [emptyAnnotCocci] when the reference holds [None]. *)
let mcode_and_env_of_cocciref aref =
  let current = !aref in
  match current with
  | None -> emptyAnnotCocci
  | Some annot -> annot
806 | ||
807 | ||
34e49164 C |
(* A [comments_around] record in which all four comment lists are empty. *)
let emptyComments =
  { mbefore = []; mbefore2 = [];
    mafter = [];  mafter2 = [];
  }
814 | ||
815 | ||
(* for include, some meta information needed by cocci *)

(* Fresh reference containing no relative-position information. *)
let noRelPos () : include_rel_pos option ref =
  ref None

(* Fresh reference initialised to [false]. *)
let noInIfdef () : bool ref =
  ref false
821 | ||
822 | ||
(* Used when we want to add some info to the AST that does not correspond
 * to an existing C element.
 * old: or when we don't want to 'synchronize' on it in unparse_c.ml
 * (there is now another mark for that matter).
 *
 * The value is an all-empty/zero parse_info paired with -1.
 *)
let no_virt_pos =
  let blank_pi = { file = ""; line = 0; column = 0; charpos = 0; str = "" } in
  (blank_pi, -1)
829 | ||
(* Build a fake token: empty string located at [no_virt_pos], with fresh
 * empty cocci/comment references and no annotation.
 * The argument is not used by the body. *)
let fakeInfo _pi =
  let fake_pinfo = FakeTok ("", no_virt_pos) in
  { pinfo = fake_pinfo;
    cocci_tag = ref emptyAnnot;
    annots_tag = Token_annot.empty;
    comments_tag = ref emptyComments;
  }
836 | ||
485bce71 C |
(* Empty token list. *)
let noii = []

(* Empty attribute list. *)
let noattr = []

(* No included-file content available. *)
let noi_content : (Common.filename * program) option = None
34e49164 C |
840 | |
841 | (*****************************************************************************) | |
842 | (* Wrappers *) | |
843 | (*****************************************************************************) | |
(* First component of a wrapped pair. *)
let unwrap (x, _) = x

(* Same projection, named for wrap2 pairs. *)
let unwrap2 (x, _) = x

(* Innermost node of an ((node, type), tokens) triple. *)
let unwrap_expr ((node, _typ), _iie) = node

(* Replace the innermost node, keeping type and tokens. *)
let rewrap_expr ((_old_node, typ), iie) newe =
  let inner = (newe, typ) in
  (inner, iie)

(* Middle component of a (qualifier, (typeC, tokens)) triple. *)
let unwrap_typeC (_qu, (typeC, _ii)) = typeC

(* Replace the middle component, keeping qualifier and tokens. *)
let rewrap_typeC (qu, (_old_typeC, ii)) newtypeC =
  let inner = (newtypeC, ii) in
  (qu, inner)

(* First component of a (typeC, tokens) pair. *)
let unwrap_typeCbis (typeC, _ii) = typeC

(* First component of a (statement, tokens) pair. *)
let unwrap_st (st, _ii) = st
857 | ||
858 | (* ------------------------------------------------------------------------- *) | |
(* Build an expression from its node and tokens, with a fresh empty type
 * reference obtained from [noType]. *)
let mk_e unwrap_e ii =
  let typed = (unwrap_e, noType ()) in
  (typed, ii)

(* Same, but the type component is supplied by the caller. *)
let mk_e_bis unwrap_e ty ii =
  let typed = (unwrap_e, ty) in
  (typed, ii)

(* Build a full type using the null qualifier [nQ]. *)
let mk_ty typeC ii =
  let unqualified = (typeC, ii) in
  (nQ, unqualified)

(* Pair a typeC with its tokens. *)
let mk_tybis typeC ii = (typeC, ii)

(* Pair a statement node with its tokens. *)
let mk_st unwrap_st ii = (unwrap_st, ii)
866 | ||
867 | (* ------------------------------------------------------------------------- *) | |
(* Token component of a (typeC, tokens) pair. *)
let get_ii_typeC_take_care (_typeC, ii) = ii

(* Token component of a (statement, tokens) pair. *)
let get_ii_st_take_care (_st, ii) = ii

(* Token component of an (expression, tokens) pair. *)
let get_ii_expr_take_care (_e, ii) = ii

(* Destructure and rebuild a (statement, tokens) pair. *)
let get_st_and_ii pair =
  let (st, ii) = pair in
  (st, ii)

(* Destructure and rebuild a (qualifier, (typeC, tokens)) triple. *)
let get_ty_and_ii full =
  let (qu, (typeC, ii)) = full in
  (qu, (typeC, ii))

(* Destructure and rebuild an (expression, tokens) pair. *)
let get_e_and_ii pair =
  let (e, ii) = pair in
  (e, ii)
875 | ||
876 | ||
877 | (* ------------------------------------------------------------------------- *) | |
34e49164 C |
(* Current content of the type reference stored in an expression. *)
let get_type_expr ((_node, typ), _iie) =
  let current = !typ in
  current

(* Overwrite, in place, the type reference stored in an expression.
 * Returns unit; the expression itself is not rebuilt.
 * old: (unwrap_e, newtyp), iie *)
let set_type_expr ((_node, typ_ref), _iie) newtyp =
  typ_ref := newtyp
882 | ||
(* Type part of the annotation stored in an expression, or [None] when no
 * type annotation is present. The "local" and "test" parts are dropped. *)
let get_onlytype_expr ((_node, typ), _iie) =
  let (annot, _test) = !typ in
  match annot with
  | None -> None
  | Some (ft, _local) -> Some ft
887 | ||
(* "Local" part of the annotation stored in an expression, or [None] when
 * no type annotation is present. The type and "test" parts are dropped. *)
let get_onlylocal_expr ((_node, typ), _iie) =
  let (annot, _test) = !typ in
  match annot with
  | None -> None
  | Some (_ft, local) -> Some local
892 | ||
91eba41f | 893 | (* ------------------------------------------------------------------------- *) |
(* Copy of token [ii] whose string is replaced by [s].
 * OriginTok, ExpandedTok and FakeTok keep their constructor; an
 * AbstractLineTok comes back as an OriginTok. *)
let rewrap_str s ii =
  let with_str pi = { pi with Common.str = s } in
  let new_pinfo =
    match ii.pinfo with
    | OriginTok pi | AbstractLineTok pi -> OriginTok (with_str pi)
    | ExpandedTok (pi, vpi) -> ExpandedTok (with_str pi, vpi)
    | FakeTok (_, vpi) -> FakeTok (s, vpi)
  in
  { ii with pinfo = new_pinfo }
901 | ||
(* Copy of token [ii] with its [pinfo] field replaced by [pi]. *)
let rewrap_pinfo pi ii =
  let new_pinfo = pi in
  { ii with pinfo = new_pinfo }
904 | ||
708f4980 C |
905 | |
906 | ||
34e49164 C |
(* info about the current location: for expanded and fake tokens this is
 * the parse_info held in the virtual position. *)
let get_pi pinfo =
  match pinfo with
  | OriginTok pi | AbstractLineTok pi -> pi
  | ExpandedTok (_, (pi, _)) | FakeTok (_, (pi, _)) -> pi
913 | ||
(* original info: for an expanded token this is its first component
 * (this is the difference with get_pi).
 * Fails with "no position information" on a fake token. *)
let get_opi pinfo =
  match pinfo with
  | FakeTok (_, _) -> failwith "no position information"
  | OriginTok pi | AbstractLineTok pi -> pi
  | ExpandedTok (pi, _) -> pi
920 | ||
34e49164 C |
(* String carried by a token: the [str] field of its parse_info, or the
 * string stored directly in a fake token. *)
let str_of_info ii =
  match ii.pinfo with
  | FakeTok (s, _) -> s
  | OriginTok pi | ExpandedTok (pi, _) | AbstractLineTok pi ->
      pi.Common.str
927 | ||
(* Apply [f] to the current-location parse_info of token [ii]; for
 * expanded and fake tokens that is the one inside the virtual position. *)
let get_info f ii =
  match ii.pinfo with
  | OriginTok pi | AbstractLineTok pi -> f pi
  | ExpandedTok (_, (pi, _)) | FakeTok (_, (pi, _)) -> f pi
934 | ||
(* Apply [f] to the original parse_info of token [ii]. Differs from
 * get_info only on expanded tokens, where the first component is used.
 * Fake tokens use the parse_info of their virtual position. *)
let get_orig_info f ii =
  match ii.pinfo with
  | OriginTok pi | ExpandedTok (pi, _) | AbstractLineTok pi -> f pi
  | FakeTok (_, (pi, _)) -> f pi
941 | ||
(* Turn token [ii] into an ExpandedTok built from its original parse_info
 * and [no_virt_pos]. Since it goes through [get_opi], it fails on a fake
 * token. *)
let make_expanded ii =
  let original = get_opi ii.pinfo in
  { ii with pinfo = ExpandedTok (original, no_virt_pos) }
944 | ||
(* Character position at the current location of the token. *)
let pos_of_info ii =
  get_info (fun pi -> pi.Common.charpos) ii

(* Character position, line, column and file taken from the original
 * parse_info of the token. *)
let opos_of_info ii =
  get_orig_info (fun pi -> pi.Common.charpos) ii
let line_of_info ii =
  get_orig_info (fun pi -> pi.Common.line) ii
let col_of_info ii =
  get_orig_info (fun pi -> pi.Common.column) ii
let file_of_info ii =
  get_orig_info (fun pi -> pi.Common.file) ii

(* mcode part of the cocci annotation of the token (default annotation
 * when none is set). *)
let mcode_of_info ii =
  let (mcode, _env) = mcode_and_env_of_cocciref ii.cocci_tag in
  mcode

(* Raw [pinfo] field of the token. *)
let pinfo_of_info ii = ii.pinfo

(* Current-location parse_info of the token. *)
let parse_info_of_info ii =
  let pinfo = ii.pinfo in
  get_pi pinfo
953 | ||
(* "file:line" string built from the original location of the token. *)
let strloc_of_info ii =
  let file = file_of_info ii in
  let line = line_of_info ii in
  spf "%s:%d" file line
956 | ||
485bce71 C |
(* True exactly when the token's pinfo is a FakeTok. *)
let is_fake { pinfo = p; _ } =
  match p with
  | FakeTok _ -> true
  | _ -> false
961 | ||
(* True exactly when the token's pinfo is an OriginTok. *)
let is_origintok { pinfo = p; _ } =
  match p with
  | OriginTok _ -> true
  | _ -> false
966 | ||
91eba41f | 967 | (* ------------------------------------------------------------------------- *) |
(* Position of a token for comparison purposes: either a real parse_info
 * or a virtual position. *)
type posrv =
  | Real of Common.parse_info
  | Virt of virtual_position
485bce71 | 969 | |
34e49164 C |
(* Order two tokens by position.
 *
 * OriginTok and AbstractLineTok are compared through their own parse_info
 * (Real); FakeTok and ExpandedTok through their virtual position (Virt).
 *
 * - Real vs Real: ordered by charpos.
 * - Virt vs Real (either order): ordered by charpos, never returns 0; on
 *   equal charpos the virtual token is placed after the real one.
 * - Virt vs Virt: ordered by charpos, then by the second component of the
 *   virtual position.
 *
 * The mixed cases compare the charpos values directly instead of testing
 * the result of [compare] against the exact values -1 / 1: [compare] is
 * only documented to return a negative, zero or positive integer.
 *)
let compare_pos ii1 ii2 =
  let get_pos = function
    | OriginTok pi -> Real pi
    | AbstractLineTok pi -> Real pi (* used for printing *)
    | FakeTok (_, vpi) | ExpandedTok (_, vpi) -> Virt vpi
  in
  let pos1 = get_pos (pinfo_of_info ii1) in
  let pos2 = get_pos (pinfo_of_info ii2) in
  match (pos1, pos2) with
  | (Real p1, Real p2) ->
      compare p1.Common.charpos p2.Common.charpos
  | (Virt (p1, _), Real p2) ->
      if p1.Common.charpos < p2.Common.charpos then (-1) else 1
  | (Real p1, Virt (p2, _)) ->
      if p1.Common.charpos > p2.Common.charpos then 1 else (-1)
  | (Virt (p1, o1), Virt (p2, o2)) ->
      (match compare p1.Common.charpos p2.Common.charpos with
       | 0 -> compare o1 o2
       | c -> c)
992 | ||
(* Two (line, column) positions are equal when both components are. *)
let equal_posl (l1, c1) (l2, c2) =
  if l1 =|= l2
  then c1 =|= c2
  else false
995 | ||
755320b0 C |
(* Difference between two (line, column) positions: the line difference
 * [l2 - l1] if it is non-zero, otherwise the column difference [c2 - c1].
 * Note the sign: the result is positive when the SECOND position is the
 * later one. *)
let compare_posl (l1, c1) (l2, c2) =
  let line_delta = l2 - l1 in
  match line_delta with
  | 0 -> c2 - c1
  | _ -> line_delta
1000 | ||
34e49164 C |
(* Convert the position of a token into an Ast_cocci fixed position:
 * Real charpos for an original token, Virt (charpos, offset) for expanded
 * and fake tokens. Fails with "unexpected abstract" on AbstractLineTok. *)
let info_to_fixpos ii =
  match pinfo_of_info ii with
  | AbstractLineTok _ -> failwith "unexpected abstract"
  | OriginTok pi -> Ast_cocci.Real pi.Common.charpos
  | ExpandedTok (_, (pi, offset)) | FakeTok (_, (pi, offset)) ->
      Ast_cocci.Virt (pi.Common.charpos, offset)
1009 | ||
(* cocci: true when the annotation stored in the expression is flagged
 * [Test]. *)
let is_test (e : expression) =
  let ((_node, annot), _ii) = e in
  snd !annot =*= Test
34e49164 C |
1015 | |
1016 | (*****************************************************************************) | |
1017 | (* Abstract line *) | |
1018 | (*****************************************************************************) | |
1019 | ||
1020 | (* When we have extended the C Ast to add some info to the tokens, | |
1021 | * such as its line number in the file, we can not use anymore the | |
1022 | * ocaml '=' to compare Ast elements. To overcome this problem, to be | |
1023 | * able to use again '=', we just have to get rid of all those extra | |
1024 | * information, to "abstract those line" (al) information. | |
ae4735db | 1025 | * |
91eba41f C |
1026 | * Julia then modifies it a little to have a tokenindex, so the original |
1027 | * true al_info is in fact real_al_info. | |
34e49164 C |
1028 | *) |
1029 | ||
(* Abstract-line version of token [x]: keeps only its string; charpos,
 * line and column are all set to [tokenindex], the file is emptied, and
 * cocci tag, annotations and comments are reset. *)
let al_info tokenindex x =
  let abstract_pi =
    { str = str_of_info x;
      file = "";
      charpos = tokenindex;
      line = tokenindex;
      column = tokenindex } in
  { pinfo = AbstractLineTok abstract_pi;
    cocci_tag = ref emptyAnnot;
    annots_tag = Token_annot.empty;
    comments_tag = ref emptyComments;
  }
1042 | ||
(* Copy of token [x] with fresh empty cocci and comment references; the
 * pinfo and annots_tag fields are kept as they are. *)
let semi_al_info x =
  let fresh_cocci = ref emptyAnnot in
  let fresh_comments = ref emptyComments in
  { x with
    cocci_tag = fresh_cocci;
    comments_tag = fresh_comments;
  }
1048 | ||
(* Placeholder used for charpos/line/column of fully abstracted tokens. *)
let magic_real_number = -10

(* Abstract-line version of token [x] where charpos, line and column are
 * all [magic_real_number]; only the string of [x] is kept, and cocci tag,
 * annotations and comments are reset. *)
let real_al_info x =
  let abstract_pi =
    { str = str_of_info x;
      file = "";
      charpos = magic_real_number;
      line = magic_real_number;
      column = magic_real_number } in
  { pinfo = AbstractLineTok abstract_pi;
    cocci_tag = ref emptyAnnot;
    annots_tag = Token_annot.empty;
    comments_tag = ref emptyComments;
  }
1063 | ||
b1b2de81 C |
(* Abstract the comments attached to a token: only the [TCommentCpp]
 * entries of [mafter] are kept, with charpos/line/column replaced by
 * [magic_real_number]; the three other lists are emptied. *)
let al_comments x =
  let is_cpp_comment = function
    | (Token_c.TCommentCpp _, _) -> true
    | _ -> false in
  let abstract_position (tok, pi) =
    let pi' =
      { pi with
        Common.charpos = magic_real_number;
        Common.line = magic_real_number;
        Common.column = magic_real_number } in
    (tok, pi') in
  let cpp_after = List.filter is_cpp_comment x.mafter in
  { mbefore = []; (* duplicates mafter of the previous token *)
    mafter = List.map abstract_position cpp_after;
    mbefore2 = [];
    mafter2 = [];
  }
b1b2de81 | 1076 | |
(* Like an abstract-line token indexed by [tokenindex], except that the
 * comments of [x] are passed through [al_comments] instead of being
 * dropped. *)
let al_info_cpp tokenindex x =
  let abstract_pi =
    { str = str_of_info x;
      file = "";
      charpos = tokenindex;
      line = tokenindex;
      column = tokenindex } in
  let kept_comments = al_comments !(x.comments_tag) in
  { pinfo = AbstractLineTok abstract_pi;
    cocci_tag = ref emptyAnnot;
    annots_tag = Token_annot.empty;
    comments_tag = ref kept_comments;
  }
1089 | ||
(* Copy of token [x] keeping its pinfo, with an empty cocci tag, empty
 * annotations, and comments passed through [al_comments]. *)
let semi_al_info_cpp x =
  let kept_comments = al_comments !(x.comments_tag) in
  { x with
    cocci_tag = ref emptyAnnot;
    annots_tag = Token_annot.empty;
    comments_tag = ref kept_comments;
  }
1096 | ||
(* Abstract-line token positioned at [magic_real_number], except that the
 * comments of [x] are passed through [al_comments] instead of being
 * dropped. *)
let real_al_info_cpp x =
  let abstract_pi =
    { str = str_of_info x;
      file = "";
      charpos = magic_real_number;
      line = magic_real_number;
      column = magic_real_number } in
  let kept_comments = al_comments !(x.comments_tag) in
  { pinfo = AbstractLineTok abstract_pi;
    cocci_tag = ref emptyAnnot;
    annots_tag = Token_annot.empty;
    comments_tag = ref kept_comments;
  }
1109 | ||
91eba41f | 1110 | |
34e49164 C |
1111 | (*****************************************************************************) |
1112 | (* Views *) | |
1113 | (*****************************************************************************) | |
1114 | ||
(* Transform a list of arguments (or parameters) where the commas are
 * represented via the wrap2 and associated with an element, into
 * a list where the commas stand on their own. f(1,2,2) was
 * [(1,[]); (2,[,]); (2,[,])] and becomes [1;',';2;',';2].
 *
 * Used in cocci_vs_c.ml, to have a more direct correspondence between
 * the ast_cocci of julia and ast_c.
 *
 * An element whose token list is empty yields just [Left e]; otherwise
 * its tokens are emitted as a [Right] placed before it.
 *)
let rec (split_comma: 'a wrap2 list -> ('a, il) either list) = fun args ->
  match args with
  | [] -> []
  | (e, ii) :: rest ->
      let tail = split_comma rest in
      if null ii
      then Left e :: tail
      else Right ii :: Left e :: tail
1130 | ||
(* Inverse view of split_comma: a [Right ii] immediately followed by a
 * [Left e] becomes [(e, ii)], a [Left e] on its own becomes [(e, [])].
 * A [Right] not followed by a [Left] raises [Impossible 59]. *)
let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) = fun items ->
  match items with
  | [] -> []
  | Left e :: rest ->
      (e, []) :: unsplit_comma rest
  | Right ii :: Left e :: rest ->
      (e, ii) :: unsplit_comma rest
  | Right _ :: _ ->
      raise (Impossible 59)
34e49164 C |
1141 | |
1142 | ||
1143 | ||
1144 | ||
485bce71 C |
1145 | (*****************************************************************************) |
1146 | (* Helpers, could also be put in lib_parsing_c.ml instead *) | |
1147 | (*****************************************************************************) | |
1148 | ||
91eba41f C |
1149 | (* should maybe be in pretty_print_c ? *) |
1150 | ||
(* Path of an included file as a plain string: path elements joined with
 * "/" for local and non-local includes, the raw string for weird ones. *)
let s_of_inc_file inc_file =
  match inc_file with
  | Weird s -> s
  | Local xs | NonLocal xs -> xs +> Common.join "/"
485bce71 | 1156 | |
(* Same as s_of_inc_file but with the delimiters: double quotes around a
 * local include, angle brackets around a non-local one. *)
let s_of_inc_file_bis inc_file =
  match inc_file with
  | Weird s -> s
  | Local xs ->
      let path = xs +> Common.join "/" in
      "\"" ^ (path ^ "\"")
  | NonLocal xs ->
      let path = xs +> Common.join "/" in
      "<" ^ (path ^ ">")
485bce71 | 1162 | |
(* First component (the optional name) of a field kind, for both simple
 * fields and bit-fields. *)
let fieldname_of_fieldkind = function
  | Simple (sopt, _ft)
  | BitField (sopt, _ft, _info, _expr) -> sopt
485bce71 | 1167 | |
91eba41f | 1168 | |
(* Comma-separated string of the attribute texts in [attr]. *)
let s_of_attr attr =
  let text_of (Attribute s, _ii) = s in
  attr +> List.map text_of +> Common.join ","
113803cf | 1173 | |
708f4980 C |
1174 | |
1175 | (* ------------------------------------------------------------------------- *) | |
(* Textual form of a name:
 * - regular name: its string;
 * - concatenated name: the parts joined with "##";
 * - variadic name: "##" followed by the string;
 * - ident builder: "s(a,b,...)". *)
let str_of_name ident =
  match ident with
  | RegularName (s, _ii) -> s
  | CppVariadicName (s, _ii) -> "##" ^ s
  | CppConcatenatedName parts ->
      let part_str (x, _iiop) = unwrap x in
      parts +> List.map part_str +> Common.join "##"
  | CppIdentBuilder ((s, _iis), args) ->
      let arg_str ((x, _iix), _iicomma) = x in
      let inside = args +> List.map arg_str +> Common.join "," in
      s ^ "(" ^ inside ^ ")"
1186 | ||
(* String and tokens of a name.
 * - regular name / ident builder: the string and its own tokens;
 * - variadic name: the token list is turned into a pair by
 *   Common.tuple_of_list2 and only the second token is returned;
 * - concatenated name: string and tokens of the first part; an empty
 *   concatenation raises [Impossible 60]. *)
let get_s_and_ii_of_name name =
  match name with
  | RegularName (s, iis)
  | CppIdentBuilder ((s, iis), _) -> (s, iis)
  | CppVariadicName (s, iis) ->
      let (_iop, ident_tok) = Common.tuple_of_list2 iis in
      (s, [ident_tok])
  | CppConcatenatedName [] -> raise (Impossible 60)
  | CppConcatenatedName (((s, iis), _noiiop) :: _) -> (s, iis)
b1b2de81 | 1200 | |
(* String of a name together with the first of its tokens.
 * Uses List.hd, so it fails if the name has no token. *)
let get_s_and_info_of_name name =
  match get_s_and_ii_of_name name with
  | (s, ii) -> (s, List.hd ii)

(* First token of a name. Uses List.hd, so it fails if there is none. *)
let info_of_name name =
  match get_s_and_ii_of_name name with
  | (_s, ii) -> List.hd ii

(* All tokens of a name, as returned by get_s_and_ii_of_name. *)
let ii_of_name name =
  match get_s_and_ii_of_name name with
  | (_s, ii) -> ii
1212 | ||
(* Tokens local to an expression, with the tokens of an embedded name
 * inlined:
 * - identifier: the tokens of the name (the expression's own token list
 *   is asserted to be empty);
 * - record access (. or ->): the expression's tokens followed by those of
 *   the field name;
 * - anything else: the expression's own tokens. *)
let get_local_ii_of_expr_inlining_ii_of_name e =
  let ((ebis, _typ), ii) = e in
  match ebis with
  | Ident name ->
      assert (null ii);
      ii_of_name name
  | RecordAccess (_, name)
  | RecordPtAccess (_, name) ->
      ii @ ii_of_name name
  | _ -> ii
1224 | ||
1225 | ||
(* Tokens local to a type: for a type name whose own token list is empty,
 * the tokens of the name; otherwise the type's own tokens. *)
let get_local_ii_of_tybis_inlining_ii_of_name = function
  | (TypeName (name, _typ), []) -> ii_of_name name
  | (_, ii) -> ii
1230 | ||
(* the following is used to obtain the argument to LocalVar:
 * the pinfo of the first token of the type, or [None] if it has none. *)
let info_of_type ft =
  let (_qu, ty) = ft in
  (* bugfix: because of string->name, the ii can be deeper *)
  match get_local_ii_of_tybis_inlining_ii_of_name ty with
  | [] -> None
  | first :: _ -> Some first.pinfo
978fd7e5 | 1239 | |
708f4980 C |
(* only Label and Goto have name.
 * - label: tokens of the name followed by the statement's tokens;
 * - goto: the statement's two tokens with the tokens of the name inserted
 *   between them;
 * - anything else: the statement's own tokens. *)
let get_local_ii_of_st_inlining_ii_of_name st =
  match st with
  | (Labeled (Label (name, _body)), ii) ->
      ii_of_name name @ ii
  | (Jump (Goto name), ii) ->
      let (first_tok, last_tok) = Common.tuple_of_list2 ii in
      first_tok :: (ii_of_name name @ [last_tok])
  | (_, ii) -> ii
1248 | ||
ae4735db | 1249 | |
708f4980 C |
1250 | |
1251 | (* ------------------------------------------------------------------------- *) | |
(* String form of the parameter's name, computed through
 * Common.map_option on its optional [p_namei] field. *)
let name_of_parameter param =
  let name_opt = param.p_namei in
  name_opt +> Common.map_option str_of_name
1254 | ||
abad11c5 C |
1255 | |
1256 | (* ------------------------------------------------------------------------- *) | |
1257 | (* Annotations on tokens *) | |
1258 | (* ------------------------------------------------------------------------- *) | |
1259 | ||
(* to put a given annotation on a token: the token's [annots_tag] field is
 * updated in place with the result of Token_annot.put_annot. *)
let put_annot_info info key value =
  let updated = Token_annot.put_annot key value info.annots_tag in
  info.annots_tag <- updated
1263 | ||
(* to check if a token has such an annotation: looks [key] up in the
 * token's [annots_tag] through Token_annot.get_annot. *)
let get_annot_info info key =
  let annots = info.annots_tag in
  Token_annot.get_annot annots key