Release coccinelle-0.1.2
[bpt/coccinelle.git] / parsing_c / ast_c.ml
CommitLineData
485bce71 1(* Copyright (C) 2002, 2006, 2007, 2008 Yoann Padioleau
34e49164
C
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
11 *)
12open Common
13
14(*****************************************************************************)
15(* The AST C related types *)
16(*****************************************************************************)
17
485bce71
C
18(* To allow some transformations over the AST, we keep as much information
19 * as possible in the AST such as the tokens content and their locations.
20 * Those info are called 'info' (how original) and can be tagged.
21 * For instance one tag may say that the unparser should remove this token.
22 *
23 * Update: Now I use a ref! in those 'info' so take care.
24 *
25 * Sometimes we want to add someting at the beginning or at the end
26 * of a construct. For 'function' and 'decl' we want to add something
27 * to their left and for 'if' 'while' et 'for' and so on at their right.
28 * We want some kinds of "virtual placeholders" that represent the start or
29 * end of a construct. We use fakeInfo for that purpose.
30 * To identify those cases I have added a fakestart/fakeend comment.
31 *
32 * convention: I often use 'ii' for the name of a list of info.
33 *
34 * update: I now allow ifdefs in the ast but there must be only between
35 * "sequencable" elements. They can be put in a type only if this type
36 * is used only in a list, like at toplevel, used in 'toplevel list',
37 * or inside compound, used in 'statement list'. I must not allow
38 * ifdef anywhere. For instance I can not make ifdef a statement
39 * cos some instruction like If accept only one statement and the
40 * ifdef directive must not take the place of a legitimate instruction.
41 * We had a similar phenomena in SmPL where we have the notion
42 * of statement and sequencable statement too. Once you have
43 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
44 * and introduce the ifdef.
45 *
46 * update: those ifdefs are either passed, or present in the AST but in
47 * a flat form. To structure those flat ifdefs you have to run
48 * a transformation that will put in a tree the statements inside
49 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
50 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
51 *
52 * Some stuff are tagged semantic: which means that they are computed
53 * after parsing.
54 *
55 * cocci: Each token will be decorated in the future by the mcodekind
34e49164
C
56 * of cocci. It is the job of the pretty printer to look at this
57 * information and decide to print or not the token (and also the
58 * pending '+' associated sometimes with the token).
59 *
60 * The first time that we parse the original C file, the mcodekind is
61 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
62 * associated. This is what I call a "clean" expr/statement/....
63 *
64 * Each token will also be decorated in the future with an environment,
65 * because the pending '+' may contain metavariables that refer to some
66 * C code.
67 *
34e49164 68 *
485bce71
C
69 * All of this means that some elements in this AST are present only if
70 * some annotation/transformation has been done on the original AST returned
71 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
34e49164
C
72 *)
73
74(* forunparser: *)
75
76type posl = int * int (* lin-col, for MetaPosValList, for position variables *)
485bce71
C
77
78(* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
34e49164 79type virtual_position = Common.parse_info * int (* character offset *)
485bce71 80
34e49164
C
81type parse_info =
82 (* Present both in ast and list of tokens *)
83 | OriginTok of Common.parse_info
84 (* Present only in ast and generated after parsing. Used mainly
85 * by Julia, to add stuff at virtual places, beginning of func or decl *)
86 | FakeTok of string * virtual_position
87 (* Present both in ast and list of tokens. *)
88 | ExpandedTok of Common.parse_info * virtual_position
89 (* Present neither in ast nor in list of tokens
90 * but only in the '+' of the mcode of some tokens. Those kind of tokens
91 * are used to be able to use '=' to compare big ast portions.
92 *)
93 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
94
95type info = {
96 pinfo : parse_info;
485bce71
C
97 (* this tag can be changed, which is how we can express some progra
98 * transformations by tagging the tokens involved in this transformation.
99 *)
34e49164 100 cocci_tag: (Ast_cocci.mcodekind * metavars_binding) ref;
485bce71
C
101 (* set in comment_annotater.ml *)
102 comments_tag: comments_around ref;
34e49164
C
103 (* todo? token_info : sometimes useful to know what token it was *)
104 }
105and il = info list
106
107(* wrap2 is like wrap, except that I use it often for separator such
108 * as ','. In that case the info is associated to the argument that
109 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *)
110and 'a wrap = 'a * il
111and 'a wrap2 = 'a * il
112
113(* ------------------------------------------------------------------------- *)
114(* C Type *)
115(* ------------------------------------------------------------------------- *)
116(* Could have more precise type in fullType, in expression, etc, but
117 * it requires to do too much things in parsing such as checking no
118 * conflicting structname, computing value, etc. Better to separate
119 * concern, so I put '=>' to mean what we would really like. In fact
120 * what we really like is defining another fullType, expression, etc
121 * from scratch, because many stuff are just sugar.
122 *
123 * invariant: Array and FunctionType have also typeQualifier but they
124 * dont have sense. I put this to factorise some code. If you look in
125 * grammar, you see that we can never specify const for the array
126 * himself (but we can do it for pointer).
127 *
128 *
129 * Because of ExprStatement, we can have more 'new scope' events, but
130 * rare I think. For instance with 'array of constExpression' there can
131 * have an exprStatement and a new (local) struct defined. Same for
132 * Constructor.
133 *
134 * Some stuff are tagged semantic: which means that they are computed
485bce71
C
135 * after parsing.
136*)
34e49164
C
137
138
139and fullType = typeQualifier * typeC
140and typeC = typeCbis wrap
141
142and typeCbis =
143 | BaseType of baseType
144
145 | Pointer of fullType
146 | Array of constExpression option * fullType
147 | FunctionType of functionType
148
149 | Enum of string option * enumType
150 | StructUnion of structUnion * string option * structType (* new scope *)
151
152 | EnumName of string
153 | StructUnionName of structUnion * string
154
155 | TypeName of string * fullType option (* semantic: filled later *)
156
157 | ParenType of fullType (* forunparser: *)
158
485bce71
C
159 (* gccext: TypeOfType may seems useless; why declare a
160 * __typeof__(int) x; ?
161 * But when used with macro, it allows to fix a problem of C which
34e49164 162 * is that type declaration can be spread around the ident. Indeed it
485bce71
C
163 * may be difficult to have a macro such as
164 * '#define macro(type, ident) type ident;'
165 * because when you want to do a
166 * macro(char[256], x),
167 * then it will generate invalid code, but with a
168 * '#define macro(type, ident) __typeof(type) ident;'
169 * it will work. *)
34e49164
C
170 | TypeOfExpr of expression
171 | TypeOfType of fullType
485bce71
C
172
173 (* cppext: IfdefType TODO *)
34e49164
C
174
175(* -------------------------------------- *)
176 and baseType = Void
177 | IntType of intType
178 | FloatType of floatType
179
180 (* stdC: type section
181 * add a | SizeT ?
182 * note: char and signed char are semantically different!!
183 *)
184 and intType = CChar (* obsolete? | CWchar *)
185 | Si of signed
186
187 and signed = sign * base
188 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
189 and sign = Signed | UnSigned
190
191 and floatType = CFloat | CDouble | CLongDouble
192
193
194 (* -------------------------------------- *)
195 and structUnion = Struct | Union
485bce71
C
196 and structType = field list
197 and field = fieldbis wrap
198 and fieldbis =
199 | DeclarationField of field_declaration
200 | EmptyField (* gccext: *)
201 (* cppext: *)
202 | MacroStructDeclTodo
203
204 (* cppext: *)
205 | CppDirectiveStruct of cpp_directive
206 | IfdefStruct of ifdef_directive (* * field list list *)
207
34e49164
C
208
209 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
485bce71
C
210 and field_declaration =
211 | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *)
34e49164
C
212
213 (* At first I thought that a bitfield could be only Signed/Unsigned.
214 * But it seems that gcc allow char i:4. C rule must say that you
215 * can cast into int so enum too, ...
216 *)
217 and fieldkind = fieldkindbis wrap (* s : *)
218 and fieldkindbis =
219 | Simple of string option * fullType
220 | BitField of string option * fullType * constExpression
221 (* fullType => BitFieldInt | BitFieldUnsigned *)
222
223
224 (* -------------------------------------- *)
225 and enumType = (string * constExpression option) wrap (* s = *)
226 wrap2 (* , *) list
227 (* => string * int list *)
228
229
230 (* -------------------------------------- *)
231 (* return * (params * has "...") *)
232 and functionType = fullType * (parameterType wrap2 list * bool wrap)
233 and parameterType = (bool * string option * fullType) wrap (* reg s *)
234 (* => (bool (register) * fullType) list * bool *)
235
236
237and typeQualifier = typeQualifierbis wrap
238and typeQualifierbis = {const: bool; volatile: bool}
239
485bce71
C
240(* gccext: cppext: *)
241and attribute = attributebis wrap
242 and attributebis =
243 | Attribute of string
34e49164
C
244
245(* ------------------------------------------------------------------------- *)
246(* C expression *)
247(* ------------------------------------------------------------------------- *)
248and expression = (expressionbis * exp_info ref (* semantic: *)) wrap
485bce71
C
249 and exp_info = exp_type option * test
250 and exp_type = fullType * local
251 and local = LocalVar of parse_info | NotLocalVar (* cocci: *)
252 and test = Test | NotTest (* cocci: *)
253
254 and expressionbis =
34e49164
C
255
256 (* Ident can be a enumeration constant, a simple variable, a name of a func.
257 * With cppext, Ident can also be the name of a macro. Sparse says
258 * "an identifier with a meaning is a symbol".
259 *)
260 | Ident of string (* todo? more semantic info such as LocalFunc *)
261 | Constant of constant
262 | FunCall of expression * argument wrap2 (* , *) list
263 (* gccext: x ? /* empty */ : y <=> x ? x : y; *)
264 | CondExpr of expression * expression option * expression
265
266 (* should be considered as statements, bad C langage *)
267 | Sequence of expression * expression
268 | Assignment of expression * assignOp * expression
269
270 | Postfix of expression * fixOp
271 | Infix of expression * fixOp
272 | Unary of expression * unaryOp
273 | Binary of expression * binaryOp * expression
274
275 | ArrayAccess of expression * expression
276 | RecordAccess of expression * string
277 | RecordPtAccess of expression * string
278 (* redundant normally, could replace it by DeRef RecordAcces *)
279
280 | SizeOfExpr of expression
281 | SizeOfType of fullType
282 | Cast of fullType * expression
283
284 (* gccext: *)
285