Release coccinelle-0.1.1
[bpt/coccinelle.git] / parsing_c / ast_c.ml
CommitLineData
34e49164
C
1(* Copyright (C) 2002-2008 Yoann Padioleau
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
11 *)
12open Common
13
14(*****************************************************************************)
15(* The AST C related types *)
16(*****************************************************************************)
17
18(* Cocci: Each token will be decorated in the future by the mcodekind
19 * of cocci. It is the job of the pretty printer to look at this
20 * information and decide to print or not the token (and also the
21 * pending '+' associated sometimes with the token).
22 *
23 * The first time that we parse the original C file, the mcodekind is
24 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
25 * associated. This is what I call a "clean" expr/statement/....
26 *
27 * Each token will also be decorated in the future with an environment,
28 * because the pending '+' may contain metavariables that refer to some
29 * C code.
30 *
31 * Update: Now I use a ref! so take care.
32 *
33 * Sometimes we want to add someting at the beginning or at the end
34 * of a construct. For 'function' and 'decl' we want add something
35 * to their left and for 'if' 'while' et 'for' and so on at their right.
36 * We want some kinds of "virtual placeholders" that represent the start or
37 * end of a construct. We use fakeInfo for that purpose.
38 * To identify those cases I have added a fakestart/fakeend comment.
39 *
40 * convention: I often use 'ii' for the name of a list of info.
41 *
42 *)
43
44(* forunparser: *)
45
46type posl = int * int (* lin-col, for MetaPosValList, for position variables *)
47type virtual_position = Common.parse_info * int (* character offset *)
48type parse_info =
49 (* Present both in ast and list of tokens *)
50 | OriginTok of Common.parse_info
51 (* Present only in ast and generated after parsing. Used mainly
52 * by Julia, to add stuff at virtual places, beginning of func or decl *)
53 | FakeTok of string * virtual_position
54 (* Present both in ast and list of tokens. *)
55 | ExpandedTok of Common.parse_info * virtual_position
56 (* Present neither in ast nor in list of tokens
57 * but only in the '+' of the mcode of some tokens. Those kind of tokens
58 * are used to be able to use '=' to compare big ast portions.
59 *)
60 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
61
62type info = {
63 pinfo : parse_info;
64 cocci_tag: (Ast_cocci.mcodekind * metavars_binding) ref;
65 comments_tag: comments_around ref; (* set in comment_annotater.ml *)
66 (* todo? token_info : sometimes useful to know what token it was *)
67 }
68and il = info list
69
70(* wrap2 is like wrap, except that I use it often for separator such
71 * as ','. In that case the info is associated to the argument that
72 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *)
73and 'a wrap = 'a * il
74and 'a wrap2 = 'a * il
75
76(* ------------------------------------------------------------------------- *)
77(* C Type *)
78(* ------------------------------------------------------------------------- *)
79(* Could have more precise type in fullType, in expression, etc, but
80 * it requires to do too much things in parsing such as checking no
81 * conflicting structname, computing value, etc. Better to separate
82 * concern, so I put '=>' to mean what we would really like. In fact
83 * what we really like is defining another fullType, expression, etc
84 * from scratch, because many stuff are just sugar.
85 *
86 * invariant: Array and FunctionType have also typeQualifier but they
87 * dont have sense. I put this to factorise some code. If you look in
88 * grammar, you see that we can never specify const for the array
89 * himself (but we can do it for pointer).
90 *
91 *
92 * Because of ExprStatement, we can have more 'new scope' events, but
93 * rare I think. For instance with 'array of constExpression' there can
94 * have an exprStatement and a new (local) struct defined. Same for
95 * Constructor.
96 *
97 * Some stuff are tagged semantic: which means that they are computed
98 * after parsing. *)
99
100
101and fullType = typeQualifier * typeC
102and typeC = typeCbis wrap
103
104and typeCbis =
105 | BaseType of baseType
106
107 | Pointer of fullType
108 | Array of constExpression option * fullType
109 | FunctionType of functionType
110
111 | Enum of string option * enumType
112 | StructUnion of structUnion * string option * structType (* new scope *)
113
114 | EnumName of string
115 | StructUnionName of structUnion * string
116
117 | TypeName of string * fullType option (* semantic: filled later *)
118
119 | ParenType of fullType (* forunparser: *)
120
121 (* gccext: TypeOfType may seems useless, why declare a __typeof__(int)
122 * x; ? But when used with macro, it allows to fix a problem of C which
123 * is that type declaration can be spread around the ident. Indeed it
124 * may be difficult to have a macro such as '#define macro(type,
125 * ident) type ident;' because when you want to do a macro(char[256],
126 * x), then it will generate invalid code, but with a '#define
127 * macro(type, ident) __typeof(type) ident;' it will work. *)
128 | TypeOfExpr of expression
129 | TypeOfType of fullType
130
131(* -------------------------------------- *)
132 and baseType = Void
133 | IntType of intType
134 | FloatType of floatType
135
136 (* stdC: type section
137 * add a | SizeT ?
138 * note: char and signed char are semantically different!!
139 *)
140 and intType = CChar (* obsolete? | CWchar *)
141 | Si of signed
142
143 and signed = sign * base
144 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
145 and sign = Signed | UnSigned
146
147 and floatType = CFloat | CDouble | CLongDouble
148
149
150 (* -------------------------------------- *)
151 and structUnion = Struct | Union
152 and structType = (field wrap) list (* ; *)
153
154 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
155 and field = FieldDeclList of fieldkind wrap2 list (* , *)
156 | EmptyField (* gccext: *)
157
158 (* At first I thought that a bitfield could be only Signed/Unsigned.
159 * But it seems that gcc allow char i:4. C rule must say that you
160 * can cast into int so enum too, ...
161 *)
162 and fieldkind = fieldkindbis wrap (* s : *)
163 and fieldkindbis =
164 | Simple of string option * fullType
165 | BitField of string option * fullType * constExpression
166 (* fullType => BitFieldInt | BitFieldUnsigned *)
167
168
169 (* -------------------------------------- *)
170 and enumType = (string * constExpression option) wrap (* s = *)
171 wrap2 (* , *) list
172 (* => string * int list *)
173
174
175 (* -------------------------------------- *)
176 (* return * (params * has "...") *)
177 and functionType = fullType * (parameterType wrap2 list * bool wrap)
178 and parameterType = (bool * string option * fullType) wrap (* reg s *)
179 (* => (bool (register) * fullType) list * bool *)
180
181
182and typeQualifier = typeQualifierbis wrap
183and typeQualifierbis = {const: bool; volatile: bool}
184
185
186(* ------------------------------------------------------------------------- *)
187(* C expression *)
188(* ------------------------------------------------------------------------- *)
189and expression = (expressionbis * exp_info ref (* semantic: *)) wrap
190and local = LocalVar of parse_info | NotLocalVar
191and test = Test | NotTest
192and exp_type = fullType * local
193and exp_info = exp_type option * test
194and expressionbis =
195
196 (* Ident can be a enumeration constant, a simple variable, a name of a func.
197 * With cppext, Ident can also be the name of a macro. Sparse says
198 * "an identifier with a meaning is a symbol".
199 *)
200 | Ident of string (* todo? more semantic info such as LocalFunc *)
201 | Constant of constant
202 | FunCall of expression * argument wrap2 (* , *) list
203 (* gccext: x ? /* empty */ : y <=> x ? x : y; *)
204 | CondExpr of expression * expression option * expression
205
206 (* should be considered as statements, bad C langage *)
207 | Sequence of expression * expression
208 | Assignment of expression * assignOp * expression
209
210 | Postfix of expression * fixOp
211 | Infix of expression * fixOp
212 | Unary of expression * unaryOp
213 | Binary of expression * binaryOp * expression
214
215 | ArrayAccess of expression * expression
216 | RecordAccess of expression * string
217 | RecordPtAccess of expression * string
218 (* redundant normally, could replace it by DeRef RecordAcces *)
219
220 | SizeOfExpr of expression
221 | SizeOfType of fullType
222 | Cast of fullType * expression
223
224 (* gccext: *)
225