Release coccinelle-0.1.8
[bpt/coccinelle.git] / parsing_c / ast_c.ml
CommitLineData
0708f913
C
1(* Yoann Padioleau
2 *
3 * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau
34e49164
C
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14open Common
15
16(*****************************************************************************)
17(* The AST C related types *)
18(*****************************************************************************)
b1b2de81
C
19(*
20 * Some stuff are tagged semantic: which means that they are computed
21 * after parsing.
22 *
23 * This means that some elements in this AST are present only if
24 * some annotation/transformation has been done on the original AST returned
25 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
26 *)
27
28
29(* ------------------------------------------------------------------------- *)
30(* Token/info *)
31(* ------------------------------------------------------------------------- *)
34e49164 32
485bce71
C
33(* To allow some transformations over the AST, we keep as much information
34 * as possible in the AST such as the tokens content and their locations.
35 * Those info are called 'info' (how original) and can be tagged.
36 * For instance one tag may say that the unparser should remove this token.
37 *
38 * Update: Now I use a ref! in those 'info' so take care.
0708f913
C
39 * That means that modifications of the info of tokens can have
40 * an effect on the info stored in the ast (which is sometimes
41 * convenient, cf unparse_c.ml or comment_annotater_c.ml)
42 *
b1b2de81 43 * convention: I often use 'ii' for the name of a list of info.
0708f913 44 *
485bce71
C
45 * Sometimes we want to add someting at the beginning or at the end
46 * of a construct. For 'function' and 'decl' we want to add something
47 * to their left and for 'if' 'while' et 'for' and so on at their right.
48 * We want some kinds of "virtual placeholders" that represent the start or
49 * end of a construct. We use fakeInfo for that purpose.
50 * To identify those cases I have added a fakestart/fakeend comment.
51 *
485bce71 52 * cocci: Each token will be decorated in the future by the mcodekind
34e49164
C
53 * of cocci. It is the job of the pretty printer to look at this
54 * information and decide to print or not the token (and also the
55 * pending '+' associated sometimes with the token).
56 *
57 * The first time that we parse the original C file, the mcodekind is
58 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
59 * associated. This is what I call a "clean" expr/statement/....
60 *
61 * Each token will also be decorated in the future with an environment,
62 * because the pending '+' may contain metavariables that refer to some
63 * C code.
64 *
34e49164
C
65 *)
66
67(* forunparser: *)
68
91eba41f 69type posl = int * int (* line-col, for MetaPosValList, for position variables *)
708f4980 70 (* with sexp *)
485bce71
C
71
72(* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
34e49164 73type virtual_position = Common.parse_info * int (* character offset *)
708f4980 74 (* with sexp *)
485bce71 75
34e49164
C
76type parse_info =
77 (* Present both in ast and list of tokens *)
78 | OriginTok of Common.parse_info
79 (* Present only in ast and generated after parsing. Used mainly
80 * by Julia, to add stuff at virtual places, beginning of func or decl *)
81 | FakeTok of string * virtual_position
82 (* Present both in ast and list of tokens. *)
83 | ExpandedTok of Common.parse_info * virtual_position
0708f913 84
34e49164
C
85 (* Present neither in ast nor in list of tokens
86 * but only in the '+' of the mcode of some tokens. Those kind of tokens
87 * are used to be able to use '=' to compare big ast portions.
88 *)
89 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
708f4980 90 (* with sexp *)
34e49164
C
91
92type info = {
93 pinfo : parse_info;
b1b2de81
C
94
95 (* this cocci_tag can be changed, which is how we can express some program
485bce71
C
96 * transformations by tagging the tokens involved in this transformation.
97 *)
708f4980 98 cocci_tag: (Ast_cocci.mcodekind * metavars_binding) option ref;
0708f913 99 (* set in comment_annotater_c.ml *)
485bce71 100 comments_tag: comments_around ref;
b1b2de81 101
34e49164
C
102 (* todo? token_info : sometimes useful to know what token it was *)
103 }
104and il = info list
105
106(* wrap2 is like wrap, except that I use it often for separator such
107 * as ','. In that case the info is associated to the argument that
708f4980
C
108 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])].
109 *
110 * wrap3 is like wrap, except that I use it in case sometimes it
111 * will be empty because the info will be included in a nested
112 * entity (e.g. for Ident in expr because it's inlined in the name)
113 * so user should never assume List.length wrap3 > 0.
114 *)
34e49164
C
115and 'a wrap = 'a * il
116and 'a wrap2 = 'a * il
708f4980 117and 'a wrap3 = 'a * il (* * evotype*)
34e49164 118
b1b2de81
C
119(* ------------------------------------------------------------------------- *)
120(* Name *)
121(* ------------------------------------------------------------------------- *)
122
123(* was called 'ident' before, but 'name' is I think better
124 * as concatenated strings can be used not only for identifiers and for
125 * declarators, but also for fields, for labels, etc.
708f4980
C
126 *
127 * Note: because now the info is embeded in the name, the info for
128 * expression like Ident, or types like Typename, are not anymore
129 * stored in the expression or type. Hence if you assume this,
130 * which was true before, you are now wrong. So never write code like
131 * let (unwrape,_), ii = e and use 'ii' believing it contains
132 * the local ii to e. If you want to do that, use the appropiate
133 * wrapper get_local_ii_of_expr_inlining_ii_of_name.
b1b2de81
C
134 *)
135and name =
136 | RegularName of string wrap
137 | CppConcatenatedName of (string wrap) wrap2 (* the ## separators *) list
138 (* normally only used inside list of things, as in parameters or arguments
139 * in which case, cf cpp-manual, it has a special meaning *)
140 | CppVariadicName of string wrap (* ## s *)
141 | CppIdentBuilder of string wrap (* s ( ) *) *
142 ((string wrap) wrap2 list) (* arguments *)
143
144
34e49164
C
145(* ------------------------------------------------------------------------- *)
146(* C Type *)
147(* ------------------------------------------------------------------------- *)
148(* Could have more precise type in fullType, in expression, etc, but
149 * it requires to do too much things in parsing such as checking no
150 * conflicting structname, computing value, etc. Better to separate
91eba41f 151 * concern. So I put '=>' to mean what we would really like. In fact
34e49164
C
152 * what we really like is defining another fullType, expression, etc
153 * from scratch, because many stuff are just sugar.
154 *
155 * invariant: Array and FunctionType have also typeQualifier but they
156 * dont have sense. I put this to factorise some code. If you look in
91eba41f
C
157 * the grammar, you see that we can never specify const for the array
158 * himself (but we can do it for pointer) or function, we always
159 * have in the action rule of the grammar a { (nQ, FunctionType ...) }.
34e49164
C
160 *
161 *
162 * Because of ExprStatement, we can have more 'new scope' events, but
163 * rare I think. For instance with 'array of constExpression' there can
164 * have an exprStatement and a new (local) struct defined. Same for
165 * Constructor.
166 *
b1b2de81 167 *)
34e49164
C
168
169
170and fullType = typeQualifier * typeC
708f4980 171 and typeC = typeCbis wrap (* todo reput wrap3 *)
34e49164 172
b1b2de81 173 and typeCbis =
34e49164
C
174 | BaseType of baseType
175
176 | Pointer of fullType
177 | Array of constExpression option * fullType
178 | FunctionType of functionType
179
180 | Enum of string option * enumType
181 | StructUnion of structUnion * string option * structType (* new scope *)
182
183 | EnumName of string
184 | StructUnionName of structUnion * string
185
b1b2de81 186 | TypeName of name * fullType option (* semantic: filled later *)
34e49164
C
187
188 | ParenType of fullType (* forunparser: *)
189
b1b2de81 190 (* gccext: TypeOfType below may seems useless; Why declare a
485bce71 191 * __typeof__(int) x; ?
b1b2de81 192 * When used with macros, it allows to fix a problem of C which
34e49164 193 * is that type declaration can be spread around the ident. Indeed it
485bce71
C
194 * may be difficult to have a macro such as
195 * '#define macro(type, ident) type ident;'
196 * because when you want to do a
197 * macro(char[256], x),
198 * then it will generate invalid code, but with a
199 * '#define macro(type, ident) __typeof(type) ident;'
b1b2de81
C
200 * it will work.
201 *)
34e49164
C
202 | TypeOfExpr of expression
203 | TypeOfType of fullType
485bce71
C
204
205 (* cppext: IfdefType TODO *)
34e49164
C
206
207(* -------------------------------------- *)
208 and baseType = Void
209 | IntType of intType
210 | FloatType of floatType
211
212 (* stdC: type section
213 * add a | SizeT ?
214 * note: char and signed char are semantically different!!
215 *)
216 and intType = CChar (* obsolete? | CWchar *)
217 | Si of signed
218
219 and signed = sign * base
220 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
221 and sign = Signed | UnSigned
222
223 and floatType = CFloat | CDouble | CLongDouble
224
225
226 (* -------------------------------------- *)
227 and structUnion = Struct | Union
485bce71 228 and structType = field list
708f4980 229 and field =
485bce71 230 | DeclarationField of field_declaration
b1b2de81 231 (* gccext: *)
708f4980 232 | EmptyField of info
b1b2de81 233
485bce71 234 (* cppext: *)
708f4980
C
235 | MacroDeclField of (string * argument wrap2 list)
236 wrap (* optional ';'*)
485bce71
C
237
238 (* cppext: *)
239 | CppDirectiveStruct of cpp_directive
240 | IfdefStruct of ifdef_directive (* * field list list *)
241
34e49164
C
242
243 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
485bce71
C
244 and field_declaration =
245 | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *)
34e49164
C
246
247 (* At first I thought that a bitfield could be only Signed/Unsigned.
248 * But it seems that gcc allow char i:4. C rule must say that you
249 * can cast into int so enum too, ...
250 *)
b1b2de81
C
251 and fieldkind =
252 | Simple of name option * fullType
253 | BitField of name option * fullType *
254 info (* : *) * constExpression
255 (* fullType => BitFieldInt | BitFieldUnsigned *)
34e49164
C
256
257
258 (* -------------------------------------- *)
b1b2de81 259 and enumType = (name * (info (* = *) * constExpression) option)
34e49164
C
260 wrap2 (* , *) list
261 (* => string * int list *)
262
263
264 (* -------------------------------------- *)
265 (* return * (params * has "...") *)
266 and functionType = fullType * (parameterType wrap2 list * bool wrap)
b1b2de81
C
267 and parameterType =
268 { p_namei: name option;
269 p_register: bool wrap;
270 p_type: fullType;
271 }
272 (* => (bool (register) * fullType) list * bool *)
34e49164
C
273
274
275and typeQualifier = typeQualifierbis wrap
276and typeQualifierbis = {const: bool; volatile: bool}
277
485bce71
C
278(* gccext: cppext: *)
279and attribute = attributebis wrap
280 and attributebis =
281 | Attribute of string
34e49164
C
282
283(* ------------------------------------------------------------------------- *)
284(* C expression *)
285(* ------------------------------------------------------------------------- *)
708f4980 286and expression = (expressionbis * exp_info ref (* semantic: *)) wrap3
485bce71 287 and exp_info = exp_type option * test
0708f913 288 and exp_type = fullType (* Type_c.completed_and_simplified *) * local
485bce71
C
289 and local = LocalVar of parse_info | NotLocalVar (* cocci: *)
290 and test = Test | NotTest (* cocci: *)
291
292 and expressionbis =
34e49164
C
293
294 (* Ident can be a enumeration constant, a simple variable, a name of a func.
295 * With cppext, Ident can also be the name of a macro. Sparse says
b1b2de81
C
296 * "an identifier with a meaning is a symbol" *)
297 | Ident of name (* todo? more semantic info such as LocalFunc *)
298
34e49164
C
299 | Constant of constant
300 | FunCall of expression * argument wrap2 (* , *) list
b1b2de81 301 (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *)
34e49164
C
302 | CondExpr of expression * expression option * expression
303
304 (* should be considered as statements, bad C langage *)
faf9a90c
C
305 | Sequence of expression * expression
306 | Assignment of expression * assignOp * expression
34e49164 307
91eba41f
C
308
309 | Postfix of expression * fixOp
310 | Infix of expression * fixOp
311
34e49164
C
312 | Unary of expression * unaryOp
313 | Binary of expression * binaryOp * expression
314
91eba41f
C
315 | ArrayAccess of expression * expression
316
317 (* field ident access *)
b1b2de81
C
318 | RecordAccess of expression * name
319 | RecordPtAccess of expression * name
34e49164
C
320 (* redundant normally, could replace it by DeRef RecordAcces *)
321
322 | SizeOfExpr of expression
323 | SizeOfType of fullType
324 | Cast of fullType * expression
325
326 (* gccext: *)
708f4980 327 | StatementExpr of compound wrap (* ( ) new scope *)
34e49164
C
328 | Constructor of fullType * initialiser wrap2 (* , *) list
329
330 (* forunparser: *)
331 | ParenExpr of expression
332
485bce71
C
333 (* cppext: IfdefExpr TODO *)
334
34e49164 335 (* cppext: normmally just expression *)
708f4980 336 and argument = (expression, weird_argument) Common.either
0708f913 337 and weird_argument =
34e49164
C
338 | ArgType of parameterType
339 | ArgAction of action_macro
340 and action_macro =
485bce71 341 (* todo: ArgStatement of statement, possibly have ghost token *)
34e49164
C
342 | ActMisc of il
343
344
345 (* I put string for Int and Float because int would not be enough because
346 * OCaml int are 31 bits. So simpler to do string. Same reason to have
347 * string instead of int list for the String case.
348 *
b1b2de81 349 * note: -2 is not a constant, it is the unary operator '-'
34e49164
C
350 * applied to constant 2. So the string must represent a positive
351 * integer only. *)
352
353 and constant =
0708f913
C
354 | String of (string * isWchar)
355 | MultiString of string list (* can contain MacroString, todo: more info *)
34e49164 356 | Char of (string * isWchar) (* normally it is equivalent to Int *)
708f4980 357 | Int of (string * intType)
34e49164
C
358 | Float of (string * floatType)
359
360 and isWchar = IsWchar | IsChar
361
485bce71
C
362
363 and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not
364 | GetRefLabel (* gccext: GetRefLabel, via &&label notation *)
34e49164
C
365 and assignOp = SimpleAssign | OpAssign of arithOp
366 and fixOp = Dec | Inc
367
368 and binaryOp = Arith of arithOp | Logical of logicalOp
369
370 and arithOp =
371 | Plus | Minus | Mul | Div | Mod
372 | DecLeft | DecRight
373 | And | Or | Xor
374
375 and logicalOp =
376 | Inf | Sup | InfEq | SupEq
377 | Eq | NotEq
378 | AndLog | OrLog
379
380 and constExpression = expression (* => int *)
381
382
383(* ------------------------------------------------------------------------- *)
384(* C statement *)
385(* ------------------------------------------------------------------------- *)
386(* note: that assignement is not a statement but an expression;
387 * wonderful C langage.
388 *
389 * note: I use 'and' for type definition cos gccext allow statement as
b1b2de81
C
390 * expression, so need mutual recursive type definition.
391 *
392 *)
34e49164 393
708f4980 394and statement = statementbis wrap3
b1b2de81 395 and statementbis =
34e49164
C
396 | Labeled of labeled
397 | Compound of compound (* new scope *)
398 | ExprStatement of exprStatement
399 | Selection of selection (* have fakeend *)
400 | Iteration of iteration (* have fakeend *)
401 | Jump of jump
402
403 (* simplify cocci: only at the beginning of a compound normally *)
404 | Decl of declaration
405
406 (* gccext: *)
407 | Asm of asmbody
408 | NestedFunc of definition
409
410 (* cppext: *)
411 | MacroStmt
412
413
414
b1b2de81 415 and labeled = Label of name * statement
34e49164
C
416 | Case of expression * statement
417 | CaseRange of expression * expression * statement (* gccext: *)
418 | Default of statement
419
420 (* cppext:
421 * old: compound = (declaration list * statement list)
422 * old: (declaration, statement) either list
423 * Simplify cocci to just have statement list, by integrating Decl in stmt.
485bce71
C
424 *
425 * update: now introduce also the _sequencable to allow ifdef in the middle.
b1b2de81
C
426 * Indeed, I now allow ifdefs in the ast but they must be only between
427 * "sequencable" elements. They can be put in a type only if this type
428 * is used in a list, like at the toplevel, used in a 'toplevel list',
429 * or inside a compound, used in a 'statement list'. I must not allow
430 * ifdef anywhere. For instance I can not make ifdef a statement
431 * cos some instruction like If accept only one statement and the
432 * ifdef directive must not take the place of a legitimate instruction.
433 * We had a similar phenomena in SmPL where we have the notion
434 * of statement and sequencable statement too. Once you have
435 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
436 * and introduce the ifdef.
437 *
438 * update: those ifdefs are either passed, or present in the AST but in
439 * a flat form. To structure those flat ifdefs you have to run
440 * a transformation that will put in a tree the statements inside
441 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
442 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
443 *
34e49164 444 *)
485bce71
C
445 and compound = statement_sequencable list
446
447 (* cppext: easier to put at statement_list level than statement level *)
448 and statement_sequencable =
449 | StmtElem of statement
b1b2de81 450
485bce71
C
451 (* cppext: *)
452 | CppDirectiveStmt of cpp_directive
453 | IfdefStmt of ifdef_directive
454
455 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
456 | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list
34e49164
C
457
458 and exprStatement = expression option
459
460 (* for Switch, need check that all elements in the compound start
461 * with a case:, otherwise unreachable code.
462 *)
463 and selection =
464 | If of expression * statement * statement
465 | Switch of expression * statement
485bce71 466
34e49164
C
467
468 and iteration =
469 | While of expression * statement
470 | DoWhile of statement * expression
471 | For of exprStatement wrap * exprStatement wrap * exprStatement wrap *
472 statement
485bce71 473 (* cppext: *)
34e49164
C
474 | MacroIteration of string * argument wrap2 list * statement
475
b1b2de81 476 and jump = Goto of name
34e49164
C
477 | Continue | Break
478 | Return | ReturnExpr of expression
479 | GotoComputed of expression (* gccext: goto *exp ';' *)
480
481
482 (* gccext: *)
483 and asmbody = il (* string list *) * colon wrap (* : *) list
484 and colon = Colon of colon_option wrap2 list
485 and colon_option = colon_option_bis wrap
486 and colon_option_bis = ColonMisc | ColonExpr of expression
487
488
489(* ------------------------------------------------------------------------- *)
490(* Declaration *)
491(* ------------------------------------------------------------------------- *)
492(* (string * ...) option cos can have empty declaration or struct tag
493 * declaration.
494 *
b1b2de81 495 * Before I had a Typedef constructor, but why make this special case and not
34e49164 496 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
485bce71 497 * declarations ? So I try to generalise and not have Typedef either. This
34e49164
C
498 * requires more work in parsing. Better to separate concern.
499 *
500 * Before the need for unparser, I didn't have a DeclList but just a Decl.
501 *
502 * I am not sure what it means to declare a prototype inline, but gcc
503 * accepts it.
504 *)
505
34e49164
C
506and declaration =
507 | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *)
508 (* cppext: *)
708f4980 509 | MacroDecl of (string * argument wrap2 list) wrap (* fakestart *)
34e49164
C
510
511 and onedecl =
b1b2de81 512 { v_namei: (name * (info (* = *) * initialiser) option) option;
485bce71
C
513 v_type: fullType;
514 v_storage: storage;
515 v_local: local_decl; (* cocci: *)
516 v_attr: attribute list; (* gccext: *)
517 }
518 and storage = storagebis * bool (* gccext: inline or not *)
34e49164
C
519 and storagebis = NoSto | StoTypedef | Sto of storageClass
520 and storageClass = Auto | Static | Register | Extern
521
b1b2de81
C
522 and local_decl = LocalDecl | NotLocalDecl
523
34e49164
C
524 and initialiser = initialiserbis wrap
525 and initialiserbis =
526 | InitExpr of expression
527 | InitList of initialiser wrap2 (* , *) list
528 (* gccext: *)
529 | InitDesignators of designator list * initialiser
530 | InitFieldOld of string * initialiser
531 | InitIndexOld of expression * initialiser
532
533 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
534 and designator = designatorbis wrap
535 and designatorbis =
536 | DesignatorField of string
537 | DesignatorIndex of expression
538 | DesignatorRange of expression * expression
539
540(* ------------------------------------------------------------------------- *)
541(* Function definition *)
542(* ------------------------------------------------------------------------- *)
543(* Normally we should define another type functionType2 because there
544 * are more restrictions on what can define a function than a pointer
545 * function. For instance a function declaration can omit the name of the
b1b2de81 546 * parameter whereas a function definition can not. But, in some cases such
34e49164
C
547 * as 'f(void) {', there is no name too, so I simplified and reused the
548 * same functionType type for both declaration and function definition.
b1b2de81
C
549 *
550 * Also old style C does not have type in the parameter, so again simpler
551 * to abuse the functionType and allow missing type.
34e49164 552 *)
b1b2de81 553and definition = definitionbis wrap (* ( ) { } fakestart sto *)
485bce71 554 and definitionbis =
b1b2de81 555 { f_name: name;
708f4980 556 f_type: functionType; (* less? a functionType2 ? *)
485bce71
C
557 f_storage: storage;
558 f_body: compound;
559 f_attr: attribute list; (* gccext: *)
91eba41f 560 f_old_c_style: declaration list option;
485bce71
C
561 }
562 (* cppext: IfdefFunHeader TODO *)
34e49164
C
563
564(* ------------------------------------------------------------------------- *)
485bce71 565(* cppext: cpp directives, #ifdef, #define and #include body *)
34e49164 566(* ------------------------------------------------------------------------- *)
485bce71 567and cpp_directive =
485bce71 568 | Define of define
b1b2de81 569 | Include of includ
485bce71
C
570 | Undef of string wrap
571 | PragmaAndCo of il
b1b2de81 572(*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *)
485bce71 573
708f4980 574and define = string wrap (* #define s eol *) * (define_kind * define_val)
34e49164
C
575 and define_kind =
576 | DefineVar
485bce71 577 | DefineFunc of ((string wrap) wrap2 list) wrap (* () *)
34e49164 578 and define_val =
b1b2de81
C
579 (* most common case; e.g. to define int constant *)
580 | DefineExpr of expression
91eba41f 581
34e49164
C
582 | DefineStmt of statement
583 | DefineType of fullType
485bce71 584 | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *)
91eba41f 585
34e49164 586 | DefineFunction of definition
485bce71 587 | DefineInit of initialiser (* in practice only { } with possible ',' *)
b1b2de81 588
485bce71
C
589 (* TODO DefineMulti of define_val list *)
590
34e49164
C
591 | DefineText of string wrap
592 | DefineEmpty
593
485bce71 594 | DefineTodo
34e49164
C
595
596
485bce71
C
597
598and includ =
599 { i_include: inc_file wrap; (* #include s *)
600 (* cocci: computed in ? *)
601 i_rel_pos: include_rel_pos option ref;
602 (* cocci: cf -test incl *)
603 i_is_in_ifdef: bool;
604 (* cf cpp_ast_c.ml. set to None at parsing time. *)
605 i_content: (Common.filename (* full path *) * program) option;
606 }
34e49164
C
607 and inc_file =
608 | Local of inc_elem list
609 | NonLocal of inc_elem list
0708f913 610 | Weird of string (* ex: #include SYSTEM_H *)
34e49164
C
611 and inc_elem = string
612
485bce71
C
613 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
614 *
615 * The first_of and last_of store the list of prefixes that was
616 * introduced by the include. On #include <a/b/x>, if the include was
617 * the first in the file, it would give in first_of the following
618 * prefixes a/b/c; a/b/; a/ ; <empty>
619 *
620 * This is set after parsing, in cocci.ml, in update_rel_pos.
621 *)
34e49164 622 and include_rel_pos = {
485bce71
C
623 first_of : string list list;
624 last_of : string list list;
34e49164
C
625 }
626
485bce71
C
627
628
b1b2de81
C
629(* todo? to specialize if someone need more info *)
630and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *)
631 | IfdefDirective of (ifdefkind * matching_tag) wrap
632 and ifdefkind =
633 | Ifdef (* todo? of string ? of formula_cpp ? *)
634 | IfdefElseif (* same *)
635 | IfdefElse (* same *)
636 | IfdefEndif
637 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
638 * a global so it means if you parse the same file twice you may get
639 * different id. I try now to avoid this pb by resetting it each
640 * time I parse a file.
641 *)
642 and matching_tag =
643 IfdefTag of (int (* tag *) * int (* total with this tag *))
644
645
485bce71
C
646
647
648
34e49164
C
649(* ------------------------------------------------------------------------- *)
650(* The toplevels elements *)
651(* ------------------------------------------------------------------------- *)
652and toplevel =
653 | Declaration of declaration
654 | Definition of definition
655
656 (* cppext: *)
485bce71
C
657 | CppTop of cpp_directive
658 | IfdefTop of ifdef_directive (* * toplevel list *)
659
34e49164
C
660 (* cppext: *)
661 | MacroTop of string * argument wrap2 list * il
662
663 | EmptyDef of il (* gccext: allow redundant ';' *)
664 | NotParsedCorrectly of il
665
34e49164
C
666 | FinalDef of info (* EOF *)
667
668(* ------------------------------------------------------------------------- *)
669and program = toplevel list
670
34e49164
C
671(*****************************************************************************)
672(* Cocci Bindings *)
673(*****************************************************************************)
674(* Was previously in pattern.ml, but because of the transformer,
675 * we need to decorate each token with some cocci code AND the environment
676 * for this cocci code.
677 *)
678and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc
679 and metavar_binding_kind =
680 | MetaIdVal of string
681 | MetaFuncVal of string
682 | MetaLocalFuncVal of string
683
684 | MetaExprVal of expression (* a "clean expr" *)
685 | MetaExprListVal of argument wrap2 list
686 | MetaParamVal of parameterType
687 | MetaParamListVal of parameterType wrap2 list
688
689 | MetaTypeVal of fullType
113803cf 690 | MetaInitVal of initialiser
34e49164
C
691 | MetaStmtVal of statement
692
693 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
694 * because don't need to have the value for a position in the env of
695 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
696 * variables accessible via SmPL whereas the position can be one day
697 * so I think it's better to put MetaPosVal here *)
698 | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *)
485bce71
C
699 | MetaPosValList of
700 (Common.filename * string (*element*) * posl * posl) list (* min, max *)
34e49164
C
701 | MetaListlenVal of int
702
703
704(*****************************************************************************)
705(* C comments *)
706(*****************************************************************************)
707
485bce71
C
708(* convention: I often use "m" for comments as I can not use "c"
709 * (already use for c stuff) and "com" is too long.
34e49164
C
710 *)
711
0708f913
C
712(* this type will be associated to each token.
713 *)
34e49164 714and comments_around = {
0708f913
C
715 mbefore: Token_c.comment_like_token list;
716 mafter: Token_c.comment_like_token list;
708f4980
C
717
718 (* less: could remove ? do something simpler than CComment for
719 * coccinelle, cf above. *)
720 mbefore2: comment_and_relative_pos list;
721 mafter2: comment_and_relative_pos list;
722 }
34e49164
C
723 and comment_and_relative_pos = {
724
725 minfo: Common.parse_info;
726 (* the int represent the number of lines of difference between the
727 * current token and the comment. When on same line, this number is 0.
728 * When previous line, -1. In some way the after/before in previous
729 * record is useless because the sign of the integer can helps
730 * do the difference too, but I keep it that way.
731 *)
732 mpos: int;
733 (* todo?
734 * cppbetween: bool; touse? if false positive
735 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
736 *)
708f4980 737 }
34e49164
C
738
739and comment = Common.parse_info
740and com = comment list ref
34e49164 741
708f4980 742 (* with sexp *)
34e49164
C
743
744
745(*****************************************************************************)
746(* Some constructors *)
747(*****************************************************************************)
748let nullQualif = ({const=false; volatile= false}, [])
749let nQ = nullQualif
750
751let defaultInt = (BaseType (IntType (Si (Signed, CInt))))
752
753let noType () = ref (None,NotTest)
754let noInstr = (ExprStatement (None), [])
755let noTypedefDef () = None
756
757let emptyMetavarsBinding =
758 ([]: metavars_binding)
759
708f4980 760let emptyAnnotCocci =
34e49164
C
761 (Ast_cocci.CONTEXT (Ast_cocci.NoPos,Ast_cocci.NOTHING),
762 emptyMetavarsBinding)
763
708f4980
C
764let emptyAnnot =
765 (None: (Ast_cocci.mcodekind * metavars_binding) option)
766
767(* compatibility mode *)
768let mcode_and_env_of_cocciref aref =
769 match !aref with
770 | Some x -> x
771 | None -> emptyAnnotCocci
772
773
34e49164
C
774let emptyComments= {
775 mbefore = [];
776 mafter = [];
708f4980
C
777 mbefore2 = [];
778 mafter2 = [];
34e49164
C
779}
780
781
782(* for include, some meta information needed by cocci *)
783let noRelPos () =
784 ref (None: include_rel_pos option)
785let noInIfdef () =
786 ref false
787
788
789(* When want add some info in ast that does not correspond to
790 * an existing C element.
791 * old: or when don't want 'synchronize' on it in unparse_c.ml
792 * (now have other mark for tha matter).
793 *)
794let no_virt_pos = ({str="";charpos=0;line=0;column=0;file=""},-1)
795
796let fakeInfo pi =
797 { pinfo = FakeTok ("",no_virt_pos);
798 cocci_tag = ref emptyAnnot;
799 comments_tag = ref emptyComments;
800 }
801
485bce71
C
802let noii = []
803let noattr = []
804let noi_content = (None: ((Common.filename * program) option))
34e49164
C
805
806(*****************************************************************************)
807(* Wrappers *)
808(*****************************************************************************)
809let unwrap = fst
810
113803cf 811let unwrap2 = fst
34e49164
C
812
813let unwrap_expr ((unwrap_e, typ), iie) = unwrap_e
814let rewrap_expr ((_old_unwrap_e, typ), iie) newe = ((newe, typ), iie)
815
708f4980
C
816let unwrap_typeC (qu, (typeC, ii)) = typeC
817let rewrap_typeC (qu, (typeC, ii)) newtypeC = (qu, (newtypeC, ii))
818
819let unwrap_typeCbis (typeC, ii) = typeC
820
821let unwrap_st (unwrap_st, ii) = unwrap_st
822
823(* ------------------------------------------------------------------------- *)
824let mk_e unwrap_e ii = (unwrap_e, noType()), ii
825let mk_e_bis unwrap_e ty ii = (unwrap_e, ty), ii
826
827let mk_ty typeC ii = nQ, (typeC, ii)
828let mk_tybis typeC ii = (typeC, ii)
829
830let mk_st unwrap_st ii = (unwrap_st, ii)
831
832(* ------------------------------------------------------------------------- *)
833let get_ii_typeC_take_care (typeC, ii) = ii
834let get_ii_st_take_care (st, ii) = ii
835let get_ii_expr_take_care (e, ii) = ii
836
837let get_st_and_ii (st, ii) = st, ii
838let get_ty_and_ii (qu, (typeC, ii)) = qu, (typeC, ii)
839let get_e_and_ii (e, ii) = e, ii
840
841
842(* ------------------------------------------------------------------------- *)
34e49164
C
843let get_type_expr ((unwrap_e, typ), iie) = !typ
844let set_type_expr ((unwrap_e, oldtyp), iie) newtyp =
845 oldtyp := newtyp
846 (* old: (unwrap_e, newtyp), iie *)
847
91eba41f
C
848let get_onlytype_expr ((unwrap_e, typ), iie) =
849 match !typ with
850 | Some (ft,_local), _test -> Some ft
851 | None, _ -> None
852
0708f913
C
853let get_onlylocal_expr ((unwrap_e, typ), iie) =
854 match !typ with
855 | Some (ft,local), _test -> Some local
856 | None, _ -> None
857
91eba41f 858(* ------------------------------------------------------------------------- *)
34e49164
C
859let rewrap_str s ii =
860 {ii with pinfo =
861 (match ii.pinfo with
862 OriginTok pi -> OriginTok { pi with Common.str = s;}
863 | ExpandedTok (pi,vpi) -> ExpandedTok ({ pi with Common.str = s;},vpi)
864 | FakeTok (_,vpi) -> FakeTok (s,vpi)
865 | AbstractLineTok pi -> OriginTok { pi with Common.str = s;})}
866
867let rewrap_pinfo pi ii =
868 {ii with pinfo = pi}
869
708f4980
C
870
871
34e49164
C
872(* info about the current location *)
873let get_pi = function
874 OriginTok pi -> pi
875 | ExpandedTok (_,(pi,_)) -> pi
876 | FakeTok (_,(pi,_)) -> pi
877 | AbstractLineTok pi -> pi
878
879(* original info *)
880let get_opi = function
881 OriginTok pi -> pi
708f4980 882 | ExpandedTok (pi,_) -> pi (* diff with get_pi *)
34e49164
C
883 | FakeTok (_,_) -> failwith "no position information"
884 | AbstractLineTok pi -> pi
885
34e49164
C
886let str_of_info ii =
887 match ii.pinfo with
888 OriginTok pi -> pi.Common.str
889 | ExpandedTok (pi,_) -> pi.Common.str
890 | FakeTok (s,_) -> s
891 | AbstractLineTok pi -> pi.Common.str
892
893let get_info f ii =
894 match ii.pinfo with
895 OriginTok pi -> f pi
896 | ExpandedTok (_,(pi,_)) -> f pi
897 | FakeTok (_,(pi,_)) -> f pi
898 | AbstractLineTok pi -> f pi
899
900let get_orig_info f ii =
901 match ii.pinfo with
902 OriginTok pi -> f pi
708f4980 903 | ExpandedTok (pi,_) -> f pi (* diff with get_info *)
34e49164
C
904 | FakeTok (_,(pi,_)) -> f pi
905 | AbstractLineTok pi -> f pi
906
907let make_expanded ii =
908 {ii with pinfo = ExpandedTok (get_opi ii.pinfo,no_virt_pos)}
909
910let pos_of_info ii = get_info (function x -> x.Common.charpos) ii
911let opos_of_info ii = get_orig_info (function x -> x.Common.charpos) ii
912let line_of_info ii = get_orig_info (function x -> x.Common.line) ii
913let col_of_info ii = get_orig_info (function x -> x.Common.column) ii
914let file_of_info ii = get_orig_info (function x -> x.Common.file) ii
708f4980 915let mcode_of_info ii = fst (mcode_and_env_of_cocciref ii.cocci_tag)
34e49164
C
916let pinfo_of_info ii = ii.pinfo
917let parse_info_of_info ii = get_pi ii.pinfo
918
485bce71
C
919let is_fake ii =
920 match ii.pinfo with
921 FakeTok (_,_) -> true
922 | _ -> false
923
924let is_origintok ii =
925 match ii.pinfo with
926 | OriginTok pi -> true
927 | _ -> false
928
91eba41f 929(* ------------------------------------------------------------------------- *)
34e49164 930type posrv = Real of Common.parse_info | Virt of virtual_position
485bce71 931
34e49164
C
932let compare_pos ii1 ii2 =
933 let get_pos = function
934 OriginTok pi -> Real pi
935 | FakeTok (s,vpi) -> Virt vpi
936 | ExpandedTok (pi,vpi) -> Virt vpi
937 | AbstractLineTok pi -> Real pi in (* used for printing *)
938 let pos1 = get_pos (pinfo_of_info ii1) in
939 let pos2 = get_pos (pinfo_of_info ii2) in
940 match (pos1,pos2) with
faf9a90c
C
941 (Real p1, Real p2) ->
942 compare p1.Common.charpos p2.Common.charpos
34e49164 943 | (Virt (p1,_), Real p2) ->
b1b2de81 944 if (compare p1.Common.charpos p2.Common.charpos) =|= (-1) then (-1) else 1
34e49164 945 | (Real p1, Virt (p2,_)) ->
b1b2de81 946 if (compare p1.Common.charpos p2.Common.charpos) =|= 1 then 1 else (-1)
34e49164
C
947 | (Virt (p1,o1), Virt (p2,o2)) ->
948 let poi1 = p1.Common.charpos in
949 let poi2 = p2.Common.charpos in
950 match compare poi1 poi2 with
951 -1 -> -1
952 | 0 -> compare o1 o2
953 | x -> x
954
955let equal_posl (l1,c1) (l2,c2) =
956 (l1 =|= l2) && (c1 =|= c2)
957
958let info_to_fixpos ii =
959 match pinfo_of_info ii with
960 OriginTok pi -> Ast_cocci.Real pi.Common.charpos
961 | ExpandedTok (_,(pi,offset)) ->
962 Ast_cocci.Virt (pi.Common.charpos,offset)
963 | FakeTok (_,(pi,offset)) ->
964 Ast_cocci.Virt (pi.Common.charpos,offset)
965 | AbstractLineTok pi -> failwith "unexpected abstract"
966
485bce71 967(* cocci: *)
34e49164 968let is_test (e : expression) =
708f4980 969 let (_,info), _ = e in
34e49164 970 let (_,test) = !info in
b1b2de81 971 test =*= Test
34e49164
C
972
973(*****************************************************************************)
974(* Abstract line *)
975(*****************************************************************************)
976
977(* When we have extended the C Ast to add some info to the tokens,
978 * such as its line number in the file, we can not use anymore the
979 * ocaml '=' to compare Ast elements. To overcome this problem, to be
980 * able to use again '=', we just have to get rid of all those extra
981 * information, to "abstract those line" (al) information.
91eba41f
C
982 *
983 * Julia then modifies it a little to have a tokenindex, so the original
984 * true al_info is in fact real_al_info.
34e49164
C
985 *)
986
987let al_info tokenindex x =
988 { pinfo =
989 (AbstractLineTok
990 {charpos = tokenindex;
991 line = tokenindex;
992 column = tokenindex;
993 file = "";
994 str = str_of_info x});
995 cocci_tag = ref emptyAnnot;
996 comments_tag = ref emptyComments;
997 }
998
999let semi_al_info x =
1000 { x with
1001 cocci_tag = ref emptyAnnot;
1002 comments_tag = ref emptyComments;
1003 }
1004
91eba41f
C
1005let magic_real_number = -10
1006
1007let real_al_info x =
1008 { pinfo =
1009 (AbstractLineTok
1010 {charpos = magic_real_number;
1011 line = magic_real_number;
1012 column = magic_real_number;
1013 file = "";
1014 str = str_of_info x});
1015 cocci_tag = ref emptyAnnot;
1016 comments_tag = ref emptyComments;
1017 }
1018
b1b2de81
C
1019let al_comments x =
1020 let keep_cpp l =
1021 List.filter (function (Token_c.TCommentCpp _,_) -> true | _ -> false) l in
1022 let al_com (x,i) =
1023 (x,{i with Common.charpos = magic_real_number;
1024 Common.line = magic_real_number;
1025 Common.column = magic_real_number}) in
1026 {mbefore = []; (* duplicates mafter of the previous token *)
708f4980
C
1027 mafter = List.map al_com (keep_cpp x.mafter);
1028 mbefore2=[];
1029 mafter2=[];
1030 }
b1b2de81
C
1031
1032let al_info_cpp tokenindex x =
1033 { pinfo =
1034 (AbstractLineTok
1035 {charpos = tokenindex;
1036 line = tokenindex;
1037 column = tokenindex;
1038 file = "";
1039 str = str_of_info x});
1040 cocci_tag = ref emptyAnnot;
1041 comments_tag = ref (al_comments !(x.comments_tag));
1042 }
1043
1044let semi_al_info_cpp x =
1045 { x with
1046 cocci_tag = ref emptyAnnot;
1047 comments_tag = ref (al_comments !(x.comments_tag));
1048 }
1049
1050let real_al_info_cpp x =
1051 { pinfo =
1052 (AbstractLineTok
1053 {charpos = magic_real_number;
1054 line = magic_real_number;
1055 column = magic_real_number;
1056 file = "";
1057 str = str_of_info x});
1058 cocci_tag = ref emptyAnnot;
1059 comments_tag = ref (al_comments !(x.comments_tag));
1060 }
1061
91eba41f 1062
34e49164
C
1063(*****************************************************************************)
1064(* Views *)
1065(*****************************************************************************)
1066
1067(* Transform a list of arguments (or parameters) where the commas are
1068 * represented via the wrap2 and associated with an element, with
1069 * a list where the comma are on their own. f(1,2,2) was
1070 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
1071 *
1072 * Used in cocci_vs_c.ml, to have a more direct correspondance between
1073 * the ast_cocci of julia and ast_c.
1074 *)
1075let rec (split_comma: 'a wrap2 list -> ('a, il) either list) =
1076 function
1077 | [] -> []
1078 | (e, ii)::xs ->
1079 if null ii
1080 then (Left e)::split_comma xs
1081 else Right ii::Left e::split_comma xs
1082
1083let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) =
1084 function
1085 | [] -> []
1086 | Right ii::Left e::xs ->
1087 (e, ii)::unsplit_comma xs
1088 | Left e::xs ->
1089 let empty_ii = [] in
1090 (e, empty_ii)::unsplit_comma xs
1091 | Right ii::_ ->
1092 raise Impossible
1093
1094
1095
1096
485bce71
C
1097(*****************************************************************************)
1098(* Helpers, could also be put in lib_parsing_c.ml instead *)
1099(*****************************************************************************)
1100
91eba41f
C
1101(* should maybe be in pretty_print_c ? *)
1102
485bce71
C
1103let s_of_inc_file inc_file =
1104 match inc_file with
1105 | Local xs -> xs +> Common.join "/"
1106 | NonLocal xs -> xs +> Common.join "/"
0708f913 1107 | Weird s -> s
485bce71
C
1108
1109let s_of_inc_file_bis inc_file =
1110 match inc_file with
1111 | Local xs -> "\"" ^ xs +> Common.join "/" ^ "\""
1112 | NonLocal xs -> "<" ^ xs +> Common.join "/" ^ ">"
0708f913 1113 | Weird s -> s
485bce71
C
1114
1115let fieldname_of_fieldkind fieldkind =
b1b2de81 1116 match fieldkind with
485bce71 1117 | Simple (sopt, ft) -> sopt
b1b2de81 1118 | BitField (sopt, ft, info, expr) -> sopt
485bce71 1119
91eba41f
C
1120
1121let s_of_attr attr =
1122 attr
1123 +> List.map (fun (Attribute s, ii) -> s)
1124 +> Common.join ","
113803cf 1125
708f4980
C
1126
1127(* ------------------------------------------------------------------------- *)
b1b2de81
C
1128let str_of_name ident =
1129 match ident with
1130 | RegularName (s,ii) -> s
1131 | CppConcatenatedName xs ->
1132 xs +> List.map (fun (x,iiop) -> unwrap x) +> Common.join "##"
1133 | CppVariadicName (s, ii) -> "##" ^ s
1134 | CppIdentBuilder ((s,iis), xs) ->
1135 s ^ "(" ^
1136 (xs +> List.map (fun ((x,iix), iicomma) -> x) +> Common.join ",") ^
1137 ")"
1138
708f4980
C
1139let get_s_and_ii_of_name name =
1140 match name with
1141 | RegularName (s, iis) -> s, iis
1142 | CppIdentBuilder ((s, iis), xs) -> s, iis
1143 | CppVariadicName (s,iis) ->
1144 let (iop, iis) = Common.tuple_of_list2 iis in
1145 s, [iis]
b1b2de81
C
1146 | CppConcatenatedName xs ->
1147 (match xs with
1148 | [] -> raise Impossible
708f4980
C
1149 | ((s,iis),noiiop)::xs ->
1150 s, iis
b1b2de81 1151 )
b1b2de81 1152
708f4980
C
1153let get_s_and_info_of_name name =
1154 let (s,ii) = get_s_and_ii_of_name name in
1155 s, List.hd ii
1156
1157let info_of_name name =
1158 let (s,ii) = get_s_and_ii_of_name name in
1159 List.hd ii
1160
1161let ii_of_name name =
1162 let (s,ii) = get_s_and_ii_of_name name in
1163 ii
1164
b1b2de81
C
1165
1166
708f4980
C
1167let get_local_ii_of_expr_inlining_ii_of_name e =
1168 let (ebis,_),ii = e in
1169 match ebis, ii with
1170 | Ident name, noii ->
1171 assert(null noii);
1172 ii_of_name name
1173 | RecordAccess (e, name), ii ->
1174 ii @ ii_of_name name
1175 | RecordPtAccess (e, name), ii ->
1176 ii @ ii_of_name name
1177 | _, ii -> ii
1178
1179
1180let get_local_ii_of_tybis_inlining_ii_of_name ty =
1181 match ty with
1182 | TypeName (name, _typ), [] -> ii_of_name name
1183 | _, ii -> ii
1184
1185(* only Label and Goto have name *)
1186let get_local_ii_of_st_inlining_ii_of_name st =
1187 match st with
1188 | Labeled (Label (name, st)), ii -> ii_of_name name @ ii
1189 | Jump (Goto name), ii ->
1190 let (i1, i3) = Common.tuple_of_list2 ii in
1191 [i1] @ ii_of_name name @ [i3]
1192 | _, ii -> ii
1193
1194
1195
1196(* ------------------------------------------------------------------------- *)
0708f913 1197let name_of_parameter param =
b1b2de81
C
1198 param.p_namei +> Common.map_option (str_of_name)
1199