Coccinelle release 0.2.5-rc7.
[bpt/coccinelle.git] / parsing_c / ast_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15 open Common
16
17 (*****************************************************************************)
18 (* The AST C related types *)
19 (*****************************************************************************)
20 (*
21 * Some stuff are tagged semantic: which means that they are computed
22 * after parsing.
23 *
24 * This means that some elements in this AST are present only if
25 * some annotation/transformation has been done on the original AST returned
26 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
27 *)
28
29
30 (* ------------------------------------------------------------------------- *)
31 (* Token/info *)
32 (* ------------------------------------------------------------------------- *)
33
34 (* To allow some transformations over the AST, we keep as much information
35 * as possible in the AST such as the tokens content and their locations.
36 * Those info are called 'info' (how original) and can be tagged.
37 * For instance one tag may say that the unparser should remove this token.
38 *
39 * Update: Now I use a ref! in those 'info' so take care.
40 * That means that modifications of the info of tokens can have
41 * an effect on the info stored in the ast (which is sometimes
42 * convenient, cf unparse_c.ml or comment_annotater_c.ml)
43 *
44 * convention: I often use 'ii' for the name of a list of info.
45 *
46 * Sometimes we want to add someting at the beginning or at the end
47 * of a construct. For 'function' and 'decl' we want to add something
48 * to their left and for 'if' 'while' et 'for' and so on at their right.
49 * We want some kinds of "virtual placeholders" that represent the start or
50 * end of a construct. We use fakeInfo for that purpose.
51 * To identify those cases I have added a fakestart/fakeend comment.
52 *
53 * cocci: Each token will be decorated in the future by the mcodekind
54 * of cocci. It is the job of the pretty printer to look at this
55 * information and decide to print or not the token (and also the
56 * pending '+' associated sometimes with the token).
57 *
58 * The first time that we parse the original C file, the mcodekind is
59 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
60 * associated. This is what I call a "clean" expr/statement/....
61 *
62 * Each token will also be decorated in the future with an environment,
63 * because the pending '+' may contain metavariables that refer to some
64 * C code.
65 *
66 *)
67
68 (* for unparser: *)
69
70 type posl = int * int (* line-col, for MetaPosValList, for position variables *)
71 (* with sexp *)
72
73 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
74 type virtual_position = Common.parse_info * int (* character offset *)
75 (* with sexp *)
76
77 type parse_info =
78 (* Present both in ast and list of tokens *)
79 | OriginTok of Common.parse_info
80 (* Present only in ast and generated after parsing. Used mainly
81 * by Julia, to add stuff at virtual places, beginning of func or decl *)
82 | FakeTok of string * virtual_position
83 (* Present both in ast and list of tokens. *)
84 | ExpandedTok of Common.parse_info * virtual_position
85
86 (* Present neither in ast nor in list of tokens
87 * but only in the '+' of the mcode of some tokens. Those kind of tokens
88 * are used to be able to use '=' to compare big ast portions.
89 *)
90 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
91 (* with sexp *)
92
93 type info = {
94 pinfo : parse_info;
95
96 (* this cocci_tag can be changed, which is how we can express some program
97 * transformations by tagging the tokens involved in this transformation.
98 *)
99 cocci_tag: (Ast_cocci.mcodekind * metavars_binding list) option ref;
100 (* set in comment_annotater_c.ml *)
101 comments_tag: comments_around ref;
102
103 (* todo? token_info : sometimes useful to know what token it was *)
104 }
105 and il = info list
106
107 (* wrap2 is like wrap, except that I use it often for separator such
108 * as ','. In that case the info is associated to the argument that
109 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])].
110 *
111 * wrap3 is like wrap, except that I use it in case sometimes it
112 * will be empty because the info will be included in a nested
113 * entity (e.g. for Ident in expr because it's inlined in the name)
114 * so user should never assume List.length wrap3 > 0.
115 *)
116 and 'a wrap = 'a * il
117 and 'a wrap2 = 'a * il
118 and 'a wrap3 = 'a * il (* * evotype*)
119
120 (* ------------------------------------------------------------------------- *)
121 (* Name *)
122 (* ------------------------------------------------------------------------- *)
123
124 (* was called 'ident' before, but 'name' is I think better
125 * as concatenated strings can be used not only for identifiers and for
126 * declarators, but also for fields, for labels, etc.
127 *
128 * Note: because now the info is embeded in the name, the info for
129 * expression like Ident, or types like Typename, are not anymore
130 * stored in the expression or type. Hence if you assume this,
131 * which was true before, you are now wrong. So never write code like
132 * let (unwrape,_), ii = e and use 'ii' believing it contains
133 * the local ii to e. If you want to do that, use the appropiate
134 * wrapper get_local_ii_of_expr_inlining_ii_of_name.
135 *)
136 and name =
137 | RegularName of string wrap
138 | CppConcatenatedName of (string wrap) wrap2 (* the ## separators *) list
139 (* normally only used inside list of things, as in parameters or arguments
140 * in which case, cf cpp-manual, it has a special meaning *)
141 | CppVariadicName of string wrap (* ## s *)
142 | CppIdentBuilder of string wrap (* s ( ) *) *
143 ((string wrap) wrap2 list) (* arguments *)
144
145
146 (* ------------------------------------------------------------------------- *)
147 (* C Type *)
148 (* ------------------------------------------------------------------------- *)
149 (* Could have more precise type in fullType, in expression, etc, but
150 * it requires to do too much things in parsing such as checking no
151 * conflicting structname, computing value, etc. Better to separate
152 * concern. So I put '=>' to mean what we would really like. In fact
153 * what we really like is defining another fullType, expression, etc
154 * from scratch, because many stuff are just sugar.
155 *
156 * invariant: Array and FunctionType have also typeQualifier but they
157 * dont have sense. I put this to factorise some code. If you look in
158 * the grammar, you see that we can never specify const for the array
159 * himself (but we can do it for pointer) or function, we always
160 * have in the action rule of the grammar a { (nQ, FunctionType ...) }.
161 *
162 *
163 * Because of ExprStatement, we can have more 'new scope' events, but
164 * rare I think. For instance with 'array of constExpression' there can
165 * have an exprStatement and a new (local) struct defined. Same for
166 * Constructor.
167 *
168 *)
169
170
171 and fullType = typeQualifier * typeC
172 and typeC = typeCbis wrap (* todo reput wrap3 *)
173
174 and typeCbis =
175 NoType (* for c++ only *)
176 | BaseType of baseType
177
178 | Pointer of fullType
179 | Array of constExpression option * fullType
180 | FunctionType of functionType
181
182 | Enum of string option * enumType
183 | StructUnion of structUnion * string option * structType (* new scope *)
184
185 | EnumName of string
186 | StructUnionName of structUnion * string
187
188 | TypeName of name * fullType option (* semantic: filled later *)
189
190 | ParenType of fullType (* for unparser: *)
191
192 (* gccext: TypeOfType below may seems useless; Why declare a
193 * __typeof__(int) x; ?
194 * When used with macros, it allows to fix a problem of C which
195 * is that type declaration can be spread around the ident. Indeed it
196 * may be difficult to have a macro such as
197 * '#define macro(type, ident) type ident;'
198 * because when you want to do a
199 * macro(char[256], x),
200 * then it will generate invalid code, but with a
201 * '#define macro(type, ident) __typeof(type) ident;'
202 * it will work.
203 *)
204 | TypeOfExpr of expression
205 | TypeOfType of fullType
206
207 (* cppext: IfdefType TODO *)
208
209 (* -------------------------------------- *)
210 and baseType = Void
211 | IntType of intType
212 | FloatType of floatType
213 | SizeType
214 | SSizeType
215 | PtrDiffType
216
217 (* stdC: type section
218 * add a | SizeT ?
219 * note: char and signed char are semantically different!!
220 *)
221 and intType = CChar (* obsolete? | CWchar *)
222 | Si of signed
223
224 and signed = sign * base
225 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
226 and sign = Signed | UnSigned
227
228 and floatType = CFloat | CDouble | CLongDouble
229
230
231 (* -------------------------------------- *)
232 and structUnion = Struct | Union
233 and structType = field list
234 and field =
235 | DeclarationField of field_declaration
236 (* gccext: *)
237 | EmptyField of info
238
239 (* cppext: *)
240 | MacroDeclField of (string * argument wrap2 list)
241 wrap (* optional ';'*)
242
243 (* cppext: *)
244 | CppDirectiveStruct of cpp_directive
245 | IfdefStruct of ifdef_directive (* * field list list *)
246
247
248 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
249 and field_declaration =
250 | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *)
251
252 (* At first I thought that a bitfield could be only Signed/Unsigned.
253 * But it seems that gcc allow char i:4. C rule must say that you
254 * can cast into int so enum too, ...
255 *)
256 and fieldkind =
257 | Simple of name option * fullType
258 | BitField of name option * fullType *
259 info (* : *) * constExpression
260 (* fullType => BitFieldInt | BitFieldUnsigned *)
261
262
263 (* -------------------------------------- *)
264 and enumType = oneEnumType wrap2 (* , *) list
265 (* => string * int list *)
266
267 and oneEnumType = name * (info (* = *) * constExpression) option
268
269 (* -------------------------------------- *)
270 (* return * (params * has "...") *)
271 and functionType = fullType * (parameterType wrap2 list * bool wrap)
272 and parameterType =
273 { p_namei: name option;
274 p_register: bool wrap;
275 p_type: fullType;
276 }
277 (* => (bool (register) * fullType) list * bool *)
278
279
280 and typeQualifier = typeQualifierbis wrap
281 and typeQualifierbis = {const: bool; volatile: bool}
282
283 (* gccext: cppext: *)
284 and attribute = attributebis wrap
285 and attributebis =
286 | Attribute of string
287
288 (* ------------------------------------------------------------------------- *)
289 (* C expression *)
290 (* ------------------------------------------------------------------------- *)
291 and expression = (expressionbis * exp_info ref (* semantic: *)) wrap3
292 and exp_info = exp_type option * test
293 and exp_type = fullType (* Type_c.completed_and_simplified *) * local
294 and local = LocalVar of parse_info | NotLocalVar (* cocci: *)
295 and test = Test | NotTest (* cocci: *)
296
297 and expressionbis =
298
299 (* Ident can be a enumeration constant, a simple variable, a name of a func.
300 * With cppext, Ident can also be the name of a macro. Sparse says
301 * "an identifier with a meaning is a symbol" *)
302 | Ident of name (* todo? more semantic info such as LocalFunc *)
303
304 | Constant of constant
305 | FunCall of expression * argument wrap2 (* , *) list
306 (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *)
307 | CondExpr of expression * expression option * expression
308
309 (* should be considered as statements, bad C langage *)
310 | Sequence of expression * expression
311 | Assignment of expression * assignOp * expression
312
313
314 | Postfix of expression * fixOp
315 | Infix of expression * fixOp
316
317 | Unary of expression * unaryOp
318 | Binary of expression * binaryOp * expression
319
320 | ArrayAccess of expression * expression
321
322 (* field ident access *)
323 | RecordAccess of expression * name
324 | RecordPtAccess of expression * name
325 (* redundant normally, could replace it by DeRef RecordAcces *)
326
327 | SizeOfExpr of expression
328 | SizeOfType of fullType
329 | Cast of fullType * expression
330
331 (* gccext: *)
332 | StatementExpr of compound wrap (* ( ) new scope *)
333 | Constructor of fullType * initialiser wrap2 (* , *) list
334
335 (* for unparser: *)
336 | ParenExpr of expression
337
338 (* for C++: *)
339 | New of argument
340 | Delete of expression
341
342 (* cppext: IfdefExpr TODO *)
343
344 (* cppext: normally just expression *)
345 and argument = (expression, weird_argument) Common.either
346 and weird_argument =
347 | ArgType of parameterType
348 | ArgAction of action_macro
349 and action_macro =
350 (* todo: ArgStatement of statement, possibly have ghost token *)
351 | ActMisc of il
352
353
354 (* I put string for Int and Float because int would not be enough because
355 * OCaml int are 31 bits. So simpler to do string. Same reason to have
356 * string instead of int list for the String case.
357 *
358 * note: -2 is not a constant, it is the unary operator '-'
359 * applied to constant 2. So the string must represent a positive
360 * integer only. *)
361
362 and constant =
363 | String of (string * isWchar)
364 | MultiString of string list (* can contain MacroString, todo: more info *)
365 | Char of (string * isWchar) (* normally it is equivalent to Int *)
366 | Int of (string * intType)
367 | Float of (string * floatType)
368
369 and isWchar = IsWchar | IsChar
370
371
372 and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not
373 | GetRefLabel (* gccext: GetRefLabel, via &&label notation *)
374 and assignOp = SimpleAssign | OpAssign of arithOp
375 and fixOp = Dec | Inc
376
377 and binaryOp = Arith of arithOp | Logical of logicalOp
378
379 and arithOp =
380 | Plus | Minus | Mul | Div | Mod
381 | DecLeft | DecRight
382 | And | Or | Xor
383
384 and logicalOp =
385 | Inf | Sup | InfEq | SupEq
386 | Eq | NotEq
387 | AndLog | OrLog
388
389 and constExpression = expression (* => int *)
390
391 (* ------------------------------------------------------------------------- *)
392 (* C statement *)
393 (* ------------------------------------------------------------------------- *)
394 (* note: that assignement is not a statement but an expression;
395 * wonderful C langage.
396 *
397 * note: I use 'and' for type definition cos gccext allow statement as
398 * expression, so need mutual recursive type definition.
399 *
400 *)
401
402 and statement = statementbis wrap3
403 and statementbis =
404 | Labeled of labeled
405 | Compound of compound (* new scope *)
406 | ExprStatement of exprStatement
407 | Selection of selection (* have fakeend *)
408 | Iteration of iteration (* have fakeend *)
409 | Jump of jump
410
411 (* simplify cocci: only at the beginning of a compound normally *)
412 | Decl of declaration
413
414 (* gccext: *)
415 | Asm of asmbody
416 | NestedFunc of definition
417
418 (* cppext: *)
419 | MacroStmt
420
421
422
423 and labeled = Label of name * statement
424 | Case of expression * statement
425 | CaseRange of expression * expression * statement (* gccext: *)
426 | Default of statement
427
428 (* cppext:
429 * old: compound = (declaration list * statement list)
430 * old: (declaration, statement) either list
431 * Simplify cocci to just have statement list, by integrating Decl in stmt.
432 *
433 * update: now introduce also the _sequencable to allow ifdef in the middle.
434 * Indeed, I now allow ifdefs in the ast but they must be only between
435 * "sequencable" elements. They can be put in a type only if this type
436 * is used in a list, like at the toplevel, used in a 'toplevel list',
437 * or inside a compound, used in a 'statement list'. I must not allow
438 * ifdef anywhere. For instance I can not make ifdef a statement
439 * cos some instruction like If accept only one statement and the
440 * ifdef directive must not take the place of a legitimate instruction.
441 * We had a similar phenomena in SmPL where we have the notion
442 * of statement and sequencable statement too. Once you have
443 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
444 * and introduce the ifdef.
445 *
446 * update: those ifdefs are either passed, or present in the AST but in
447 * a flat form. To structure those flat ifdefs you have to run
448 * a transformation that will put in a tree the statements inside
449 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
450 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
451 *
452 *)
453 and compound = statement_sequencable list
454
455 (* cppext: easier to put at statement_list level than statement level *)
456 and statement_sequencable =
457 | StmtElem of statement
458
459 (* cppext: *)
460 | CppDirectiveStmt of cpp_directive
461 | IfdefStmt of ifdef_directive
462
463 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
464 | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list
465
466 and exprStatement = expression option
467
468 (* for Switch, need check that all elements in the compound start
469 * with a case:, otherwise unreachable code.
470 *)
471 and selection =
472 | If of expression * statement * statement
473 | Switch of expression * statement
474
475
476 and iteration =
477 | While of expression * statement
478 | DoWhile of statement * expression
479 | For of exprStatement wrap * exprStatement wrap * exprStatement wrap *
480 statement
481 (* cppext: *)
482 | MacroIteration of string * argument wrap2 list * statement
483
484 and jump = Goto of name
485 | Continue | Break
486 | Return | ReturnExpr of expression
487 | GotoComputed of expression (* gccext: goto *exp ';' *)
488
489
490 (* gccext: *)
491 and asmbody = il (* string list *) * colon wrap (* : *) list
492 and colon = Colon of colon_option wrap2 list
493 and colon_option = colon_option_bis wrap
494 and colon_option_bis = ColonMisc | ColonExpr of expression
495
496
497 (* ------------------------------------------------------------------------- *)
498 (* Declaration *)
499 (* ------------------------------------------------------------------------- *)
500 (* (string * ...) option cos can have empty declaration or struct tag
501 * declaration.
502 *
503 * Before I had a Typedef constructor, but why make this special case and not
504 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
505 * declarations ? So I try to generalise and not have Typedef either. This
506 * requires more work in parsing. Better to separate concern.
507 *
508 * Before the need for unparser, I didn't have a DeclList but just a Decl.
509 *
510 * I am not sure what it means to declare a prototype inline, but gcc
511 * accepts it.
512 *)
513
514 and declaration =
515 | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *)
516 (* cppext: *)
517 | MacroDecl of (string * argument wrap2 list) wrap (* fakestart *)
518
519 and onedecl =
520 { v_namei: (name * v_init) option;
521 v_type: fullType;
522 (* semantic: set in type annotated and used in cocci_vs_c
523 * when we transform some initialisation into affectation
524 *)
525 v_type_bis: fullType (* Type_c.completed_and_simplified *) option ref;
526 v_storage: storage;
527 v_local: local_decl; (* cocci: *)
528 v_attr: attribute list; (* gccext: *)
529 }
530 and v_init =
531 NoInit | ValInit of info * initialiser
532 | ConstrInit of argument wrap2 (* , *) list wrap
533 and storage = storagebis * bool (* gccext: inline or not *)
534 and storagebis = NoSto | StoTypedef | Sto of storageClass
535 and storageClass = Auto | Static | Register | Extern
536
537 and local_decl = LocalDecl | NotLocalDecl
538
539 (* fullType is the type used if the type should be converted to
540 an assignment. It can be adjusted in the type annotation
541 phase when typedef information is availalble *)
542 and initialiser = initialiserbis wrap
543 and initialiserbis =
544 | InitExpr of expression
545 | InitList of initialiser wrap2 (* , *) list
546 (* gccext: *)
547 | InitDesignators of designator list * initialiser
548 | InitFieldOld of string * initialiser
549 | InitIndexOld of expression * initialiser
550
551 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
552 and designator = designatorbis wrap
553 and designatorbis =
554 | DesignatorField of string
555 | DesignatorIndex of expression
556 | DesignatorRange of expression * expression
557
558 (* ------------------------------------------------------------------------- *)
559 (* Function definition *)
560 (* ------------------------------------------------------------------------- *)
561 (* Normally we should define another type functionType2 because there
562 * are more restrictions on what can define a function than a pointer
563 * function. For instance a function declaration can omit the name of the
564 * parameter whereas a function definition can not. But, in some cases such
565 * as 'f(void) {', there is no name too, so I simplified and reused the
566 * same functionType type for both declaration and function definition.
567 *
568 * Also old style C does not have type in the parameter, so again simpler
569 * to abuse the functionType and allow missing type.
570 *)
571 and definition = definitionbis wrap (* ( ) { } fakestart sto *)
572 and definitionbis =
573 { f_name: name;
574 f_type: functionType; (* less? a functionType2 ? *)
575 f_storage: storage;
576 f_body: compound;
577 f_attr: attribute list; (* gccext: *)
578 f_old_c_style: declaration list option;
579 }
580 (* cppext: IfdefFunHeader TODO *)
581
582 (* ------------------------------------------------------------------------- *)
583 (* cppext: cpp directives, #ifdef, #define and #include body *)
584 (* ------------------------------------------------------------------------- *)
585 and cpp_directive =
586 | Define of define
587 | Include of includ
588 | PragmaAndCo of il
589 (*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *)
590
591 and define = string wrap (* #define s eol *) * (define_kind * define_val)
592 and define_kind =
593 | DefineVar
594 | DefineFunc of ((string wrap) wrap2 list) wrap (* () *)
595 | Undef
596 and define_val =
597 (* most common case; e.g. to define int constant *)
598 | DefineExpr of expression
599
600 | DefineStmt of statement
601 | DefineType of fullType
602 | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *)
603
604 | DefineFunction of definition
605 | DefineInit of initialiser (* in practice only { } with possible ',' *)
606
607 (* TODO DefineMulti of define_val list *)
608
609 | DefineText of string wrap
610 | DefineEmpty
611
612 | DefineTodo
613
614
615
616 and includ =
617 { i_include: inc_file wrap; (* #include s *)
618 (* cocci: computed in ? *)
619 i_rel_pos: include_rel_pos option ref;
620 (* cocci: cf -test incl *)
621 i_is_in_ifdef: bool;
622 (* cf cpp_ast_c.ml. set to None at parsing time. *)
623 i_content: (Common.filename (* full path *) * program) option;
624 }
625 and inc_file =
626 | Local of inc_elem list
627 | NonLocal of inc_elem list
628 | Weird of string (* ex: #include SYSTEM_H *)
629 and inc_elem = string
630
631 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
632 *
633 * The first_of and last_of store the list of prefixes that was
634 * introduced by the include. On #include <a/b/x>, if the include was
635 * the first in the file, it would give in first_of the following
636 * prefixes a/b/c; a/b/; a/ ; <empty>
637 *
638 * This is set after parsing, in cocci.ml, in update_rel_pos.
639 *)
640 and include_rel_pos = {
641 first_of : string list list;
642 last_of : string list list;
643 }
644
645
646
647 (* todo? to specialize if someone need more info *)
648 and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *)
649 | IfdefDirective of (ifdefkind * matching_tag) wrap
650 and ifdefkind =
651 | Ifdef (* todo? of string ? of formula_cpp ? *)
652 | IfdefElseif (* same *)
653 | IfdefElse (* same *)
654 | IfdefEndif
655 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
656 * a global so it means if you parse the same file twice you may get
657 * different id. I try now to avoid this pb by resetting it each
658 * time I parse a file.
659 *)
660 and matching_tag =
661 IfdefTag of (int (* tag *) * int (* total with this tag *))
662
663
664
665
666
667 (* ------------------------------------------------------------------------- *)
668 (* The toplevels elements *)
669 (* ------------------------------------------------------------------------- *)
670 and toplevel =
671 | Declaration of declaration
672 | Definition of definition
673
674 (* cppext: *)
675 | CppTop of cpp_directive
676 | IfdefTop of ifdef_directive (* * toplevel list *)
677
678 (* cppext: *)
679 | MacroTop of string * argument wrap2 list * il
680
681 | EmptyDef of il (* gccext: allow redundant ';' *)
682 | NotParsedCorrectly of il
683
684 | FinalDef of info (* EOF *)
685
686 (* ------------------------------------------------------------------------- *)
687 and program = toplevel list
688
689 (*****************************************************************************)
690 (* Cocci Bindings *)
691 (*****************************************************************************)
692 (* Was previously in pattern.ml, but because of the transformer,
693 * we need to decorate each token with some cocci code AND the environment
694 * for this cocci code.
695 *)
696 and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc
697 and metavar_binding_kind =
698 | MetaIdVal of string *
699 Ast_cocci.meta_name list (* negative constraints *)
700 | MetaFuncVal of string
701 | MetaLocalFuncVal of string
702
703 | MetaExprVal of expression (* a "clean expr" *) *
704 (*subterm constraints, currently exprs*)
705 Ast_cocci.meta_name list
706 | MetaExprListVal of argument wrap2 list
707 | MetaParamVal of parameterType
708 | MetaParamListVal of parameterType wrap2 list
709
710 | MetaTypeVal of fullType
711 | MetaInitVal of initialiser
712 | MetaDeclVal of declaration
713 | MetaFieldVal of field
714 | MetaStmtVal of statement
715
716 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
717 * because don't need to have the value for a position in the env of
718 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
719 * variables accessible via SmPL whereas the position can be one day
720 * so I think it's better to put MetaPosVal here *)
721 | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *)
722 | MetaPosValList of
723 (Common.filename * string (*element*) * posl * posl) list (* min, max *)
724 | MetaListlenVal of int
725
726
727 (*****************************************************************************)
728 (* C comments *)
729 (*****************************************************************************)
730
731 (* convention: I often use "m" for comments as I can not use "c"
732 * (already use for c stuff) and "com" is too long.
733 *)
734
735 (* this type will be associated to each token.
736 *)
737 and comments_around = {
738 mbefore: Token_c.comment_like_token list;
739 mafter: Token_c.comment_like_token list;
740
741 (* less: could remove ? do something simpler than CComment for
742 * coccinelle, cf above. *)
743 mbefore2: comment_and_relative_pos list;
744 mafter2: comment_and_relative_pos list;
745 }
746 and comment_and_relative_pos = {
747
748 minfo: Common.parse_info;
749 (* the int represent the number of lines of difference between the
750 * current token and the comment. When on same line, this number is 0.
751 * When previous line, -1. In some way the after/before in previous
752 * record is useless because the sign of the integer can helps
753 * do the difference too, but I keep it that way.
754 *)
755 mpos: int;
756 (* todo?
757 * cppbetween: bool; touse? if false positive
758 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
759 *)
760 }
761
762 and comment = Common.parse_info
763 and com = comment list ref
764
765 (* with sexp *)
766
767
768 (*****************************************************************************)
769 (* Some constructors *)
770 (*****************************************************************************)
771 let nullQualif = ({const=false; volatile= false}, [])
772 let nQ = nullQualif
773
774 let defaultInt = (BaseType (IntType (Si (Signed, CInt))))
775
776 let noType () = ref (None,NotTest)
777 let noInstr = (ExprStatement (None), [])
778 let noTypedefDef () = None
779
780 let emptyMetavarsBinding =
781 ([]: metavars_binding)
782
783 let emptyAnnotCocci =
784 (Ast_cocci.CONTEXT (Ast_cocci.NoPos,Ast_cocci.NOTHING),
785 ([] : metavars_binding list))
786
787 let emptyAnnot =
788 (None: (Ast_cocci.mcodekind * metavars_binding list) option)
789
790 (* compatibility mode *)
791 let mcode_and_env_of_cocciref aref =
792 match !aref with
793 | Some x -> x
794 | None -> emptyAnnotCocci
795
796
797 let emptyComments= {
798 mbefore = [];
799 mafter = [];
800 mbefore2 = [];
801 mafter2 = [];
802 }
803
804
805 (* for include, some meta information needed by cocci *)
806 let noRelPos () =
807 ref (None: include_rel_pos option)
808 let noInIfdef () =
809 ref false
810
811
812 (* When want add some info in ast that does not correspond to
813 * an existing C element.
814 * old: or when don't want 'synchronize' on it in unparse_c.ml
815 * (now have other mark for tha matter).
816 *)
817 let no_virt_pos = ({str="";charpos=0;line=0;column=0;file=""},-1)
818
819 let fakeInfo pi =
820 { pinfo = FakeTok ("",no_virt_pos);
821 cocci_tag = ref emptyAnnot;
822 comments_tag = ref emptyComments;
823 }
824
825 let noii = []
826 let noattr = []
827 let noi_content = (None: ((Common.filename * program) option))
828
829 (*****************************************************************************)
830 (* Wrappers *)
831 (*****************************************************************************)
832 let unwrap = fst
833
834 let unwrap2 = fst
835
836 let unwrap_expr ((unwrap_e, typ), iie) = unwrap_e
837 let rewrap_expr ((_old_unwrap_e, typ), iie) newe = ((newe, typ), iie)
838
839 let unwrap_typeC (qu, (typeC, ii)) = typeC
840 let rewrap_typeC (qu, (typeC, ii)) newtypeC = (qu, (newtypeC, ii))
841
842 let unwrap_typeCbis (typeC, ii) = typeC
843
844 let unwrap_st (unwrap_st, ii) = unwrap_st
845
846 (* ------------------------------------------------------------------------- *)
847 let mk_e unwrap_e ii = (unwrap_e, noType()), ii
848 let mk_e_bis unwrap_e ty ii = (unwrap_e, ty), ii
849
850 let mk_ty typeC ii = nQ, (typeC, ii)
851 let mk_tybis typeC ii = (typeC, ii)
852
853 let mk_st unwrap_st ii = (unwrap_st, ii)
854
855 (* ------------------------------------------------------------------------- *)
856 let get_ii_typeC_take_care (typeC, ii) = ii
857 let get_ii_st_take_care (st, ii) = ii
858 let get_ii_expr_take_care (e, ii) = ii
859
860 let get_st_and_ii (st, ii) = st, ii
861 let get_ty_and_ii (qu, (typeC, ii)) = qu, (typeC, ii)
862 let get_e_and_ii (e, ii) = e, ii
863
864
865 (* ------------------------------------------------------------------------- *)
866 let get_type_expr ((unwrap_e, typ), iie) = !typ
867 let set_type_expr ((unwrap_e, oldtyp), iie) newtyp =
868 oldtyp := newtyp
869 (* old: (unwrap_e, newtyp), iie *)
870
871 let get_onlytype_expr ((unwrap_e, typ), iie) =
872 match !typ with
873 | Some (ft,_local), _test -> Some ft
874 | None, _ -> None
875
876 let get_onlylocal_expr ((unwrap_e, typ), iie) =
877 match !typ with
878 | Some (ft,local), _test -> Some local
879 | None, _ -> None
880
881 (* ------------------------------------------------------------------------- *)
882 let rewrap_str s ii =
883 {ii with pinfo =
884 (match ii.pinfo with
885 OriginTok pi -> OriginTok { pi with Common.str = s;}
886 | ExpandedTok (pi,vpi) -> ExpandedTok ({ pi with Common.str = s;},vpi)
887 | FakeTok (_,vpi) -> FakeTok (s,vpi)
888 | AbstractLineTok pi -> OriginTok { pi with Common.str = s;})}
889
890 let rewrap_pinfo pi ii =
891 {ii with pinfo = pi}
892
893
894
895 (* info about the current location *)
896 let get_pi = function
897 OriginTok pi -> pi
898 | ExpandedTok (_,(pi,_)) -> pi
899 | FakeTok (_,(pi,_)) -> pi
900 | AbstractLineTok pi -> pi
901
902 (* original info *)
903 let get_opi = function
904 OriginTok pi -> pi
905 | ExpandedTok (pi,_) -> pi (* diff with get_pi *)
906 | FakeTok (_,_) -> failwith "no position information"
907 | AbstractLineTok pi -> pi
908
909 let str_of_info ii =
910 match ii.pinfo with
911 OriginTok pi -> pi.Common.str
912 | ExpandedTok (pi,_) -> pi.Common.str
913 | FakeTok (s,_) -> s
914 | AbstractLineTok pi -> pi.Common.str
915
916 let get_info f ii =
917 match ii.pinfo with
918 OriginTok pi -> f pi
919 | ExpandedTok (_,(pi,_)) -> f pi
920 | FakeTok (_,(pi,_)) -> f pi
921 | AbstractLineTok pi -> f pi
922
923 let get_orig_info f ii =
924 match ii.pinfo with
925 OriginTok pi -> f pi
926 | ExpandedTok (pi,_) -> f pi (* diff with get_info *)
927 | FakeTok (_,(pi,_)) -> f pi
928 | AbstractLineTok pi -> f pi
929
930 let make_expanded ii =
931 {ii with pinfo = ExpandedTok (get_opi ii.pinfo,no_virt_pos)}
932
933 let pos_of_info ii = get_info (function x -> x.Common.charpos) ii
934 let opos_of_info ii = get_orig_info (function x -> x.Common.charpos) ii
935 let line_of_info ii = get_orig_info (function x -> x.Common.line) ii
936 let col_of_info ii = get_orig_info (function x -> x.Common.column) ii
937 let file_of_info ii = get_orig_info (function x -> x.Common.file) ii
938 let mcode_of_info ii = fst (mcode_and_env_of_cocciref ii.cocci_tag)
939 let pinfo_of_info ii = ii.pinfo
940 let parse_info_of_info ii = get_pi ii.pinfo
941
942 let strloc_of_info ii =
943 spf "%s:%d" (file_of_info ii) (line_of_info ii)
944
945 let is_fake ii =
946 match ii.pinfo with
947 FakeTok (_,_) -> true
948 | _ -> false
949
950 let is_origintok ii =
951 match ii.pinfo with
952 | OriginTok pi -> true
953 | _ -> false
954
955 (* ------------------------------------------------------------------------- *)
956 type posrv = Real of Common.parse_info | Virt of virtual_position
957
958 let compare_pos ii1 ii2 =
959 let get_pos = function
960 OriginTok pi -> Real pi
961 | FakeTok (s,vpi) -> Virt vpi
962 | ExpandedTok (pi,vpi) -> Virt vpi
963 | AbstractLineTok pi -> Real pi in (* used for printing *)
964 let pos1 = get_pos (pinfo_of_info ii1) in
965 let pos2 = get_pos (pinfo_of_info ii2) in
966 match (pos1,pos2) with
967 (Real p1, Real p2) ->
968 compare p1.Common.charpos p2.Common.charpos
969 | (Virt (p1,_), Real p2) ->
970 if (compare p1.Common.charpos p2.Common.charpos) =|= (-1) then (-1) else 1
971 | (Real p1, Virt (p2,_)) ->
972 if (compare p1.Common.charpos p2.Common.charpos) =|= 1 then 1 else (-1)
973 | (Virt (p1,o1), Virt (p2,o2)) ->
974 let poi1 = p1.Common.charpos in
975 let poi2 = p2.Common.charpos in
976 match compare poi1 poi2 with
977 -1 -> -1
978 | 0 -> compare o1 o2
979 | x -> x
980
981 let equal_posl (l1,c1) (l2,c2) =
982 (l1 =|= l2) && (c1 =|= c2)
983
984 let info_to_fixpos ii =
985 match pinfo_of_info ii with
986 OriginTok pi -> Ast_cocci.Real pi.Common.charpos
987 | ExpandedTok (_,(pi,offset)) ->
988 Ast_cocci.Virt (pi.Common.charpos,offset)
989 | FakeTok (_,(pi,offset)) ->
990 Ast_cocci.Virt (pi.Common.charpos,offset)
991 | AbstractLineTok pi -> failwith "unexpected abstract"
992
993 (* cocci: *)
994 let is_test (e : expression) =
995 let (_,info), _ = e in
996 let (_,test) = !info in
997 test =*= Test
998
999 (*****************************************************************************)
1000 (* Abstract line *)
1001 (*****************************************************************************)
1002
1003 (* When we have extended the C Ast to add some info to the tokens,
1004 * such as its line number in the file, we can not use anymore the
1005 * ocaml '=' to compare Ast elements. To overcome this problem, to be
1006 * able to use again '=', we just have to get rid of all those extra
1007 * information, to "abstract those line" (al) information.
1008 *
1009 * Julia then modifies it a little to have a tokenindex, so the original
1010 * true al_info is in fact real_al_info.
1011 *)
1012
1013 let al_info tokenindex x =
1014 { pinfo =
1015 (AbstractLineTok
1016 {charpos = tokenindex;
1017 line = tokenindex;
1018 column = tokenindex;
1019 file = "";
1020 str = str_of_info x});
1021 cocci_tag = ref emptyAnnot;
1022 comments_tag = ref emptyComments;
1023 }
1024
1025 let semi_al_info x =
1026 { x with
1027 cocci_tag = ref emptyAnnot;
1028 comments_tag = ref emptyComments;
1029 }
1030
1031 let magic_real_number = -10
1032
1033 let real_al_info x =
1034 { pinfo =
1035 (AbstractLineTok
1036 {charpos = magic_real_number;
1037 line = magic_real_number;
1038 column = magic_real_number;
1039 file = "";
1040 str = str_of_info x});
1041 cocci_tag = ref emptyAnnot;
1042 comments_tag = ref emptyComments;
1043 }
1044
1045 let al_comments x =
1046 let keep_cpp l =
1047 List.filter (function (Token_c.TCommentCpp _,_) -> true | _ -> false) l in
1048 let al_com (x,i) =
1049 (x,{i with Common.charpos = magic_real_number;
1050 Common.line = magic_real_number;
1051 Common.column = magic_real_number}) in
1052 {mbefore = []; (* duplicates mafter of the previous token *)
1053 mafter = List.map al_com (keep_cpp x.mafter);
1054 mbefore2=[];
1055 mafter2=[];
1056 }
1057
1058 let al_info_cpp tokenindex x =
1059 { pinfo =
1060 (AbstractLineTok
1061 {charpos = tokenindex;
1062 line = tokenindex;
1063 column = tokenindex;
1064 file = "";
1065 str = str_of_info x});
1066 cocci_tag = ref emptyAnnot;
1067 comments_tag = ref (al_comments !(x.comments_tag));
1068 }
1069
1070 let semi_al_info_cpp x =
1071 { x with
1072 cocci_tag = ref emptyAnnot;
1073 comments_tag = ref (al_comments !(x.comments_tag));
1074 }
1075
1076 let real_al_info_cpp x =
1077 { pinfo =
1078 (AbstractLineTok
1079 {charpos = magic_real_number;
1080 line = magic_real_number;
1081 column = magic_real_number;
1082 file = "";
1083 str = str_of_info x});
1084 cocci_tag = ref emptyAnnot;
1085 comments_tag = ref (al_comments !(x.comments_tag));
1086 }
1087
1088
1089 (*****************************************************************************)
1090 (* Views *)
1091 (*****************************************************************************)
1092
1093 (* Transform a list of arguments (or parameters) where the commas are
1094 * represented via the wrap2 and associated with an element, with
1095 * a list where the comma are on their own. f(1,2,2) was
1096 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
1097 *
1098 * Used in cocci_vs_c.ml, to have a more direct correspondance between
1099 * the ast_cocci of julia and ast_c.
1100 *)
1101 let rec (split_comma: 'a wrap2 list -> ('a, il) either list) =
1102 function
1103 | [] -> []
1104 | (e, ii)::xs ->
1105 if null ii
1106 then (Left e)::split_comma xs
1107 else Right ii::Left e::split_comma xs
1108
1109 let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) =
1110 function
1111 | [] -> []
1112 | Right ii::Left e::xs ->
1113 (e, ii)::unsplit_comma xs
1114 | Left e::xs ->
1115 let empty_ii = [] in
1116 (e, empty_ii)::unsplit_comma xs
1117 | Right ii::_ ->
1118 raise Impossible
1119
1120
1121
1122
1123 (*****************************************************************************)
1124 (* Helpers, could also be put in lib_parsing_c.ml instead *)
1125 (*****************************************************************************)
1126
1127 (* should maybe be in pretty_print_c ? *)
1128
1129 let s_of_inc_file inc_file =
1130 match inc_file with
1131 | Local xs -> xs +> Common.join "/"
1132 | NonLocal xs -> xs +> Common.join "/"
1133 | Weird s -> s
1134
1135 let s_of_inc_file_bis inc_file =
1136 match inc_file with
1137 | Local xs -> "\"" ^ xs +> Common.join "/" ^ "\""
1138 | NonLocal xs -> "<" ^ xs +> Common.join "/" ^ ">"
1139 | Weird s -> s
1140
1141 let fieldname_of_fieldkind fieldkind =
1142 match fieldkind with
1143 | Simple (sopt, ft) -> sopt
1144 | BitField (sopt, ft, info, expr) -> sopt
1145
1146
1147 let s_of_attr attr =
1148 attr
1149 +> List.map (fun (Attribute s, ii) -> s)
1150 +> Common.join ","
1151
1152
1153 (* ------------------------------------------------------------------------- *)
1154 let str_of_name ident =
1155 match ident with
1156 | RegularName (s,ii) -> s
1157 | CppConcatenatedName xs ->
1158 xs +> List.map (fun (x,iiop) -> unwrap x) +> Common.join "##"
1159 | CppVariadicName (s, ii) -> "##" ^ s
1160 | CppIdentBuilder ((s,iis), xs) ->
1161 s ^ "(" ^
1162 (xs +> List.map (fun ((x,iix), iicomma) -> x) +> Common.join ",") ^
1163 ")"
1164
1165 let get_s_and_ii_of_name name =
1166 match name with
1167 | RegularName (s, iis) -> s, iis
1168 | CppIdentBuilder ((s, iis), xs) -> s, iis
1169 | CppVariadicName (s,iis) ->
1170 let (iop, iis) = Common.tuple_of_list2 iis in
1171 s, [iis]
1172 | CppConcatenatedName xs ->
1173 (match xs with
1174 | [] -> raise Impossible
1175 | ((s,iis),noiiop)::xs ->
1176 s, iis
1177 )
1178
1179 let get_s_and_info_of_name name =
1180 let (s,ii) = get_s_and_ii_of_name name in
1181 s, List.hd ii
1182
1183 let info_of_name name =
1184 let (s,ii) = get_s_and_ii_of_name name in
1185 List.hd ii
1186
1187 let ii_of_name name =
1188 let (s,ii) = get_s_and_ii_of_name name in
1189 ii
1190
1191 let get_local_ii_of_expr_inlining_ii_of_name e =
1192 let (ebis,_),ii = e in
1193 match ebis, ii with
1194 | Ident name, noii ->
1195 assert(null noii);
1196 ii_of_name name
1197 | RecordAccess (e, name), ii ->
1198 ii @ ii_of_name name
1199 | RecordPtAccess (e, name), ii ->
1200 ii @ ii_of_name name
1201 | _, ii -> ii
1202
1203
1204 let get_local_ii_of_tybis_inlining_ii_of_name ty =
1205 match ty with
1206 | TypeName (name, _typ), [] -> ii_of_name name
1207 | _, ii -> ii
1208
1209 (* the following is used to obtain the argument to LocalVar *)
1210 let info_of_type ft =
1211 let (qu, ty) = ft in
1212 (* bugfix: because of string->name, the ii can be deeper *)
1213 let ii = get_local_ii_of_tybis_inlining_ii_of_name ty in
1214 match ii with
1215 | ii::_ -> ii.pinfo
1216 | [] -> failwith "type has no text; need to think again"
1217
1218 (* only Label and Goto have name *)
1219 let get_local_ii_of_st_inlining_ii_of_name st =
1220 match st with
1221 | Labeled (Label (name, st)), ii -> ii_of_name name @ ii
1222 | Jump (Goto name), ii ->
1223 let (i1, i3) = Common.tuple_of_list2 ii in
1224 [i1] @ ii_of_name name @ [i3]
1225 | _, ii -> ii
1226
1227
1228
1229 (* ------------------------------------------------------------------------- *)
1230 let name_of_parameter param =
1231 param.p_namei +> Common.map_option (str_of_name)
1232