3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
17 (*****************************************************************************)
18 (* The AST C related types *)
19 (*****************************************************************************)
21 * Some stuff are tagged semantic: which means that they are computed
24 * This means that some elements in this AST are present only if
25 * some annotation/transformation has been done on the original AST returned
26 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
30 (* ------------------------------------------------------------------------- *)
32 (* ------------------------------------------------------------------------- *)
34 (* To allow some transformations over the AST, we keep as much information
35 * as possible in the AST such as the tokens content and their locations.
36 * Those info are called 'info' (how original) and can be tagged.
37 * For instance one tag may say that the unparser should remove this token.
39 * Update: Now I use a ref! in those 'info' so take care.
40 * That means that modifications of the info of tokens can have
41 * an effect on the info stored in the ast (which is sometimes
42 * convenient, cf unparse_c.ml or comment_annotater_c.ml)
44 * convention: I often use 'ii' for the name of a list of info.
46 * Sometimes we want to add someting at the beginning or at the end
47 * of a construct. For 'function' and 'decl' we want to add something
48 * to their left and for 'if' 'while' et 'for' and so on at their right.
49 * We want some kinds of "virtual placeholders" that represent the start or
50 * end of a construct. We use fakeInfo for that purpose.
51 * To identify those cases I have added a fakestart/fakeend comment.
53 * cocci: Each token will be decorated in the future by the mcodekind
54 * of cocci. It is the job of the pretty printer to look at this
55 * information and decide to print or not the token (and also the
56 * pending '+' associated sometimes with the token).
58 * The first time that we parse the original C file, the mcodekind is
59 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
60 * associated. This is what I call a "clean" expr/statement/....
62 * Each token will also be decorated in the future with an environment,
63 * because the pending '+' may contain metavariables that refer to some
70 type posl
= int * int (* line-col, for MetaPosValList, for position variables *)
73 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
74 type virtual_position
= Common.parse_info
* int (* character offset *)
78 (* Present both in ast and list of tokens *)
79 | OriginTok
of Common.parse_info
80 (* Present only in ast and generated after parsing. Used mainly
81 * by Julia, to add stuff at virtual places, beginning of func or decl *)
82 | FakeTok
of string * virtual_position
83 (* Present both in ast and list of tokens. *)
84 | ExpandedTok
of Common.parse_info
* virtual_position
86 (* Present neither in ast nor in list of tokens
87 * but only in the '+' of the mcode of some tokens. Those kind of tokens
88 * are used to be able to use '=' to compare big ast portions.
90 | AbstractLineTok
of Common.parse_info
(* local to the abstracted thing *)
96 (* this cocci_tag can be changed, which is how we can express some program
97 * transformations by tagging the tokens involved in this transformation.
99 cocci_tag
: (Ast_cocci.mcodekind
* metavars_binding list
) option ref;
100 (* set in comment_annotater_c.ml *)
101 comments_tag
: comments_around
ref;
103 (* annotations on the token (mutable) *)
104 mutable annots_tag
: Token_annot.annots
106 (* todo? token_info : sometimes useful to know what token it was *)
110 (* wrap2 is like wrap, except that I use it often for separator such
111 * as ','. In that case the info is associated to the argument that
112 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])].
114 * wrap3 is like wrap, except that I use it in case sometimes it
115 * will be empty because the info will be included in a nested
116 * entity (e.g. for Ident in expr because it's inlined in the name)
117 * so user should never assume List.length wrap3 > 0.
119 and 'a wrap
= 'a
* il
120 and 'a wrap2
= 'a
* il
121 and 'a wrap3
= 'a
* il
(* * evotype*)
123 (* ------------------------------------------------------------------------- *)
125 (* ------------------------------------------------------------------------- *)
127 (* was called 'ident' before, but 'name' is I think better
128 * as concatenated strings can be used not only for identifiers and for
129 * declarators, but also for fields, for labels, etc.
131 * Note: because now the info is embeded in the name, the info for
132 * expression like Ident, or types like Typename, are not anymore
133 * stored in the expression or type. Hence if you assume this,
134 * which was true before, you are now wrong. So never write code like
135 * let (unwrape,_), ii = e and use 'ii' believing it contains
136 * the local ii to e. If you want to do that, use the appropiate
137 * wrapper get_local_ii_of_expr_inlining_ii_of_name.
140 | RegularName
of string wrap
141 | CppConcatenatedName
of (string wrap
) wrap2
(* the ## separators *) list
142 (* normally only used inside list of things, as in parameters or arguments
143 * in which case, cf cpp-manual, it has a special meaning *)
144 | CppVariadicName
of string wrap
(* ## s *)
145 | CppIdentBuilder
of string wrap
(* s ( ) *) *
146 ((string wrap
) wrap2 list
) (* arguments *)
149 (* ------------------------------------------------------------------------- *)
151 (* ------------------------------------------------------------------------- *)
152 (* Could have more precise type in fullType, in expression, etc, but
153 * it requires to do too much things in parsing such as checking no
154 * conflicting structname, computing value, etc. Better to separate
155 * concern. So I put '=>' to mean what we would really like. In fact
156 * what we really like is defining another fullType, expression, etc
157 * from scratch, because many stuff are just sugar.
159 * invariant: Array and FunctionType have also typeQualifier but they
160 * dont have sense. I put this to factorise some code. If you look in
161 * the grammar, you see that we can never specify const for the array
162 * himself (but we can do it for pointer) or function, we always
163 * have in the action rule of the grammar a { (nQ, FunctionType ...) }.
166 * Because of ExprStatement, we can have more 'new scope' events, but
167 * rare I think. For instance with 'array of constExpression' there can
168 * have an exprStatement and a new (local) struct defined. Same for
174 and fullType
= typeQualifier
* typeC
175 and typeC
= typeCbis wrap
(* todo reput wrap3 *)
178 NoType
(* for c++ only, and for K&R C *)
179 | BaseType
of baseType
181 | Pointer
of fullType
182 | Array
of constExpression
option * fullType
183 | FunctionType
of functionType
185 | Enum
of string option * enumType
186 | StructUnion
of structUnion
* string option * structType
(* new scope *)
189 | StructUnionName
of structUnion
* string
191 | TypeName
of name
* fullType
option (* semantic: filled later *)
193 | ParenType
of fullType
(* for unparser: *)
195 (* gccext: TypeOfType below may seems useless; Why declare a
196 * __typeof__(int) x; ?
197 * When used with macros, it allows to fix a problem of C which
198 * is that type declaration can be spread around the ident. Indeed it
199 * may be difficult to have a macro such as
200 * '#define macro(type, ident) type ident;'
201 * because when you want to do a
202 * macro(char[256], x),
203 * then it will generate invalid code, but with a
204 * '#define macro(type, ident) __typeof(type) ident;'
207 | TypeOfExpr
of expression
208 | TypeOfType
of fullType
210 (* cppext: IfdefType TODO *)
212 (* -------------------------------------- *)
215 | FloatType
of floatType
220 (* stdC: type section
222 * note: char and signed char are semantically different!!
224 and intType
= CChar
(* obsolete? | CWchar *)
227 and signed
= sign
* base
228 and base
= CChar2
| CShort
| CInt
| CLong
| CLongLong
(* gccext: *)
229 and sign
= Signed
| UnSigned
231 and floatType
= CFloat
| CDouble
| CLongDouble
234 (* -------------------------------------- *)
235 and structUnion
= Struct
| Union
236 and structType
= field list
238 | DeclarationField
of field_declaration
243 | MacroDeclField
of (string * argument wrap2 list
)
244 wrap
(* optional ';'*)
247 | CppDirectiveStruct
of cpp_directive
248 | IfdefStruct
of ifdef_directive
(* * field list list *)
251 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
252 and field_declaration
=
253 | FieldDeclList
of fieldkind wrap2 list
(* , *) wrap
(* ; *)
255 (* At first I thought that a bitfield could be only Signed/Unsigned.
256 * But it seems that gcc allow char i:4. C rule must say that you
257 * can cast into int so enum too, ...
260 | Simple
of name
option * fullType
261 | BitField
of name
option * fullType
*
262 info
(* : *) * constExpression
263 (* fullType => BitFieldInt | BitFieldUnsigned *)
266 (* -------------------------------------- *)
267 and enumType
= oneEnumType wrap2
(* , *) list
268 (* => string * int list *)
270 and oneEnumType
= name
* (info
(* = *) * constExpression
) option
272 (* -------------------------------------- *)
273 (* return * (params * has "...") *)
274 and functionType
= fullType
* (parameterType wrap2 list
* bool wrap
)
276 { p_namei
: name
option;
277 p_register
: bool wrap
;
280 (* => (bool (register) * fullType) list * bool *)
283 and typeQualifier
= typeQualifierbis wrap
284 and typeQualifierbis
= {const
: bool; volatile
: bool}
286 (* gccext: cppext: *)
287 and attribute
= attributebis wrap
289 | Attribute
of string
291 (* ------------------------------------------------------------------------- *)
293 (* ------------------------------------------------------------------------- *)
294 and expression
= (expressionbis
* exp_info
ref (* semantic: *)) wrap3
295 and exp_info
= exp_type
option * test
296 and exp_type
= fullType
(* Type_c.completed_and_simplified *) * local
297 and local
= LocalVar
of parse_info
| StaticLocalVar
of parse_info
298 | NotLocalVar
(* cocci: *)
299 and test
= Test
| NotTest
(* cocci: *)
303 (* Ident can be a enumeration constant, a simple variable, a name of a func.
304 * With cppext, Ident can also be the name of a macro. Sparse says
305 * "an identifier with a meaning is a symbol" *)
306 | Ident
of name
(* todo? more semantic info such as LocalFunc *)
308 | Constant
of constant
309 | FunCall
of expression
* argument wrap2
(* , *) list
310 (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *)
311 | CondExpr
of expression
* expression
option * expression
313 (* should be considered as statements, bad C langage *)
314 | Sequence
of expression
* expression
315 | Assignment
of expression
* assignOp
* expression
318 | Postfix
of expression
* fixOp
319 | Infix
of expression
* fixOp
321 | Unary
of expression
* unaryOp
322 | Binary
of expression
* binaryOp
* expression
324 | ArrayAccess
of expression
* expression
326 (* field ident access *)
327 | RecordAccess
of expression
* name
328 | RecordPtAccess
of expression
* name
329 (* redundant normally, could replace it by DeRef RecordAcces *)
331 | SizeOfExpr
of expression
332 | SizeOfType
of fullType
333 | Cast
of fullType
* expression
336 | StatementExpr
of compound wrap
(* ( ) new scope *)
337 | Constructor
of fullType
* initialiser
340 | ParenExpr
of expression
343 | New
of (argument wrap2
(* , *) list
) option * argument
344 | Delete
of expression
346 (* cppext: IfdefExpr TODO *)
348 (* cppext: normally just expression *)
349 and argument
= (expression
, weird_argument
) Common.either
351 | ArgType
of parameterType
352 | ArgAction
of action_macro
354 (* todo: ArgStatement of statement, possibly have ghost token *)
358 (* I put string for Int and Float because int would not be enough because
359 * OCaml int are 31 bits. So simpler to do string. Same reason to have
360 * string instead of int list for the String case.
362 * note: -2 is not a constant, it is the unary operator '-'
363 * applied to constant 2. So the string must represent a positive
367 | String
of (string * isWchar
)
368 | MultiString
of string list
(* can contain MacroString, todo: more info *)
369 | Char
of (string * isWchar
) (* normally it is equivalent to Int *)
370 | Int
of (string * intType
)
371 | Float
of (string * floatType
)
373 and isWchar
= IsWchar
| IsChar
376 and unaryOp
= GetRef
| DeRef
| UnPlus
| UnMinus
| Tilde
| Not
377 | GetRefLabel
(* gccext: GetRefLabel, via &&label notation *)
378 and assignOp
= SimpleAssign
| OpAssign
of arithOp
379 and fixOp
= Dec
| Inc
381 and binaryOp
= Arith
of arithOp
| Logical
of logicalOp
384 | Plus
| Minus
| Mul
| Div
| Mod
386 | And
| Or
| Xor
| Max
| Min
389 | Inf
| Sup
| InfEq
| SupEq
393 and constExpression
= expression
(* => int *)
395 (* ------------------------------------------------------------------------- *)
397 (* ------------------------------------------------------------------------- *)
398 (* note: that assignement is not a statement but an expression;
399 * wonderful C langage.
401 * note: I use 'and' for type definition cos gccext allow statement as
402 * expression, so need mutual recursive type definition.
406 and statement
= statementbis wrap3
409 | Compound
of compound
(* new scope *)
410 | ExprStatement
of exprStatement
411 | Selection
of selection
(* have fakeend *)
412 | Iteration
of iteration
(* have fakeend *)
415 (* simplify cocci: only at the beginning of a compound normally *)
416 | Decl
of declaration
420 | NestedFunc
of definition
427 and labeled
= Label
of name
* statement
428 | Case
of expression
* statement
429 | CaseRange
of expression
* expression
* statement
(* gccext: *)
430 | Default
of statement
433 * old: compound = (declaration list * statement list)
434 * old: (declaration, statement) either list
435 * Simplify cocci to just have statement list, by integrating Decl in stmt.
437 * update: now introduce also the _sequencable to allow ifdef in the middle.
438 * Indeed, I now allow ifdefs in the ast but they must be only between
439 * "sequencable" elements. They can be put in a type only if this type
440 * is used in a list, like at the toplevel, used in a 'toplevel list',
441 * or inside a compound, used in a 'statement list'. I must not allow
442 * ifdef anywhere. For instance I can not make ifdef a statement
443 * cos some instruction like If accept only one statement and the
444 * ifdef directive must not take the place of a legitimate instruction.
445 * We had a similar phenomena in SmPL where we have the notion
446 * of statement and sequencable statement too. Once you have
447 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
448 * and introduce the ifdef.
450 * update: those ifdefs are either passed, or present in the AST but in
451 * a flat form. To structure those flat ifdefs you have to run
452 * a transformation that will put in a tree the statements inside
453 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
454 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
457 and compound
= statement_sequencable list
459 (* cppext: easier to put at statement_list level than statement level *)
460 and statement_sequencable
=
461 | StmtElem
of statement
464 | CppDirectiveStmt
of cpp_directive
465 | IfdefStmt
of ifdef_directive
467 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
468 | IfdefStmt2
of ifdef_directive list
* (statement_sequencable list
) list
470 and exprStatement
= expression
option
472 and declOrExpr
= ForDecl
of declaration
| ForExp
of expression
option wrap
474 (* for Switch, need check that all elements in the compound start
475 * with a case:, otherwise unreachable code.
478 | If
of expression
* statement
* statement
479 | Switch
of expression
* statement
483 | While
of expression
* statement
484 | DoWhile
of statement
* expression
485 | For
of declOrExpr
* exprStatement wrap
* exprStatement wrap
*
488 | MacroIteration
of string * argument wrap2 list
* statement
490 and jump
= Goto
of name
492 | Return
| ReturnExpr
of expression
493 | GotoComputed
of expression
(* gccext: goto *exp ';' *)
497 and asmbody
= il
(* string list *) * colon wrap
(* : *) list
498 and colon
= Colon
of colon_option wrap2 list
499 and colon_option
= colon_option_bis wrap
500 and colon_option_bis
= ColonMisc
| ColonExpr
of expression
503 (* ------------------------------------------------------------------------- *)
505 (* ------------------------------------------------------------------------- *)
506 (* (string * ...) option cos can have empty declaration or struct tag
509 * Before I had a Typedef constructor, but why make this special case and not
510 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
511 * declarations ? So I try to generalise and not have Typedef either. This
512 * requires more work in parsing. Better to separate concern.
514 * Before the need for unparser, I didn't have a DeclList but just a Decl.
516 * I am not sure what it means to declare a prototype inline, but gcc
521 | DeclList
of onedecl wrap2
(* , *) list wrap
(* ; fakestart sto *)
523 (* bool is true if there is a ; at the end *)
524 | MacroDecl
of (string * argument wrap2 list
* bool) wrap
(* fakestart *)
526 (string * argument wrap2 list
* initialiser
) wrap
(* fakestart *)
529 { v_namei
: (name
* v_init
) option;
531 (* semantic: set in type annotated and used in cocci_vs_c
532 * when we transform some initialisation into affectation
534 v_type_bis
: fullType
(* Type_c.completed_and_simplified *) option ref;
536 v_local
: local_decl
; (* cocci: *)
537 v_attr
: attribute list
; (* gccext: *)
540 NoInit
| ValInit
of info
* initialiser
541 | ConstrInit
of argument wrap2
(* , *) list wrap
542 and storage
= storagebis
* bool (* gccext: inline or not *)
543 and storagebis
= NoSto
| StoTypedef
| Sto
of storageClass
544 and storageClass
= Auto
| Static
| Register
| Extern
546 and local_decl
= LocalDecl
| NotLocalDecl
548 (* fullType is the type used if the type should be converted to
549 an assignment. It can be adjusted in the type annotation
550 phase when typedef information is availalble *)
551 and initialiser
= initialiserbis wrap
553 | InitExpr
of expression
554 | InitList
of initialiser wrap2
(* , *) list
556 | InitDesignators
of designator list
* initialiser
557 | InitFieldOld
of string * initialiser
558 | InitIndexOld
of expression
* initialiser
560 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
561 and designator
= designatorbis wrap
563 | DesignatorField
of string
564 | DesignatorIndex
of expression
565 | DesignatorRange
of expression
* expression
567 (* ------------------------------------------------------------------------- *)
568 (* Function definition *)
569 (* ------------------------------------------------------------------------- *)
570 (* Normally we should define another type functionType2 because there
571 * are more restrictions on what can define a function than a pointer
572 * function. For instance a function declaration can omit the name of the
573 * parameter whereas a function definition can not. But, in some cases such
574 * as 'f(void) {', there is no name too, so I simplified and reused the
575 * same functionType type for both declaration and function definition.
577 * Also old style C does not have type in the parameter, so again simpler
578 * to abuse the functionType and allow missing type.
580 and definition
= definitionbis wrap
(* ( ) { } fakestart sto *)
583 f_type
: functionType
; (* less? a functionType2 ? *)
586 f_attr
: attribute list
; (* gccext: *)
587 f_old_c_style
: declaration list
option;
589 (* cppext: IfdefFunHeader TODO *)
591 (* ------------------------------------------------------------------------- *)
592 (* cppext: cpp directives, #ifdef, #define and #include body *)
593 (* ------------------------------------------------------------------------- *)
598 (*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *)
600 and define
= string wrap
(* #define s eol *) * (define_kind
* define_val
)
603 | DefineFunc
of ((string wrap
) wrap2 list
) wrap
(* () *)
606 (* most common case; e.g. to define int constant *)
607 | DefineExpr
of expression
609 | DefineStmt
of statement
610 | DefineType
of fullType
611 | DefineDoWhileZero
of (statement
* expression
) wrap
(* do { } while(0) *)
613 | DefineFunction
of definition
614 | DefineInit
of initialiser
(* in practice only { } with possible ',' *)
616 | DefineMulti
of statement list
618 | DefineText
of string wrap
626 { i_include
: inc_file wrap
; (* #include s *)
627 (* cocci: computed in ? *)
628 i_rel_pos
: include_rel_pos
option ref;
629 (* cocci: cf -test incl *)
631 (* cf cpp_ast_c.ml. set to None at parsing time. *)
632 i_content
: (Common.filename
(* full path *) * program
) option;
635 | Local
of inc_elem list
636 | NonLocal
of inc_elem list
637 | Weird
of string (* ex: #include SYSTEM_H *)
638 and inc_elem
= string
640 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
642 * The first_of and last_of store the list of prefixes that was
643 * introduced by the include. On #include <a/b/x>, if the include was
644 * the first in the file, it would give in first_of the following
645 * prefixes a/b/c; a/b/; a/ ; <empty>
647 * This is set after parsing, in cocci.ml, in update_rel_pos.
649 and include_rel_pos
= {
650 first_of
: string list list
;
651 last_of
: string list list
;
656 (* todo? to specialize if someone need more info *)
657 and ifdef_directive
= (* or and 'a ifdefed = 'a list wrap *)
658 | IfdefDirective
of (ifdefkind
* matching_tag
) wrap
660 | Ifdef
(* todo? of string ? of formula_cpp ? *)
661 | IfdefElseif
(* same *)
662 | IfdefElse
(* same *)
664 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
665 * a global so it means if you parse the same file twice you may get
666 * different id. I try now to avoid this pb by resetting it each
667 * time I parse a file.
670 IfdefTag
of (int (* tag *) * int (* total with this tag *))
676 (* ------------------------------------------------------------------------- *)
677 (* The toplevels elements *)
678 (* ------------------------------------------------------------------------- *)
680 | Declaration
of declaration
681 | Definition
of definition
684 | CppTop
of cpp_directive
685 | IfdefTop
of ifdef_directive
(* * toplevel list *)
688 | MacroTop
of string * argument wrap2 list
* il
690 | EmptyDef
of il
(* gccext: allow redundant ';' *)
691 | NotParsedCorrectly
of il
693 | FinalDef
of info
(* EOF *)
696 | Namespace
of toplevel list
* il
698 (* ------------------------------------------------------------------------- *)
699 and program
= toplevel list
701 (*****************************************************************************)
703 (*****************************************************************************)
704 (* Was previously in pattern.ml, but because of the transformer,
705 * we need to decorate each token with some cocci code AND the environment
706 * for this cocci code.
708 and metavars_binding
= (Ast_cocci.meta_name
, metavar_binding_kind
) assoc
709 and metavar_binding_kind
=
710 | MetaIdVal
of string *
711 Ast_cocci.meta_name list
(* negative constraints *)
712 | MetaFuncVal
of string
713 | MetaLocalFuncVal
of string
715 | MetaExprVal
of expression
(* a "clean expr" *) *
716 (*subterm constraints, currently exprs*)
717 Ast_cocci.meta_name list
718 | MetaExprListVal
of argument wrap2 list
719 | MetaParamVal
of parameterType
720 | MetaParamListVal
of parameterType wrap2 list
722 | MetaTypeVal
of fullType
723 | MetaInitVal
of initialiser
724 | MetaInitListVal
of initialiser wrap2 list
725 | MetaDeclVal
of declaration
726 | MetaFieldVal
of field
727 | MetaFieldListVal
of field list
728 | MetaStmtVal
of statement
730 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
731 * because don't need to have the value for a position in the env of
732 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
733 * variables accessible via SmPL whereas the position can be one day
734 * so I think it's better to put MetaPosVal here *)
735 | MetaPosVal
of (Ast_cocci.fixpos
* Ast_cocci.fixpos
) (* max, min *)
737 (Common.filename
* string (*element*) * posl
* posl
) list
(* min, max *)
738 | MetaListlenVal
of int
741 (*****************************************************************************)
743 (*****************************************************************************)
745 (* convention: I often use "m" for comments as I can not use "c"
746 * (already use for c stuff) and "com" is too long.
749 (* this type will be associated to each token.
751 and comments_around
= {
752 mbefore
: Token_c.comment_like_token list
;
753 mafter
: Token_c.comment_like_token list
;
755 (* less: could remove ? do something simpler than CComment for
756 * coccinelle, cf above. *)
757 mbefore2
: comment_and_relative_pos list
;
758 mafter2
: comment_and_relative_pos list
;
760 and comment_and_relative_pos
= {
762 minfo
: Common.parse_info
;
763 (* the int represent the number of lines of difference between the
764 * current token and the comment. When on same line, this number is 0.
765 * When previous line, -1. In some way the after/before in previous
766 * record is useless because the sign of the integer can helps
767 * do the difference too, but I keep it that way.
771 * cppbetween: bool; touse? if false positive
772 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
776 and comment
= Common.parse_info
777 and com
= comment list
ref
782 (*****************************************************************************)
783 (* Some constructors *)
784 (*****************************************************************************)
785 let nullQualif = ({const
=false; volatile
= false}, [])
788 let defaultInt = (BaseType
(IntType
(Si
(Signed
, CInt
))))
790 let noType () = ref (None
,NotTest
)
791 let noInstr = (ExprStatement
(None
), [])
792 let noTypedefDef () = None
794 let emptyMetavarsBinding =
795 ([]: metavars_binding
)
797 let emptyAnnotCocci =
798 (Ast_cocci.CONTEXT
(Ast_cocci.NoPos
,Ast_cocci.NOTHING
),
799 ([] : metavars_binding list
))
802 (None
: (Ast_cocci.mcodekind
* metavars_binding list
) option)
804 (* compatibility mode *)
805 let mcode_and_env_of_cocciref aref
=
808 | None
-> emptyAnnotCocci
819 (* for include, some meta information needed by cocci *)
821 ref (None
: include_rel_pos
option)
826 (* When want add some info in ast that does not correspond to
827 * an existing C element.
828 * old: or when don't want 'synchronize' on it in unparse_c.ml
829 * (now have other mark for tha matter).
831 let no_virt_pos = ({str
="";charpos
=0;line
=0;column
=0;file
=""},-1)
834 { pinfo
= FakeTok
("",no_virt_pos);
835 cocci_tag
= ref emptyAnnot;
836 annots_tag
= Token_annot.empty
;
837 comments_tag
= ref emptyComments;
842 let noi_content = (None
: ((Common.filename
* program
) option))
844 (*****************************************************************************)
846 (*****************************************************************************)
851 let unwrap_expr ((unwrap_e
, typ
), iie
) = unwrap_e
852 let rewrap_expr ((_old_unwrap_e
, typ
), iie
) newe
= ((newe
, typ
), iie
)
854 let unwrap_typeC (qu
, (typeC
, ii
)) = typeC
855 let rewrap_typeC (qu
, (typeC
, ii
)) newtypeC
= (qu
, (newtypeC
, ii
))
857 let unwrap_typeCbis (typeC
, ii
) = typeC
859 let unwrap_st (unwrap_st, ii
) = unwrap_st
861 (* ------------------------------------------------------------------------- *)
862 let mk_e unwrap_e ii
= (unwrap_e
, noType()), ii
863 let mk_e_bis unwrap_e ty ii
= (unwrap_e
, ty
), ii
865 let mk_ty typeC ii
= nQ, (typeC
, ii
)
866 let mk_tybis typeC ii
= (typeC
, ii
)
868 let mk_st unwrap_st ii
= (unwrap_st, ii
)
870 (* ------------------------------------------------------------------------- *)
871 let get_ii_typeC_take_care (typeC
, ii
) = ii
872 let get_ii_st_take_care (st
, ii
) = ii
873 let get_ii_expr_take_care (e
, ii
) = ii
875 let get_st_and_ii (st
, ii
) = st
, ii
876 let get_ty_and_ii (qu
, (typeC
, ii
)) = qu
, (typeC
, ii
)
877 let get_e_and_ii (e
, ii
) = e
, ii
880 (* ------------------------------------------------------------------------- *)
881 let get_type_expr ((unwrap_e
, typ
), iie
) = !typ
882 let set_type_expr ((unwrap_e
, oldtyp
), iie
) newtyp
=
884 (* old: (unwrap_e, newtyp), iie *)
886 let get_onlytype_expr ((unwrap_e
, typ
), iie
) =
888 | Some
(ft
,_local
), _test
-> Some ft
891 let get_onlylocal_expr ((unwrap_e
, typ
), iie
) =
893 | Some
(ft
,local
), _test
-> Some local
896 (* ------------------------------------------------------------------------- *)
897 let rewrap_str s ii
=
900 OriginTok pi
-> OriginTok
{ pi
with Common.str
= s
;}
901 | ExpandedTok
(pi
,vpi
) -> ExpandedTok
({ pi
with Common.str
= s
;},vpi
)
902 | FakeTok
(_
,vpi
) -> FakeTok
(s
,vpi
)
903 | AbstractLineTok pi
-> OriginTok
{ pi
with Common.str
= s
;})}
905 let rewrap_pinfo pi ii
=
910 (* info about the current location *)
911 let get_pi = function
913 | ExpandedTok
(_
,(pi
,_
)) -> pi
914 | FakeTok
(_
,(pi
,_
)) -> pi
915 | AbstractLineTok pi
-> pi
918 let get_opi = function
920 | ExpandedTok
(pi
,_
) -> pi
(* diff with get_pi *)
921 | FakeTok
(_
,_
) -> failwith
"no position information"
922 | AbstractLineTok pi
-> pi
926 OriginTok pi
-> pi
.Common.str
927 | ExpandedTok
(pi
,_
) -> pi
.Common.str
929 | AbstractLineTok pi
-> pi
.Common.str
934 | ExpandedTok
(_
,(pi
,_
)) -> f pi
935 | FakeTok
(_
,(pi
,_
)) -> f pi
936 | AbstractLineTok pi
-> f pi
938 let get_orig_info f ii
=
941 | ExpandedTok
(pi
,_
) -> f pi
(* diff with get_info *)
942 | FakeTok
(_
,(pi
,_
)) -> f pi
943 | AbstractLineTok pi
-> f pi
945 let make_expanded ii
=
946 {ii
with pinfo
= ExpandedTok
(get_opi ii
.pinfo
,no_virt_pos)}
948 let pos_of_info ii
= get_info (function x
-> x
.Common.charpos
) ii
949 let opos_of_info ii
= get_orig_info (function x
-> x
.Common.charpos
) ii
950 let line_of_info ii
= get_orig_info (function x
-> x
.Common.line
) ii
951 let col_of_info ii
= get_orig_info (function x
-> x
.Common.column
) ii
952 let file_of_info ii
= get_orig_info (function x
-> x
.Common.file
) ii
953 let mcode_of_info ii
= fst
(mcode_and_env_of_cocciref ii
.cocci_tag
)
954 let pinfo_of_info ii
= ii
.pinfo
955 let parse_info_of_info ii
= get_pi ii
.pinfo
957 let strloc_of_info ii
=
958 spf
"%s:%d" (file_of_info ii
) (line_of_info ii
)
962 FakeTok
(_
,_
) -> true
965 let is_origintok ii
=
967 | OriginTok pi
-> true
970 (* ------------------------------------------------------------------------- *)
971 type posrv
= Real
of Common.parse_info
| Virt
of virtual_position
973 let compare_pos ii1 ii2
=
974 let get_pos = function
975 OriginTok pi
-> Real pi
976 | FakeTok
(s
,vpi
) -> Virt vpi
977 | ExpandedTok
(pi
,vpi
) -> Virt vpi
978 | AbstractLineTok pi
-> Real pi
in (* used for printing *)
979 let pos1 = get_pos (pinfo_of_info ii1
) in
980 let pos2 = get_pos (pinfo_of_info ii2
) in
981 match (pos1,pos2) with
982 (Real p1
, Real p2
) ->
983 compare p1
.Common.charpos p2
.Common.charpos
984 | (Virt
(p1
,_
), Real p2
) ->
985 if (compare p1
.Common.charpos p2
.Common.charpos
) =|= (-1) then (-1) else 1
986 | (Real p1
, Virt
(p2
,_
)) ->
987 if (compare p1
.Common.charpos p2
.Common.charpos
) =|= 1 then 1 else (-1)
988 | (Virt
(p1
,o1
), Virt
(p2
,o2
)) ->
989 let poi1 = p1
.Common.charpos
in
990 let poi2 = p2
.Common.charpos
in
991 match compare
poi1 poi2 with
996 let equal_posl (l1
,c1
) (l2
,c2
) =
997 (l1
=|= l2
) && (c1
=|= c2
)
999 let compare_posl (l1
,c1
) (l2
,c2
) =
1004 let info_to_fixpos ii
=
1005 match pinfo_of_info ii
with
1006 OriginTok pi
-> Ast_cocci.Real pi
.Common.charpos
1007 | ExpandedTok
(_
,(pi
,offset
)) ->
1008 Ast_cocci.Virt
(pi
.Common.charpos
,offset
)
1009 | FakeTok
(_
,(pi
,offset
)) ->
1010 Ast_cocci.Virt
(pi
.Common.charpos
,offset
)
1011 | AbstractLineTok pi
-> failwith
"unexpected abstract"
1014 let is_test (e
: expression
) =
1015 let (_
,info
), _
= e
in
1016 let (_
,test
) = !info
in
1019 (*****************************************************************************)
1021 (*****************************************************************************)
1023 (* When we have extended the C Ast to add some info to the tokens,
1024 * such as its line number in the file, we can not use anymore the
1025 * ocaml '=' to compare Ast elements. To overcome this problem, to be
1026 * able to use again '=', we just have to get rid of all those extra
1027 * information, to "abstract those line" (al) information.
1029 * Julia then modifies it a little to have a tokenindex, so the original
1030 * true al_info is in fact real_al_info.
1033 let al_info tokenindex x
=
1036 {charpos
= tokenindex
;
1038 column
= tokenindex
;
1040 str
= str_of_info x
});
1041 cocci_tag
= ref emptyAnnot;
1042 annots_tag
= Token_annot.empty
;
1043 comments_tag
= ref emptyComments;
1046 let semi_al_info x
=
1048 cocci_tag
= ref emptyAnnot;
1049 comments_tag
= ref emptyComments;
1052 let magic_real_number = -10
1054 let real_al_info x
=
1057 {charpos
= magic_real_number;
1058 line
= magic_real_number;
1059 column
= magic_real_number;
1061 str
= str_of_info x
});
1062 cocci_tag
= ref emptyAnnot;
1063 annots_tag
= Token_annot.empty
;
1064 comments_tag
= ref emptyComments;
1069 List.filter
(function (Token_c.TCommentCpp _
,_
) -> true | _
-> false) l
in
1071 (x
,{i
with Common.charpos
= magic_real_number;
1072 Common.line
= magic_real_number;
1073 Common.column
= magic_real_number}) in
1074 {mbefore
= []; (* duplicates mafter of the previous token *)
1075 mafter
= List.map
al_com (keep_cpp x
.mafter
);
1080 let al_info_cpp tokenindex x
=
1083 {charpos
= tokenindex
;
1085 column
= tokenindex
;
1087 str
= str_of_info x
});
1088 cocci_tag
= ref emptyAnnot;
1089 annots_tag
= Token_annot.empty
;
1090 comments_tag
= ref (al_comments !(x
.comments_tag
));
1093 let semi_al_info_cpp x
=
1095 cocci_tag
= ref emptyAnnot;
1096 annots_tag
= Token_annot.empty
;
1097 comments_tag
= ref (al_comments !(x
.comments_tag
));
1100 let real_al_info_cpp x
=
1103 {charpos
= magic_real_number;
1104 line
= magic_real_number;
1105 column
= magic_real_number;
1107 str
= str_of_info x
});
1108 cocci_tag
= ref emptyAnnot;
1109 annots_tag
= Token_annot.empty
;
1110 comments_tag
= ref (al_comments !(x
.comments_tag
));
1114 (*****************************************************************************)
1116 (*****************************************************************************)
1118 (* Transform a list of arguments (or parameters) where the commas are
1119 * represented via the wrap2 and associated with an element, with
1120 * a list where the comma are on their own. f(1,2,2) was
1121 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
1123 * Used in cocci_vs_c.ml, to have a more direct correspondance between
1124 * the ast_cocci of julia and ast_c.
1126 let rec (split_comma
: 'a wrap2 list
-> ('a
, il
) either list
) =
1131 then (Left e
)::split_comma xs
1132 else Right ii
::Left e
::split_comma xs
1134 let rec (unsplit_comma
: ('a
, il
) either list
-> 'a wrap2 list
) =
1137 | Right ii
::Left e
::xs
->
1138 (e
, ii
)::unsplit_comma xs
1140 let empty_ii = [] in
1141 (e
, empty_ii)::unsplit_comma xs
1143 raise
(Impossible
59)
1148 (*****************************************************************************)
1149 (* Helpers, could also be put in lib_parsing_c.ml instead *)
1150 (*****************************************************************************)
1152 (* should maybe be in pretty_print_c ? *)
1154 let s_of_inc_file inc_file
=
1156 | Local xs
-> xs
+> Common.join
"/"
1157 | NonLocal xs
-> xs
+> Common.join
"/"
1160 let s_of_inc_file_bis inc_file
=
1162 | Local xs
-> "\"" ^ xs
+> Common.join
"/" ^
"\""
1163 | NonLocal xs
-> "<" ^ xs
+> Common.join
"/" ^
">"
1166 let fieldname_of_fieldkind fieldkind
=
1167 match fieldkind
with
1168 | Simple
(sopt
, ft
) -> sopt
1169 | BitField
(sopt
, ft
, info
, expr
) -> sopt
1172 let s_of_attr attr
=
1174 +> List.map
(fun (Attribute s
, ii
) -> s
)
1178 (* ------------------------------------------------------------------------- *)
1179 let str_of_name ident
=
1181 | RegularName
(s
,ii
) -> s
1182 | CppConcatenatedName xs
->
1183 xs
+> List.map
(fun (x
,iiop
) -> unwrap x
) +> Common.join
"##"
1184 | CppVariadicName
(s
, ii
) -> "##" ^ s
1185 | CppIdentBuilder
((s
,iis
), xs
) ->
1187 (xs
+> List.map
(fun ((x
,iix
), iicomma
) -> x
) +> Common.join
",") ^
1190 let get_s_and_ii_of_name name
=
1192 | RegularName
(s
, iis
) -> s
, iis
1193 | CppIdentBuilder
((s
, iis
), xs
) -> s
, iis
1194 | CppVariadicName
(s
,iis
) ->
1195 let (iop
, iis
) = Common.tuple_of_list2 iis
in
1197 | CppConcatenatedName xs
->
1199 | [] -> raise
(Impossible
60)
1200 | ((s
,iis
),noiiop
)::xs
->
1204 let get_s_and_info_of_name name
=
1205 let (s
,ii
) = get_s_and_ii_of_name name
in
1208 let info_of_name name
=
1209 let (s
,ii
) = get_s_and_ii_of_name name
in
1212 let ii_of_name name
=
1213 let (s
,ii
) = get_s_and_ii_of_name name
in
1216 let get_local_ii_of_expr_inlining_ii_of_name e
=
1217 let (ebis
,_
),ii
= e
in
1219 | Ident name
, noii ->
1222 | RecordAccess
(e
, name
), ii
->
1223 ii
@ ii_of_name name
1224 | RecordPtAccess
(e
, name
), ii
->
1225 ii
@ ii_of_name name
1229 let get_local_ii_of_tybis_inlining_ii_of_name ty
=
1231 | TypeName
(name
, _typ
), [] -> ii_of_name name
1234 (* the following is used to obtain the argument to LocalVar *)
1235 let info_of_type ft
=
1236 let (qu
, ty
) = ft
in
1237 (* bugfix: because of string->name, the ii can be deeper *)
1238 let ii = get_local_ii_of_tybis_inlining_ii_of_name ty
in
1240 | ii::_
-> Some
ii.pinfo
1243 (* only Label and Goto have name *)
1244 let get_local_ii_of_st_inlining_ii_of_name st
=
1246 | Labeled
(Label
(name
, st
)), ii -> ii_of_name name
@ ii
1247 | Jump
(Goto name
), ii ->
1248 let (i1
, i3
) = Common.tuple_of_list2
ii in
1249 [i1
] @ ii_of_name name
@ [i3
]
1254 (* ------------------------------------------------------------------------- *)
1255 let name_of_parameter param
=
1256 param
.p_namei
+> Common.map_option
(str_of_name)
1259 (* ------------------------------------------------------------------------- *)
1260 (* Annotations on tokens *)
1261 (* ------------------------------------------------------------------------- *)
1263 (* to put a given annotation on a token *)
1264 let put_annot_info info key
value =
1265 info
.annots_tag
<- Token_annot.put_annot key
value info
.annots_tag
1267 (* to check if an annotation has such a token *)
1268 let get_annot_info info key
=
1269 Token_annot.get_annot info
.annots_tag key