3 * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
16 (*****************************************************************************)
17 (* The AST C related types *)
18 (*****************************************************************************)
20 * Some stuff are tagged semantic: which means that they are computed
23 * This means that some elements in this AST are present only if
24 * some annotation/transformation has been done on the original AST returned
25 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
29 (* ------------------------------------------------------------------------- *)
31 (* ------------------------------------------------------------------------- *)
33 (* To allow some transformations over the AST, we keep as much information
34 * as possible in the AST such as the tokens content and their locations.
35 * Those info are called 'info' (how original) and can be tagged.
36 * For instance one tag may say that the unparser should remove this token.
38 * Update: Now I use a ref! in those 'info' so take care.
39 * That means that modifications of the info of tokens can have
40 * an effect on the info stored in the ast (which is sometimes
41 * convenient, cf unparse_c.ml or comment_annotater_c.ml)
43 * convention: I often use 'ii' for the name of a list of info.
45 * Sometimes we want to add someting at the beginning or at the end
46 * of a construct. For 'function' and 'decl' we want to add something
47 * to their left and for 'if' 'while' et 'for' and so on at their right.
48 * We want some kinds of "virtual placeholders" that represent the start or
49 * end of a construct. We use fakeInfo for that purpose.
50 * To identify those cases I have added a fakestart/fakeend comment.
52 * cocci: Each token will be decorated in the future by the mcodekind
53 * of cocci. It is the job of the pretty printer to look at this
54 * information and decide to print or not the token (and also the
55 * pending '+' associated sometimes with the token).
57 * The first time that we parse the original C file, the mcodekind is
58 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
59 * associated. This is what I call a "clean" expr/statement/....
61 * Each token will also be decorated in the future with an environment,
62 * because the pending '+' may contain metavariables that refer to some
69 type posl
= int * int (* line-col, for MetaPosValList, for position variables *)
71 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
72 type virtual_position
= Common.parse_info
* int (* character offset *)
75 (* Present both in ast and list of tokens *)
76 | OriginTok
of Common.parse_info
77 (* Present only in ast and generated after parsing. Used mainly
78 * by Julia, to add stuff at virtual places, beginning of func or decl *)
79 | FakeTok
of string * virtual_position
80 (* Present both in ast and list of tokens. *)
81 | ExpandedTok
of Common.parse_info
* virtual_position
83 (* Present neither in ast nor in list of tokens
84 * but only in the '+' of the mcode of some tokens. Those kind of tokens
85 * are used to be able to use '=' to compare big ast portions.
87 | AbstractLineTok
of Common.parse_info
(* local to the abstracted thing *)
92 (* this cocci_tag can be changed, which is how we can express some program
93 * transformations by tagging the tokens involved in this transformation.
95 cocci_tag
: (Ast_cocci.mcodekind
* metavars_binding
) ref;
96 (* set in comment_annotater_c.ml *)
97 comments_tag
: comments_around
ref;
99 (* todo? token_info : sometimes useful to know what token it was *)
103 (* wrap2 is like wrap, except that I use it often for separator such
104 * as ','. In that case the info is associated to the argument that
105 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *)
106 and 'a wrap
= 'a
* il
107 and 'a wrap2
= 'a
* il
109 (* ------------------------------------------------------------------------- *)
111 (* ------------------------------------------------------------------------- *)
113 (* was called 'ident' before, but 'name' is I think better
114 * as concatenated strings can be used not only for identifiers and for
115 * declarators, but also for fields, for labels, etc.
118 | RegularName
of string wrap
119 | CppConcatenatedName
of (string wrap
) wrap2
(* the ## separators *) list
120 (* normally only used inside list of things, as in parameters or arguments
121 * in which case, cf cpp-manual, it has a special meaning *)
122 | CppVariadicName
of string wrap
(* ## s *)
123 | CppIdentBuilder
of string wrap
(* s ( ) *) *
124 ((string wrap
) wrap2 list
) (* arguments *)
127 (* ------------------------------------------------------------------------- *)
129 (* ------------------------------------------------------------------------- *)
130 (* Could have more precise type in fullType, in expression, etc, but
131 * it requires to do too much things in parsing such as checking no
132 * conflicting structname, computing value, etc. Better to separate
133 * concern. So I put '=>' to mean what we would really like. In fact
134 * what we really like is defining another fullType, expression, etc
135 * from scratch, because many stuff are just sugar.
137 * invariant: Array and FunctionType have also typeQualifier but they
138 * dont have sense. I put this to factorise some code. If you look in
139 * the grammar, you see that we can never specify const for the array
140 * himself (but we can do it for pointer) or function, we always
141 * have in the action rule of the grammar a { (nQ, FunctionType ...) }.
144 * Because of ExprStatement, we can have more 'new scope' events, but
145 * rare I think. For instance with 'array of constExpression' there can
146 * have an exprStatement and a new (local) struct defined. Same for
152 and fullType
= typeQualifier
* typeC
153 and typeC
= typeCbis wrap
156 | BaseType
of baseType
158 | Pointer
of fullType
159 | Array
of constExpression
option * fullType
160 | FunctionType
of functionType
162 | Enum
of string option * enumType
163 | StructUnion
of structUnion
* string option * structType
(* new scope *)
166 | StructUnionName
of structUnion
* string
168 | TypeName
of name
* fullType
option (* semantic: filled later *)
170 | ParenType
of fullType
(* forunparser: *)
172 (* gccext: TypeOfType below may seems useless; Why declare a
173 * __typeof__(int) x; ?
174 * When used with macros, it allows to fix a problem of C which
175 * is that type declaration can be spread around the ident. Indeed it
176 * may be difficult to have a macro such as
177 * '#define macro(type, ident) type ident;'
178 * because when you want to do a
179 * macro(char[256], x),
180 * then it will generate invalid code, but with a
181 * '#define macro(type, ident) __typeof(type) ident;'
184 | TypeOfExpr
of expression
185 | TypeOfType
of fullType
187 (* cppext: IfdefType TODO *)
189 (* -------------------------------------- *)
192 | FloatType
of floatType
194 (* stdC: type section
196 * note: char and signed char are semantically different!!
198 and intType
= CChar
(* obsolete? | CWchar *)
201 and signed
= sign
* base
202 and base
= CChar2
| CShort
| CInt
| CLong
| CLongLong
(* gccext: *)
203 and sign
= Signed
| UnSigned
205 and floatType
= CFloat
| CDouble
| CLongDouble
208 (* -------------------------------------- *)
209 and structUnion
= Struct
| Union
210 and structType
= field list
211 and field
= fieldbis wrap
213 | DeclarationField
of field_declaration
218 | MacroStructDeclTodo
221 | CppDirectiveStruct
of cpp_directive
222 | IfdefStruct
of ifdef_directive
(* * field list list *)
225 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
226 and field_declaration
=
227 | FieldDeclList
of fieldkind wrap2 list
(* , *) wrap
(* ; *)
229 (* At first I thought that a bitfield could be only Signed/Unsigned.
230 * But it seems that gcc allow char i:4. C rule must say that you
231 * can cast into int so enum too, ...
234 | Simple
of name
option * fullType
235 | BitField
of name
option * fullType
*
236 info
(* : *) * constExpression
237 (* fullType => BitFieldInt | BitFieldUnsigned *)
240 (* -------------------------------------- *)
241 and enumType
= (name
* (info
(* = *) * constExpression
) option)
243 (* => string * int list *)
246 (* -------------------------------------- *)
247 (* return * (params * has "...") *)
248 and functionType
= fullType
* (parameterType wrap2 list
* bool wrap
)
250 { p_namei
: name
option;
251 p_register
: bool wrap
;
254 (* => (bool (register) * fullType) list * bool *)
257 and typeQualifier
= typeQualifierbis wrap
258 and typeQualifierbis
= {const
: bool; volatile
: bool}
260 (* gccext: cppext: *)
261 and attribute
= attributebis wrap
263 | Attribute
of string
265 (* ------------------------------------------------------------------------- *)
267 (* ------------------------------------------------------------------------- *)
268 and expression
= (expressionbis
* exp_info
ref (* semantic: *)) wrap
269 and exp_info
= exp_type
option * test
270 and exp_type
= fullType
(* Type_c.completed_and_simplified *) * local
271 and local
= LocalVar
of parse_info
| NotLocalVar
(* cocci: *)
272 and test
= Test
| NotTest
(* cocci: *)
276 (* Ident can be a enumeration constant, a simple variable, a name of a func.
277 * With cppext, Ident can also be the name of a macro. Sparse says
278 * "an identifier with a meaning is a symbol" *)
279 | Ident
of name
(* todo? more semantic info such as LocalFunc *)
281 | Constant
of constant
282 | FunCall
of expression
* argument wrap2
(* , *) list
283 (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *)
284 | CondExpr
of expression
* expression
option * expression
286 (* should be considered as statements, bad C langage *)
287 | Sequence
of expression
* expression
288 | Assignment
of expression
* assignOp
* expression
291 | Postfix
of expression
* fixOp
292 | Infix
of expression
* fixOp
294 | Unary
of expression
* unaryOp
295 | Binary
of expression
* binaryOp
* expression
297 | ArrayAccess
of expression
* expression
299 (* field ident access *)
300 | RecordAccess
of expression
* name
301 | RecordPtAccess
of expression
* name
302 (* redundant normally, could replace it by DeRef RecordAcces *)
304 | SizeOfExpr
of expression
305 | SizeOfType
of fullType
306 | Cast
of fullType
* expression
309 | StatementExpr
of compound wrap
(* (Â ) new scope *)
310 | Constructor
of fullType
* initialiser wrap2
(* , *) list
313 | ParenExpr
of expression
315 (* cppext: IfdefExpr TODO *)
317 (* cppext: normmally just expression *)
318 and argument
= (expression
, weird_argument
) either
320 | ArgType
of parameterType
321 | ArgAction
of action_macro
323 (* todo: ArgStatement of statement, possibly have ghost token *)
327 (* I put string for Int and Float because int would not be enough because
328 * OCaml int are 31 bits. So simpler to do string. Same reason to have
329 * string instead of int list for the String case.
331 * note: -2 is not a constant, it is the unary operator '-'
332 * applied to constant 2. So the string must represent a positive
336 | String
of (string * isWchar
)
337 | MultiString
of string list
(* can contain MacroString, todo: more info *)
338 | Char
of (string * isWchar
) (* normally it is equivalent to Int *)
339 | Int
of (string (* * intType*))
340 | Float
of (string * floatType
)
342 and isWchar
= IsWchar
| IsChar
345 and unaryOp
= GetRef
| DeRef
| UnPlus
| UnMinus
| Tilde
| Not
346 | GetRefLabel
(* gccext: GetRefLabel, via &&label notation *)
347 and assignOp
= SimpleAssign
| OpAssign
of arithOp
348 and fixOp
= Dec
| Inc
350 and binaryOp
= Arith
of arithOp
| Logical
of logicalOp
353 | Plus
| Minus
| Mul
| Div
| Mod
358 | Inf
| Sup
| InfEq
| SupEq
362 and constExpression
= expression
(* => int *)
365 (* ------------------------------------------------------------------------- *)
367 (* ------------------------------------------------------------------------- *)
368 (* note: that assignement is not a statement but an expression;
369 * wonderful C langage.
371 * note: I use 'and' for type definition cos gccext allow statement as
372 * expression, so need mutual recursive type definition.
376 and statement
= statementbis wrap
379 | Compound
of compound
(* new scope *)
380 | ExprStatement
of exprStatement
381 | Selection
of selection
(* have fakeend *)
382 | Iteration
of iteration
(* have fakeend *)
385 (* simplify cocci: only at the beginning of a compound normally *)
386 | Decl
of declaration
390 | NestedFunc
of definition
397 and labeled
= Label
of name
* statement
398 | Case
of expression
* statement
399 | CaseRange
of expression
* expression
* statement
(* gccext: *)
400 | Default
of statement
403 * old: compound = (declaration list * statement list)
404 * old: (declaration, statement) either list
405 * Simplify cocci to just have statement list, by integrating Decl in stmt.
407 * update: now introduce also the _sequencable to allow ifdef in the middle.
408 * Indeed, I now allow ifdefs in the ast but they must be only between
409 * "sequencable" elements. They can be put in a type only if this type
410 * is used in a list, like at the toplevel, used in a 'toplevel list',
411 * or inside a compound, used in a 'statement list'. I must not allow
412 * ifdef anywhere. For instance I can not make ifdef a statement
413 * cos some instruction like If accept only one statement and the
414 * ifdef directive must not take the place of a legitimate instruction.
415 * We had a similar phenomena in SmPL where we have the notion
416 * of statement and sequencable statement too. Once you have
417 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
418 * and introduce the ifdef.
420 * update: those ifdefs are either passed, or present in the AST but in
421 * a flat form. To structure those flat ifdefs you have to run
422 * a transformation that will put in a tree the statements inside
423 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
424 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
427 and compound
= statement_sequencable list
429 (* cppext: easier to put at statement_list level than statement level *)
430 and statement_sequencable
=
431 | StmtElem
of statement
434 | CppDirectiveStmt
of cpp_directive
435 | IfdefStmt
of ifdef_directive
437 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
438 | IfdefStmt2
of ifdef_directive list
* (statement_sequencable list
) list
440 and exprStatement
= expression
option
442 (* for Switch, need check that all elements in the compound start
443 * with a case:, otherwise unreachable code.
446 | If
of expression
* statement
* statement
447 | Switch
of expression
* statement
451 | While
of expression
* statement
452 | DoWhile
of statement
* expression
453 | For
of exprStatement wrap
* exprStatement wrap
* exprStatement wrap
*
456 | MacroIteration
of string * argument wrap2 list
* statement
458 and jump
= Goto
of name
460 | Return
| ReturnExpr
of expression
461 | GotoComputed
of expression
(* gccext: goto *exp ';' *)
465 and asmbody
= il
(* string list *) * colon wrap
(* : *) list
466 and colon
= Colon
of colon_option wrap2 list
467 and colon_option
= colon_option_bis wrap
468 and colon_option_bis
= ColonMisc
| ColonExpr
of expression
471 (* ------------------------------------------------------------------------- *)
473 (* ------------------------------------------------------------------------- *)
474 (* (string * ...) option cos can have empty declaration or struct tag
477 * Before I had a Typedef constructor, but why make this special case and not
478 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
479 * declarations ? So I try to generalise and not have Typedef either. This
480 * requires more work in parsing. Better to separate concern.
482 * Before the need for unparser, I didn't have a DeclList but just a Decl.
484 * I am not sure what it means to declare a prototype inline, but gcc
489 | DeclList
of onedecl wrap2
(* , *) list wrap
(* ; fakestart sto *)
491 | MacroDecl
of (string * argument wrap2 list
) wrap
494 { v_namei
: (name
* (info
(* = *) * initialiser
) option) option;
497 v_local
: local_decl
; (* cocci: *)
498 v_attr
: attribute list
; (* gccext: *)
500 and storage
= storagebis
* bool (* gccext: inline or not *)
501 and storagebis
= NoSto
| StoTypedef
| Sto
of storageClass
502 and storageClass
= Auto
| Static
| Register
| Extern
504 and local_decl
= LocalDecl
| NotLocalDecl
506 and initialiser
= initialiserbis wrap
508 | InitExpr
of expression
509 | InitList
of initialiser wrap2
(* , *) list
511 | InitDesignators
of designator list
* initialiser
512 | InitFieldOld
of string * initialiser
513 | InitIndexOld
of expression
* initialiser
515 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
516 and designator
= designatorbis wrap
518 | DesignatorField
of string
519 | DesignatorIndex
of expression
520 | DesignatorRange
of expression
* expression
522 (* ------------------------------------------------------------------------- *)
523 (* Function definition *)
524 (* ------------------------------------------------------------------------- *)
525 (* Normally we should define another type functionType2 because there
526 * are more restrictions on what can define a function than a pointer
527 * function. For instance a function declaration can omit the name of the
528 * parameter whereas a function definition can not. But, in some cases such
529 * as 'f(void) {', there is no name too, so I simplified and reused the
530 * same functionType type for both declaration and function definition.
532 * Also old style C does not have type in the parameter, so again simpler
533 * to abuse the functionType and allow missing type.
535 and definition
= definitionbis wrap
(* ( ) { } fakestart sto *)
538 f_type
: functionType
; (* todo? a functionType2 ? *)
541 f_attr
: attribute list
; (* gccext: *)
542 f_old_c_style
: declaration list
option;
544 (* cppext: IfdefFunHeader TODO *)
546 (* ------------------------------------------------------------------------- *)
547 (* cppext: cpp directives, #ifdef, #define and #include body *)
548 (* ------------------------------------------------------------------------- *)
552 | Undef
of string wrap
554 (*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *)
556 and define
= string wrap
(* #define s *) * (define_kind
* define_val
)
559 | DefineFunc
of ((string wrap
) wrap2 list
) wrap
(* () *)
561 (* most common case; e.g. to define int constant *)
562 | DefineExpr
of expression
564 | DefineStmt
of statement
565 | DefineType
of fullType
566 | DefineDoWhileZero
of (statement
* expression
) wrap
(* do { } while(0) *)
568 | DefineFunction
of definition
569 | DefineInit
of initialiser
(* in practice only { } with possible ',' *)
571 (* TODO DefineMulti of define_val list *)
573 | DefineText
of string wrap
581 { i_include
: inc_file wrap
; (* #include s *)
582 (* cocci: computed in ? *)
583 i_rel_pos
: include_rel_pos
option ref;
584 (* cocci: cf -test incl *)
586 (* cf cpp_ast_c.ml. set to None at parsing time. *)
587 i_content
: (Common.filename
(* full path *) * program
) option;
590 | Local
of inc_elem list
591 | NonLocal
of inc_elem list
592 | Weird
of string (* ex: #include SYSTEM_H *)
593 and inc_elem
= string
595 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
597 * The first_of and last_of store the list of prefixes that was
598 * introduced by the include. On #include <a/b/x>, if the include was
599 * the first in the file, it would give in first_of the following
600 * prefixes a/b/c; a/b/; a/ ; <empty>
602 * This is set after parsing, in cocci.ml, in update_rel_pos.
604 and include_rel_pos
= {
605 first_of
: string list list
;
606 last_of
: string list list
;
611 (* todo? to specialize if someone need more info *)
612 and ifdef_directive
= (* or and 'a ifdefed = 'a list wrap *)
613 | IfdefDirective
of (ifdefkind
* matching_tag
) wrap
615 | Ifdef
(* todo? of string ? of formula_cpp ? *)
616 | IfdefElseif
(* same *)
617 | IfdefElse
(* same *)
619 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
620 * a global so it means if you parse the same file twice you may get
621 * different id. I try now to avoid this pb by resetting it each
622 * time I parse a file.
625 IfdefTag
of (int (* tag *) * int (* total with this tag *))
631 (* ------------------------------------------------------------------------- *)
632 (* The toplevels elements *)
633 (* ------------------------------------------------------------------------- *)
635 | Declaration
of declaration
636 | Definition
of definition
639 | CppTop
of cpp_directive
640 | IfdefTop
of ifdef_directive
(* * toplevel list *)
643 | MacroTop
of string * argument wrap2 list
* il
645 | EmptyDef
of il
(* gccext: allow redundant ';' *)
646 | NotParsedCorrectly
of il
648 | FinalDef
of info
(* EOF *)
650 (* ------------------------------------------------------------------------- *)
651 and program
= toplevel list
654 (*****************************************************************************)
656 (*****************************************************************************)
657 (* Was previously in pattern.ml, but because of the transformer,
658 * we need to decorate each token with some cocci code AND the environment
659 * for this cocci code.
661 and metavars_binding
= (Ast_cocci.meta_name
, metavar_binding_kind
) assoc
662 and metavar_binding_kind
=
663 | MetaIdVal
of string
664 | MetaFuncVal
of string
665 | MetaLocalFuncVal
of string
667 | MetaExprVal
of expression
(* a "clean expr" *)
668 | MetaExprListVal
of argument wrap2 list
669 | MetaParamVal
of parameterType
670 | MetaParamListVal
of parameterType wrap2 list
672 | MetaTypeVal
of fullType
673 | MetaInitVal
of initialiser
674 | MetaStmtVal
of statement
676 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
677 * because don't need to have the value for a position in the env of
678 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
679 * variables accessible via SmPL whereas the position can be one day
680 * so I think it's better to put MetaPosVal here *)
681 | MetaPosVal
of (Ast_cocci.fixpos
* Ast_cocci.fixpos
) (* max, min *)
683 (Common.filename
* string (*element*) * posl
* posl
) list
(* min, max *)
684 | MetaListlenVal
of int
687 (*****************************************************************************)
689 (*****************************************************************************)
691 (* convention: I often use "m" for comments as I can not use "c"
692 * (already use for c stuff) and "com" is too long.
695 (* this type will be associated to each token.
697 and comments_around
= {
698 mbefore
: Token_c.comment_like_token list
;
699 mafter
: Token_c.comment_like_token list
;
701 (* old: can do something simpler than CComment for coccinelle, cf above.
702 mbefore: comment_and_relative_pos list;
703 mafter: comment_and_relative_pos list;
704 and comment_and_relative_pos = {
706 minfo: Common.parse_info;
707 (* the int represent the number of lines of difference between the
708 * current token and the comment. When on same line, this number is 0.
709 * When previous line, -1. In some way the after/before in previous
710 * record is useless because the sign of the integer can helps
711 * do the difference too, but I keep it that way.
715 * cppbetween: bool; touse? if false positive
716 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
720 and comment
= Common.parse_info
721 and com
= comment list
ref
727 (*****************************************************************************)
728 (* Some constructors *)
729 (*****************************************************************************)
730 let nullQualif = ({const
=false; volatile
= false}, [])
733 let defaultInt = (BaseType
(IntType
(Si
(Signed
, CInt
))))
735 let noType () = ref (None
,NotTest
)
736 let noInstr = (ExprStatement
(None
), [])
737 let noTypedefDef () = None
740 let emptyMetavarsBinding =
741 ([]: metavars_binding
)
744 (Ast_cocci.CONTEXT
(Ast_cocci.NoPos
,Ast_cocci.NOTHING
),
745 emptyMetavarsBinding)
753 (* for include, some meta information needed by cocci *)
755 ref (None
: include_rel_pos
option)
760 (* When want add some info in ast that does not correspond to
761 * an existing C element.
762 * old: or when don't want 'synchronize' on it in unparse_c.ml
763 * (now have other mark for tha matter).
765 let no_virt_pos = ({str
="";charpos
=0;line
=0;column
=0;file
=""},-1)
768 { pinfo
= FakeTok
("",no_virt_pos);
769 cocci_tag
= ref emptyAnnot;
770 comments_tag
= ref emptyComments;
775 let noi_content = (None
: ((Common.filename
* program
) option))
777 (*****************************************************************************)
779 (*****************************************************************************)
785 let unwrap_expr ((unwrap_e
, typ
), iie
) = unwrap_e
786 let rewrap_expr ((_old_unwrap_e
, typ
), iie
) newe
= ((newe
, typ
), iie
)
788 let get_type_expr ((unwrap_e
, typ
), iie
) = !typ
789 let set_type_expr ((unwrap_e
, oldtyp
), iie
) newtyp
=
791 (* old: (unwrap_e, newtyp), iie *)
793 let get_onlytype_expr ((unwrap_e
, typ
), iie
) =
795 | Some
(ft
,_local
), _test
-> Some ft
798 let get_onlylocal_expr ((unwrap_e
, typ
), iie
) =
800 | Some
(ft
,local
), _test
-> Some local
805 let unwrap_typeC (qu
, (typeC
, ii
)) = typeC
806 let rewrap_typeC (qu
, (typeC
, ii
)) newtypeC
= (qu
, (newtypeC
, ii
))
809 (* ------------------------------------------------------------------------- *)
810 let rewrap_str s ii
=
813 OriginTok pi
-> OriginTok
{ pi
with Common.str
= s
;}
814 | ExpandedTok
(pi
,vpi
) -> ExpandedTok
({ pi
with Common.str
= s
;},vpi
)
815 | FakeTok
(_
,vpi
) -> FakeTok
(s
,vpi
)
816 | AbstractLineTok pi
-> OriginTok
{ pi
with Common.str
= s
;})}
818 let rewrap_pinfo pi ii
=
821 (* info about the current location *)
822 let get_pi = function
824 | ExpandedTok
(_
,(pi
,_
)) -> pi
825 | FakeTok
(_
,(pi
,_
)) -> pi
826 | AbstractLineTok pi
-> pi
829 let get_opi = function
831 | ExpandedTok
(pi
,_
) -> pi
832 | FakeTok
(_
,_
) -> failwith
"no position information"
833 | AbstractLineTok pi
-> pi
837 OriginTok pi
-> pi
.Common.str
838 | ExpandedTok
(pi
,_
) -> pi
.Common.str
840 | AbstractLineTok pi
-> pi
.Common.str
845 | ExpandedTok
(_
,(pi
,_
)) -> f pi
846 | FakeTok
(_
,(pi
,_
)) -> f pi
847 | AbstractLineTok pi
-> f pi
849 let get_orig_info f ii
=
852 | ExpandedTok
(pi
,_
) -> f pi
853 | FakeTok
(_
,(pi
,_
)) -> f pi
854 | AbstractLineTok pi
-> f pi
856 let make_expanded ii
=
857 {ii
with pinfo
= ExpandedTok
(get_opi ii
.pinfo
,no_virt_pos)}
859 let pos_of_info ii
= get_info (function x
-> x
.Common.charpos
) ii
860 let opos_of_info ii
= get_orig_info (function x
-> x
.Common.charpos
) ii
861 let line_of_info ii
= get_orig_info (function x
-> x
.Common.line
) ii
862 let col_of_info ii
= get_orig_info (function x
-> x
.Common.column
) ii
863 let file_of_info ii
= get_orig_info (function x
-> x
.Common.file
) ii
864 let mcode_of_info ii
= fst
(!(ii
.cocci_tag
))
865 let pinfo_of_info ii
= ii
.pinfo
866 let parse_info_of_info ii
= get_pi ii
.pinfo
870 FakeTok
(_
,_
) -> true
873 let is_origintok ii
=
875 | OriginTok pi
-> true
878 (* ------------------------------------------------------------------------- *)
879 type posrv
= Real
of Common.parse_info
| Virt
of virtual_position
881 let compare_pos ii1 ii2
=
882 let get_pos = function
883 OriginTok pi
-> Real pi
884 | FakeTok
(s
,vpi
) -> Virt vpi
885 | ExpandedTok
(pi
,vpi
) -> Virt vpi
886 | AbstractLineTok pi
-> Real pi
in (* used for printing *)
887 let pos1 = get_pos (pinfo_of_info ii1
) in
888 let pos2 = get_pos (pinfo_of_info ii2
) in
889 match (pos1,pos2) with
890 (Real p1
, Real p2
) ->
891 compare p1
.Common.charpos p2
.Common.charpos
892 | (Virt
(p1
,_
), Real p2
) ->
893 if (compare p1
.Common.charpos p2
.Common.charpos
) =|= (-1) then (-1) else 1
894 | (Real p1
, Virt
(p2
,_
)) ->
895 if (compare p1
.Common.charpos p2
.Common.charpos
) =|= 1 then 1 else (-1)
896 | (Virt
(p1
,o1
), Virt
(p2
,o2
)) ->
897 let poi1 = p1
.Common.charpos
in
898 let poi2 = p2
.Common.charpos
in
899 match compare
poi1 poi2 with
904 let equal_posl (l1
,c1
) (l2
,c2
) =
905 (l1
=|= l2
) && (c1
=|= c2
)
907 let info_to_fixpos ii
=
908 match pinfo_of_info ii
with
909 OriginTok pi
-> Ast_cocci.Real pi
.Common.charpos
910 | ExpandedTok
(_
,(pi
,offset
)) ->
911 Ast_cocci.Virt
(pi
.Common.charpos
,offset
)
912 | FakeTok
(_
,(pi
,offset
)) ->
913 Ast_cocci.Virt
(pi
.Common.charpos
,offset
)
914 | AbstractLineTok pi
-> failwith
"unexpected abstract"
917 let is_test (e
: expression
) =
918 let (_
,info
) = unwrap e
in
919 let (_
,test
) = !info
in
922 (*****************************************************************************)
924 (*****************************************************************************)
926 (* When we have extended the C Ast to add some info to the tokens,
927 * such as its line number in the file, we can not use anymore the
928 * ocaml '=' to compare Ast elements. To overcome this problem, to be
929 * able to use again '=', we just have to get rid of all those extra
930 * information, to "abstract those line" (al) information.
932 * Julia then modifies it a little to have a tokenindex, so the original
933 * true al_info is in fact real_al_info.
936 let al_info tokenindex x
=
939 {charpos
= tokenindex
;
943 str
= str_of_info x
});
944 cocci_tag
= ref emptyAnnot;
945 comments_tag
= ref emptyComments;
950 cocci_tag
= ref emptyAnnot;
951 comments_tag
= ref emptyComments;
954 let magic_real_number = -10
959 {charpos
= magic_real_number;
960 line
= magic_real_number;
961 column
= magic_real_number;
963 str
= str_of_info x
});
964 cocci_tag
= ref emptyAnnot;
965 comments_tag
= ref emptyComments;
970 List.filter
(function (Token_c.TCommentCpp _
,_
) -> true | _
-> false) l
in
972 (x
,{i
with Common.charpos
= magic_real_number;
973 Common.line
= magic_real_number;
974 Common.column
= magic_real_number}) in
975 {mbefore
= []; (* duplicates mafter of the previous token *)
976 mafter
= List.map
al_com (keep_cpp x
.mafter
)}
978 let al_info_cpp tokenindex x
=
981 {charpos
= tokenindex
;
985 str
= str_of_info x
});
986 cocci_tag
= ref emptyAnnot;
987 comments_tag
= ref (al_comments !(x
.comments_tag
));
990 let semi_al_info_cpp x
=
992 cocci_tag
= ref emptyAnnot;
993 comments_tag
= ref (al_comments !(x
.comments_tag
));
996 let real_al_info_cpp x
=
999 {charpos
= magic_real_number;
1000 line
= magic_real_number;
1001 column
= magic_real_number;
1003 str
= str_of_info x
});
1004 cocci_tag
= ref emptyAnnot;
1005 comments_tag
= ref (al_comments !(x
.comments_tag
));
1009 (*****************************************************************************)
1011 (*****************************************************************************)
1013 (* Transform a list of arguments (or parameters) where the commas are
1014 * represented via the wrap2 and associated with an element, with
1015 * a list where the comma are on their own. f(1,2,2) was
1016 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
1018 * Used in cocci_vs_c.ml, to have a more direct correspondance between
1019 * the ast_cocci of julia and ast_c.
1021 let rec (split_comma
: 'a wrap2 list
-> ('a
, il
) either list
) =
1026 then (Left e
)::split_comma xs
1027 else Right ii
::Left e
::split_comma xs
1029 let rec (unsplit_comma
: ('a
, il
) either list
-> 'a wrap2 list
) =
1032 | Right ii
::Left e
::xs
->
1033 (e
, ii
)::unsplit_comma xs
1035 let empty_ii = [] in
1036 (e
, empty_ii)::unsplit_comma xs
1043 (*****************************************************************************)
1044 (* Helpers, could also be put in lib_parsing_c.ml instead *)
1045 (*****************************************************************************)
1047 let rec stmt_elems_of_sequencable xs
=
1048 xs
+> Common.map
(fun x
->
1051 | CppDirectiveStmt _
1054 pr2_once
("stmt_elems_of_sequencable: filter a directive");
1056 | IfdefStmt2
(_ifdef
, xxs
) ->
1057 pr2
("stmt_elems_of_sequencable: IfdefStm2 TODO?");
1058 xxs
+> List.map
(fun xs
->
1059 let xs'
= stmt_elems_of_sequencable xs in
1067 (* should maybe be in pretty_print_c ? *)
1069 let s_of_inc_file inc_file
=
1071 | Local
xs -> xs +> Common.join
"/"
1072 | NonLocal
xs -> xs +> Common.join
"/"
1075 let s_of_inc_file_bis inc_file
=
1077 | Local
xs -> "\"" ^
xs +> Common.join
"/" ^
"\""
1078 | NonLocal
xs -> "<" ^
xs +> Common.join
"/" ^
">"
1081 let fieldname_of_fieldkind fieldkind
=
1082 match fieldkind
with
1083 | Simple
(sopt
, ft
) -> sopt
1084 | BitField
(sopt
, ft
, info
, expr
) -> sopt
1087 let s_of_attr attr
=
1089 +> List.map
(fun (Attribute s
, ii
) -> s
)
1092 let str_of_name ident
=
1094 | RegularName
(s
,ii
) -> s
1095 | CppConcatenatedName
xs ->
1096 xs +> List.map
(fun (x
,iiop
) -> unwrap x
) +> Common.join
"##"
1097 | CppVariadicName
(s
, ii
) -> "##" ^ s
1098 | CppIdentBuilder
((s
,iis
), xs) ->
1100 (xs +> List.map
(fun ((x
,iix
), iicomma
) -> x
) +> Common.join
",") ^
1103 let info_of_name ident
=
1105 | RegularName
(s
,ii
) -> List.hd ii
1106 | CppConcatenatedName
xs ->
1108 | [] -> raise Impossible
1109 | ((x
,ii1
),ii2
)::xs ->
1112 | CppVariadicName
(s
, ii
) ->
1113 let (iihash
, iis
) = Common.tuple_of_list2 ii
in
1115 | CppIdentBuilder
((s
,iis
),xs) ->
1118 let get_s_and_ii_of_name name
=
1120 | RegularName
(s
, iis
) -> s
, List.hd iis
1124 let name_of_parameter param
=
1125 param
.p_namei
+> Common.map_option
(str_of_name)