(* Yoann Padioleau * * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License (GPL) * version 2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * file license.txt for more details. *) open Common (*****************************************************************************) (* The AST C related types *) (*****************************************************************************) (* * Some stuff are tagged semantic: which means that they are computed * after parsing. * * This means that some elements in this AST are present only if * some annotation/transformation has been done on the original AST returned * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc. *) (* ------------------------------------------------------------------------- *) (* Token/info *) (* ------------------------------------------------------------------------- *) (* To allow some transformations over the AST, we keep as much information * as possible in the AST such as the tokens content and their locations. * Those info are called 'info' (how original) and can be tagged. * For instance one tag may say that the unparser should remove this token. * * Update: Now I use a ref! in those 'info' so take care. * That means that modifications of the info of tokens can have * an effect on the info stored in the ast (which is sometimes * convenient, cf unparse_c.ml or comment_annotater_c.ml) * * convention: I often use 'ii' for the name of a list of info. * * Sometimes we want to add someting at the beginning or at the end * of a construct. For 'function' and 'decl' we want to add something * to their left and for 'if' 'while' et 'for' and so on at their right. * We want some kinds of "virtual placeholders" that represent the start or * end of a construct. We use fakeInfo for that purpose. * To identify those cases I have added a fakestart/fakeend comment. * * cocci: Each token will be decorated in the future by the mcodekind * of cocci. It is the job of the pretty printer to look at this * information and decide to print or not the token (and also the * pending '+' associated sometimes with the token). * * The first time that we parse the original C file, the mcodekind is * empty, or more precisely all is tagged as a CONTEXT with NOTHING * associated. This is what I call a "clean" expr/statement/.... * * Each token will also be decorated in the future with an environment, * because the pending '+' may contain metavariables that refer to some * C code. * *) (* for unparser: *) type posl = int * int (* line-col, for MetaPosValList, for position variables *) (* with sexp *) (* the virtual position is set in Parsing_hacks.insert_virtual_positions *) type virtual_position = Common.parse_info * int (* character offset *) (* with sexp *) type parse_info = (* Present both in ast and list of tokens *) | OriginTok of Common.parse_info (* Present only in ast and generated after parsing. Used mainly * by Julia, to add stuff at virtual places, beginning of func or decl *) | FakeTok of string * virtual_position (* Present both in ast and list of tokens. *) | ExpandedTok of Common.parse_info * virtual_position (* Present neither in ast nor in list of tokens * but only in the '+' of the mcode of some tokens. Those kind of tokens * are used to be able to use '=' to compare big ast portions. *) | AbstractLineTok of Common.parse_info (* local to the abstracted thing *) (* with sexp *) type info = { pinfo : parse_info; (* this cocci_tag can be changed, which is how we can express some program * transformations by tagging the tokens involved in this transformation. *) cocci_tag: (Ast_cocci.mcodekind * metavars_binding list) option ref; (* set in comment_annotater_c.ml *) comments_tag: comments_around ref; (* annotations on the token (mutable) *) mutable annots_tag: Token_annot.annots (* todo? token_info : sometimes useful to know what token it was *) } and il = info list (* wrap2 is like wrap, except that I use it often for separator such * as ','. In that case the info is associated to the argument that * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. * * wrap3 is like wrap, except that I use it in case sometimes it * will be empty because the info will be included in a nested * entity (e.g. for Ident in expr because it's inlined in the name) * so user should never assume List.length wrap3 > 0. *) and 'a wrap = 'a * il and 'a wrap2 = 'a * il and 'a wrap3 = 'a * il (* * evotype*) (* ------------------------------------------------------------------------- *) (* Name *) (* ------------------------------------------------------------------------- *) (* was called 'ident' before, but 'name' is I think better * as concatenated strings can be used not only for identifiers and for * declarators, but also for fields, for labels, etc. * * Note: because now the info is embeded in the name, the info for * expression like Ident, or types like Typename, are not anymore * stored in the expression or type. Hence if you assume this, * which was true before, you are now wrong. So never write code like * let (unwrape,_), ii = e and use 'ii' believing it contains * the local ii to e. If you want to do that, use the appropiate * wrapper get_local_ii_of_expr_inlining_ii_of_name. *) and name = | RegularName of string wrap | CppConcatenatedName of (string wrap) wrap2 (* the ## separators *) list (* normally only used inside list of things, as in parameters or arguments * in which case, cf cpp-manual, it has a special meaning *) | CppVariadicName of string wrap (* ## s *) | CppIdentBuilder of string wrap (* s ( ) *) * ((string wrap) wrap2 list) (* arguments *) (* ------------------------------------------------------------------------- *) (* C Type *) (* ------------------------------------------------------------------------- *) (* Could have more precise type in fullType, in expression, etc, but * it requires to do too much things in parsing such as checking no * conflicting structname, computing value, etc. Better to separate * concern. So I put '=>' to mean what we would really like. In fact * what we really like is defining another fullType, expression, etc * from scratch, because many stuff are just sugar. * * invariant: Array and FunctionType have also typeQualifier but they * dont have sense. I put this to factorise some code. If you look in * the grammar, you see that we can never specify const for the array * himself (but we can do it for pointer) or function, we always * have in the action rule of the grammar a { (nQ, FunctionType ...) }. * * * Because of ExprStatement, we can have more 'new scope' events, but * rare I think. For instance with 'array of constExpression' there can * have an exprStatement and a new (local) struct defined. Same for * Constructor. * *) and fullType = typeQualifier * typeC and typeC = typeCbis wrap (* todo reput wrap3 *) and typeCbis = NoType (* for c++ only, and for K&R C *) | BaseType of baseType | Pointer of fullType | Array of constExpression option * fullType | FunctionType of functionType | Enum of string option * enumType | StructUnion of structUnion * string option * structType (* new scope *) | EnumName of string | StructUnionName of structUnion * string | TypeName of name * fullType option (* semantic: filled later *) | ParenType of fullType (* for unparser: *) (* gccext: TypeOfType below may seems useless; Why declare a * __typeof__(int) x; ? * When used with macros, it allows to fix a problem of C which * is that type declaration can be spread around the ident. Indeed it * may be difficult to have a macro such as * '#define macro(type, ident) type ident;' * because when you want to do a * macro(char[256], x), * then it will generate invalid code, but with a * '#define macro(type, ident) __typeof(type) ident;' * it will work. *) | TypeOfExpr of expression | TypeOfType of fullType (* cppext: IfdefType TODO *) (* -------------------------------------- *) and baseType = Void | IntType of intType | FloatType of floatType | SizeType | SSizeType | PtrDiffType (* stdC: type section * add a | SizeT ? * note: char and signed char are semantically different!! *) and intType = CChar (* obsolete? | CWchar *) | Si of signed and signed = sign * base and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *) and sign = Signed | UnSigned and floatType = CFloat | CDouble | CLongDouble (* -------------------------------------- *) and structUnion = Struct | Union and structType = field list and field = | DeclarationField of field_declaration (* gccext: *) | EmptyField of info (* cppext: *) | MacroDeclField of (string * argument wrap2 list) wrap (* optional ';'*) (* cppext: *) | CppDirectiveStruct of cpp_directive | IfdefStruct of ifdef_directive (* * field list list *) (* before unparser, I didn't have a FieldDeclList but just a Field. *) and field_declaration = | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *) (* At first I thought that a bitfield could be only Signed/Unsigned. * But it seems that gcc allow char i:4. C rule must say that you * can cast into int so enum too, ... *) and fieldkind = | Simple of name option * fullType | BitField of name option * fullType * info (* : *) * constExpression (* fullType => BitFieldInt | BitFieldUnsigned *) (* -------------------------------------- *) and enumType = oneEnumType wrap2 (* , *) list (* => string * int list *) and oneEnumType = name * (info (* = *) * constExpression) option (* -------------------------------------- *) (* return * (params * has "...") *) and functionType = fullType * (parameterType wrap2 list * bool wrap) and parameterType = { p_namei: name option; p_register: bool wrap; p_type: fullType; } (* => (bool (register) * fullType) list * bool *) and typeQualifier = typeQualifierbis wrap and typeQualifierbis = {const: bool; volatile: bool} (* gccext: cppext: *) and attribute = attributebis wrap and attributebis = | Attribute of string (* ------------------------------------------------------------------------- *) (* C expression *) (* ------------------------------------------------------------------------- *) and expression = (expressionbis * exp_info ref (* semantic: *)) wrap3 and exp_info = exp_type option * test and exp_type = fullType (* Type_c.completed_and_simplified *) * local and local = LocalVar of parse_info | StaticLocalVar of parse_info | NotLocalVar (* cocci: *) and test = Test | NotTest (* cocci: *) and expressionbis = (* Ident can be a enumeration constant, a simple variable, a name of a func. * With cppext, Ident can also be the name of a macro. Sparse says * "an identifier with a meaning is a symbol" *) | Ident of name (* todo? more semantic info such as LocalFunc *) | Constant of constant | FunCall of expression * argument wrap2 (* , *) list (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *) | CondExpr of expression * expression option * expression (* should be considered as statements, bad C langage *) | Sequence of expression * expression | Assignment of expression * assignOp * expression | Postfix of expression * fixOp | Infix of expression * fixOp | Unary of expression * unaryOp | Binary of expression * binaryOp * expression | ArrayAccess of expression * expression (* field ident access *) | RecordAccess of expression * name | RecordPtAccess of expression * name (* redundant normally, could replace it by DeRef RecordAcces *) | SizeOfExpr of expression | SizeOfType of fullType | Cast of fullType * expression (* gccext: *) | StatementExpr of compound wrap (* ( ) new scope *) | Constructor of fullType * initialiser (* for unparser: *) | ParenExpr of expression (* for C++: *) | New of (argument wrap2 (* , *) list) option * argument | Delete of expression (* cppext: IfdefExpr TODO *) (* cppext: normally just expression *) and argument = (expression, weird_argument) Common.either and weird_argument = | ArgType of parameterType | ArgAction of action_macro and action_macro = (* todo: ArgStatement of statement, possibly have ghost token *) | ActMisc of il (* I put string for Int and Float because int would not be enough because * OCaml int are 31 bits. So simpler to do string. Same reason to have * string instead of int list for the String case. * * note: -2 is not a constant, it is the unary operator '-' * applied to constant 2. So the string must represent a positive * integer only. *) and constant = | String of (string * isWchar) | MultiString of string list (* can contain MacroString, todo: more info *) | Char of (string * isWchar) (* normally it is equivalent to Int *) | Int of (string * intType) | Float of (string * floatType) and isWchar = IsWchar | IsChar and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not | GetRefLabel (* gccext: GetRefLabel, via &&label notation *) and assignOp = SimpleAssign | OpAssign of arithOp and fixOp = Dec | Inc and binaryOp = Arith of arithOp | Logical of logicalOp and arithOp = | Plus | Minus | Mul | Div | Mod | DecLeft | DecRight | And | Or | Xor | Max | Min and logicalOp = | Inf | Sup | InfEq | SupEq | Eq | NotEq | AndLog | OrLog and constExpression = expression (* => int *) (* ------------------------------------------------------------------------- *) (* C statement *) (* ------------------------------------------------------------------------- *) (* note: that assignement is not a statement but an expression; * wonderful C langage. * * note: I use 'and' for type definition cos gccext allow statement as * expression, so need mutual recursive type definition. * *) and statement = statementbis wrap3 and statementbis = | Labeled of labeled | Compound of compound (* new scope *) | ExprStatement of exprStatement | Selection of selection (* have fakeend *) | Iteration of iteration (* have fakeend *) | Jump of jump (* simplify cocci: only at the beginning of a compound normally *) | Decl of declaration (* gccext: *) | Asm of asmbody | NestedFunc of definition (* cppext: *) | MacroStmt and labeled = Label of name * statement | Case of expression * statement | CaseRange of expression * expression * statement (* gccext: *) | Default of statement (* cppext: * old: compound = (declaration list * statement list) * old: (declaration, statement) either list * Simplify cocci to just have statement list, by integrating Decl in stmt. * * update: now introduce also the _sequencable to allow ifdef in the middle. * Indeed, I now allow ifdefs in the ast but they must be only between * "sequencable" elements. They can be put in a type only if this type * is used in a list, like at the toplevel, used in a 'toplevel list', * or inside a compound, used in a 'statement list'. I must not allow * ifdef anywhere. For instance I can not make ifdef a statement * cos some instruction like If accept only one statement and the * ifdef directive must not take the place of a legitimate instruction. * We had a similar phenomena in SmPL where we have the notion * of statement and sequencable statement too. Once you have * such a type of sequencable thing, then s/xx list/xx_sequencable list/ * and introduce the ifdef. * * update: those ifdefs are either passed, or present in the AST but in * a flat form. To structure those flat ifdefs you have to run * a transformation that will put in a tree the statements inside * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference * between a IfdefStmt (flat) and IfdefStmt2 (tree structured). * *) and compound = statement_sequencable list (* cppext: easier to put at statement_list level than statement level *) and statement_sequencable = | StmtElem of statement (* cppext: *) | CppDirectiveStmt of cpp_directive | IfdefStmt of ifdef_directive (* this will be build in cpp_ast_c from the previous flat IfdefStmt *) | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list and exprStatement = expression option and declOrExpr = ForDecl of declaration | ForExp of expression option wrap (* for Switch, need check that all elements in the compound start * with a case:, otherwise unreachable code. *) and selection = | If of expression * statement * statement | Switch of expression * statement and iteration = | While of expression * statement | DoWhile of statement * expression | For of declOrExpr * exprStatement wrap * exprStatement wrap * statement (* cppext: *) | MacroIteration of string * argument wrap2 list * statement and jump = Goto of name | Continue | Break | Return | ReturnExpr of expression | GotoComputed of expression (* gccext: goto *exp ';' *) (* gccext: *) and asmbody = il (* string list *) * colon wrap (* : *) list and colon = Colon of colon_option wrap2 list and colon_option = colon_option_bis wrap and colon_option_bis = ColonMisc | ColonExpr of expression (* ------------------------------------------------------------------------- *) (* Declaration *) (* ------------------------------------------------------------------------- *) (* (string * ...) option cos can have empty declaration or struct tag * declaration. * * Before I had a Typedef constructor, but why make this special case and not * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2 * declarations ? So I try to generalise and not have Typedef either. This * requires more work in parsing. Better to separate concern. * * Before the need for unparser, I didn't have a DeclList but just a Decl. * * I am not sure what it means to declare a prototype inline, but gcc * accepts it. *) and declaration = | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *) (* cppext: *) (* bool is true if there is a ; at the end *) | MacroDecl of (string * argument wrap2 list * bool) wrap (* fakestart *) | MacroDeclInit of (string * argument wrap2 list * initialiser) wrap (* fakestart *) and onedecl = { v_namei: (name * v_init) option; v_type: fullType; (* semantic: set in type annotated and used in cocci_vs_c * when we transform some initialisation into affectation *) v_type_bis: fullType (* Type_c.completed_and_simplified *) option ref; v_storage: storage; v_local: local_decl; (* cocci: *) v_attr: attribute list; (* gccext: *) } and v_init = NoInit | ValInit of info * initialiser | ConstrInit of argument wrap2 (* , *) list wrap and storage = storagebis * bool (* gccext: inline or not *) and storagebis = NoSto | StoTypedef | Sto of storageClass and storageClass = Auto | Static | Register | Extern and local_decl = LocalDecl | NotLocalDecl (* fullType is the type used if the type should be converted to an assignment. It can be adjusted in the type annotation phase when typedef information is availalble *) and initialiser = initialiserbis wrap and initialiserbis = | InitExpr of expression | InitList of initialiser wrap2 (* , *) list (* gccext: *) | InitDesignators of designator list * initialiser | InitFieldOld of string * initialiser | InitIndexOld of expression * initialiser (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *) and designator = designatorbis wrap and designatorbis = | DesignatorField of string | DesignatorIndex of expression | DesignatorRange of expression * expression (* ------------------------------------------------------------------------- *) (* Function definition *) (* ------------------------------------------------------------------------- *) (* Normally we should define another type functionType2 because there * are more restrictions on what can define a function than a pointer * function. For instance a function declaration can omit the name of the * parameter whereas a function definition can not. But, in some cases such * as 'f(void) {', there is no name too, so I simplified and reused the * same functionType type for both declaration and function definition. * * Also old style C does not have type in the parameter, so again simpler * to abuse the functionType and allow missing type. *) and definition = definitionbis wrap (* ( ) { } fakestart sto *) and definitionbis = { f_name: name; f_type: functionType; (* less? a functionType2 ? *) f_storage: storage; f_body: compound; f_attr: attribute list; (* gccext: *) f_old_c_style: declaration list option; } (* cppext: IfdefFunHeader TODO *) (* ------------------------------------------------------------------------- *) (* cppext: cpp directives, #ifdef, #define and #include body *) (* ------------------------------------------------------------------------- *) and cpp_directive = | Define of define | Include of includ | PragmaAndCo of il (*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *) and define = string wrap (* #define s eol *) * (define_kind * define_val) and define_kind = | DefineVar | DefineFunc of ((string wrap) wrap2 list) wrap (* () *) | Undef and define_val = (* most common case; e.g. to define int constant *) | DefineExpr of expression | DefineStmt of statement | DefineType of fullType | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *) | DefineFunction of definition | DefineInit of initialiser (* in practice only { } with possible ',' *) | DefineMulti of statement list | DefineText of string wrap | DefineEmpty | DefineTodo and includ = { i_include: inc_file wrap; (* #include s *) (* cocci: computed in ? *) i_rel_pos: include_rel_pos option ref; (* cocci: cf -test incl *) i_is_in_ifdef: bool; (* cf cpp_ast_c.ml. set to None at parsing time. *) i_content: (Common.filename (* full path *) * program) option; } and inc_file = | Local of inc_elem list | NonLocal of inc_elem list | Weird of string (* ex: #include SYSTEM_H *) and inc_elem = string (* cocci: to tag the first of #include and last of #include * * The first_of and last_of store the list of prefixes that was * introduced by the include. On #include , if the include was * the first in the file, it would give in first_of the following * prefixes a/b/c; a/b/; a/ ; * * This is set after parsing, in cocci.ml, in update_rel_pos. *) and include_rel_pos = { first_of : string list list; last_of : string list list; } (* todo? to specialize if someone need more info *) and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *) | IfdefDirective of (ifdefkind * matching_tag) wrap and ifdefkind = | Ifdef (* todo? of string ? of formula_cpp ? *) | IfdefElseif (* same *) | IfdefElse (* same *) | IfdefEndif (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use * a global so it means if you parse the same file twice you may get * different id. I try now to avoid this pb by resetting it each * time I parse a file. *) and matching_tag = IfdefTag of (int (* tag *) * int (* total with this tag *)) (* ------------------------------------------------------------------------- *) (* The toplevels elements *) (* ------------------------------------------------------------------------- *) and toplevel = | Declaration of declaration | Definition of definition (* cppext: *) | CppTop of cpp_directive | IfdefTop of ifdef_directive (* * toplevel list *) (* cppext: *) | MacroTop of string * argument wrap2 list * il | EmptyDef of il (* gccext: allow redundant ';' *) | NotParsedCorrectly of il | FinalDef of info (* EOF *) (* c++ *) | Namespace of toplevel list * il (* ------------------------------------------------------------------------- *) and program = toplevel list (*****************************************************************************) (* Cocci Bindings *) (*****************************************************************************) (* Was previously in pattern.ml, but because of the transformer, * we need to decorate each token with some cocci code AND the environment * for this cocci code. *) and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc and metavar_binding_kind = | MetaIdVal of string * Ast_cocci.meta_name list (* negative constraints *) | MetaFuncVal of string | MetaLocalFuncVal of string | MetaExprVal of expression (* a "clean expr" *) * (*subterm constraints, currently exprs*) Ast_cocci.meta_name list | MetaExprListVal of argument wrap2 list | MetaParamVal of parameterType | MetaParamListVal of parameterType wrap2 list | MetaTypeVal of fullType | MetaInitVal of initialiser | MetaInitListVal of initialiser wrap2 list | MetaDeclVal of declaration | MetaFieldVal of field | MetaFieldListVal of field list | MetaStmtVal of statement (* Could also be in Lib_engine.metavars_binding2 with the ParenVal, * because don't need to have the value for a position in the env of * a '+'. But ParenVal or LabelVal are used only by CTL, they are not * variables accessible via SmPL whereas the position can be one day * so I think it's better to put MetaPosVal here *) | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *) | MetaPosValList of (Common.filename * string (*element*) * posl * posl) list (* min, max *) | MetaListlenVal of int (*****************************************************************************) (* C comments *) (*****************************************************************************) (* convention: I often use "m" for comments as I can not use "c" * (already use for c stuff) and "com" is too long. *) (* this type will be associated to each token. *) and comments_around = { mbefore: Token_c.comment_like_token list; mafter: Token_c.comment_like_token list; (* less: could remove ? do something simpler than CComment for * coccinelle, cf above. *) mbefore2: comment_and_relative_pos list; mafter2: comment_and_relative_pos list; } and comment_and_relative_pos = { minfo: Common.parse_info; (* the int represent the number of lines of difference between the * current token and the comment. When on same line, this number is 0. * When previous line, -1. In some way the after/before in previous * record is useless because the sign of the integer can helps * do the difference too, but I keep it that way. *) mpos: int; (* todo? * cppbetween: bool; touse? if false positive * is_alone_in_line: bool; (*for labels, to avoid false positive*) *) } and comment = Common.parse_info and com = comment list ref (* with sexp *) (*****************************************************************************) (* Some constructors *) (*****************************************************************************) let nullQualif = ({const=false; volatile= false}, []) let nQ = nullQualif let defaultInt = (BaseType (IntType (Si (Signed, CInt)))) let noType () = ref (None,NotTest) let noInstr = (ExprStatement (None), []) let noTypedefDef () = None let emptyMetavarsBinding = ([]: metavars_binding) let emptyAnnotCocci = (Ast_cocci.CONTEXT (Ast_cocci.NoPos,Ast_cocci.NOTHING), ([] : metavars_binding list)) let emptyAnnot = (None: (Ast_cocci.mcodekind * metavars_binding list) option) (* compatibility mode *) let mcode_and_env_of_cocciref aref = match !aref with | Some x -> x | None -> emptyAnnotCocci let emptyComments= { mbefore = []; mafter = []; mbefore2 = []; mafter2 = []; } (* for include, some meta information needed by cocci *) let noRelPos () = ref (None: include_rel_pos option) let noInIfdef () = ref false (* When want add some info in ast that does not correspond to * an existing C element. * old: or when don't want 'synchronize' on it in unparse_c.ml * (now have other mark for tha matter). *) let no_virt_pos = ({str="";charpos=0;line=0;column=0;file=""},-1) let fakeInfo pi = { pinfo = FakeTok ("",no_virt_pos); cocci_tag = ref emptyAnnot; annots_tag = Token_annot.empty; comments_tag = ref emptyComments; } let noii = [] let noattr = [] let noi_content = (None: ((Common.filename * program) option)) (*****************************************************************************) (* Wrappers *) (*****************************************************************************) let unwrap = fst let unwrap2 = fst let unwrap_expr ((unwrap_e, typ), iie) = unwrap_e let rewrap_expr ((_old_unwrap_e, typ), iie) newe = ((newe, typ), iie) let unwrap_typeC (qu, (typeC, ii)) = typeC let rewrap_typeC (qu, (typeC, ii)) newtypeC = (qu, (newtypeC, ii)) let unwrap_typeCbis (typeC, ii) = typeC let unwrap_st (unwrap_st, ii) = unwrap_st (* ------------------------------------------------------------------------- *) let mk_e unwrap_e ii = (unwrap_e, noType()), ii let mk_e_bis unwrap_e ty ii = (unwrap_e, ty), ii let mk_ty typeC ii = nQ, (typeC, ii) let mk_tybis typeC ii = (typeC, ii) let mk_st unwrap_st ii = (unwrap_st, ii) (* ------------------------------------------------------------------------- *) let get_ii_typeC_take_care (typeC, ii) = ii let get_ii_st_take_care (st, ii) = ii let get_ii_expr_take_care (e, ii) = ii let get_st_and_ii (st, ii) = st, ii let get_ty_and_ii (qu, (typeC, ii)) = qu, (typeC, ii) let get_e_and_ii (e, ii) = e, ii (* ------------------------------------------------------------------------- *) let get_type_expr ((unwrap_e, typ), iie) = !typ let set_type_expr ((unwrap_e, oldtyp), iie) newtyp = oldtyp := newtyp (* old: (unwrap_e, newtyp), iie *) let get_onlytype_expr ((unwrap_e, typ), iie) = match !typ with | Some (ft,_local), _test -> Some ft | None, _ -> None let get_onlylocal_expr ((unwrap_e, typ), iie) = match !typ with | Some (ft,local), _test -> Some local | None, _ -> None (* ------------------------------------------------------------------------- *) let rewrap_str s ii = {ii with pinfo = (match ii.pinfo with OriginTok pi -> OriginTok { pi with Common.str = s;} | ExpandedTok (pi,vpi) -> ExpandedTok ({ pi with Common.str = s;},vpi) | FakeTok (_,vpi) -> FakeTok (s,vpi) | AbstractLineTok pi -> OriginTok { pi with Common.str = s;})} let rewrap_pinfo pi ii = {ii with pinfo = pi} (* info about the current location *) let get_pi = function OriginTok pi -> pi | ExpandedTok (_,(pi,_)) -> pi | FakeTok (_,(pi,_)) -> pi | AbstractLineTok pi -> pi (* original info *) let get_opi = function OriginTok pi -> pi | ExpandedTok (pi,_) -> pi (* diff with get_pi *) | FakeTok (_,_) -> failwith "no position information" | AbstractLineTok pi -> pi let str_of_info ii = match ii.pinfo with OriginTok pi -> pi.Common.str | ExpandedTok (pi,_) -> pi.Common.str | FakeTok (s,_) -> s | AbstractLineTok pi -> pi.Common.str let get_info f ii = match ii.pinfo with OriginTok pi -> f pi | ExpandedTok (_,(pi,_)) -> f pi | FakeTok (_,(pi,_)) -> f pi | AbstractLineTok pi -> f pi let get_orig_info f ii = match ii.pinfo with OriginTok pi -> f pi | ExpandedTok (pi,_) -> f pi (* diff with get_info *) | FakeTok (_,(pi,_)) -> f pi | AbstractLineTok pi -> f pi let make_expanded ii = {ii with pinfo = ExpandedTok (get_opi ii.pinfo,no_virt_pos)} let pos_of_info ii = get_info (function x -> x.Common.charpos) ii let opos_of_info ii = get_orig_info (function x -> x.Common.charpos) ii let line_of_info ii = get_orig_info (function x -> x.Common.line) ii let col_of_info ii = get_orig_info (function x -> x.Common.column) ii let file_of_info ii = get_orig_info (function x -> x.Common.file) ii let mcode_of_info ii = fst (mcode_and_env_of_cocciref ii.cocci_tag) let pinfo_of_info ii = ii.pinfo let parse_info_of_info ii = get_pi ii.pinfo let strloc_of_info ii = spf "%s:%d" (file_of_info ii) (line_of_info ii) let is_fake ii = match ii.pinfo with FakeTok (_,_) -> true | _ -> false let is_origintok ii = match ii.pinfo with | OriginTok pi -> true | _ -> false (* ------------------------------------------------------------------------- *) type posrv = Real of Common.parse_info | Virt of virtual_position let compare_pos ii1 ii2 = let get_pos = function OriginTok pi -> Real pi | FakeTok (s,vpi) -> Virt vpi | ExpandedTok (pi,vpi) -> Virt vpi | AbstractLineTok pi -> Real pi in (* used for printing *) let pos1 = get_pos (pinfo_of_info ii1) in let pos2 = get_pos (pinfo_of_info ii2) in match (pos1,pos2) with (Real p1, Real p2) -> compare p1.Common.charpos p2.Common.charpos | (Virt (p1,_), Real p2) -> if (compare p1.Common.charpos p2.Common.charpos) =|= (-1) then (-1) else 1 | (Real p1, Virt (p2,_)) -> if (compare p1.Common.charpos p2.Common.charpos) =|= 1 then 1 else (-1) | (Virt (p1,o1), Virt (p2,o2)) -> let poi1 = p1.Common.charpos in let poi2 = p2.Common.charpos in match compare poi1 poi2 with -1 -> -1 | 0 -> compare o1 o2 | x -> x let equal_posl (l1,c1) (l2,c2) = (l1 =|= l2) && (c1 =|= c2) let compare_posl (l1,c1) (l2,c2) = match l2 - l1 with 0 -> c2 - c1 | r -> r let info_to_fixpos ii = match pinfo_of_info ii with OriginTok pi -> Ast_cocci.Real pi.Common.charpos | ExpandedTok (_,(pi,offset)) -> Ast_cocci.Virt (pi.Common.charpos,offset) | FakeTok (_,(pi,offset)) -> Ast_cocci.Virt (pi.Common.charpos,offset) | AbstractLineTok pi -> failwith "unexpected abstract" (* cocci: *) let is_test (e : expression) = let (_,info), _ = e in let (_,test) = !info in test =*= Test (*****************************************************************************) (* Abstract line *) (*****************************************************************************) (* When we have extended the C Ast to add some info to the tokens, * such as its line number in the file, we can not use anymore the * ocaml '=' to compare Ast elements. To overcome this problem, to be * able to use again '=', we just have to get rid of all those extra * information, to "abstract those line" (al) information. * * Julia then modifies it a little to have a tokenindex, so the original * true al_info is in fact real_al_info. *) let al_info tokenindex x = { pinfo = (AbstractLineTok {charpos = tokenindex; line = tokenindex; column = tokenindex; file = ""; str = str_of_info x}); cocci_tag = ref emptyAnnot; annots_tag = Token_annot.empty; comments_tag = ref emptyComments; } let semi_al_info x = { x with cocci_tag = ref emptyAnnot; comments_tag = ref emptyComments; } let magic_real_number = -10 let real_al_info x = { pinfo = (AbstractLineTok {charpos = magic_real_number; line = magic_real_number; column = magic_real_number; file = ""; str = str_of_info x}); cocci_tag = ref emptyAnnot; annots_tag = Token_annot.empty; comments_tag = ref emptyComments; } let al_comments x = let keep_cpp l = List.filter (function (Token_c.TCommentCpp _,_) -> true | _ -> false) l in let al_com (x,i) = (x,{i with Common.charpos = magic_real_number; Common.line = magic_real_number; Common.column = magic_real_number}) in {mbefore = []; (* duplicates mafter of the previous token *) mafter = List.map al_com (keep_cpp x.mafter); mbefore2=[]; mafter2=[]; } let al_info_cpp tokenindex x = { pinfo = (AbstractLineTok {charpos = tokenindex; line = tokenindex; column = tokenindex; file = ""; str = str_of_info x}); cocci_tag = ref emptyAnnot; annots_tag = Token_annot.empty; comments_tag = ref (al_comments !(x.comments_tag)); } let semi_al_info_cpp x = { x with cocci_tag = ref emptyAnnot; annots_tag = Token_annot.empty; comments_tag = ref (al_comments !(x.comments_tag)); } let real_al_info_cpp x = { pinfo = (AbstractLineTok {charpos = magic_real_number; line = magic_real_number; column = magic_real_number; file = ""; str = str_of_info x}); cocci_tag = ref emptyAnnot; annots_tag = Token_annot.empty; comments_tag = ref (al_comments !(x.comments_tag)); } (*****************************************************************************) (* Views *) (*****************************************************************************) (* Transform a list of arguments (or parameters) where the commas are * represented via the wrap2 and associated with an element, with * a list where the comma are on their own. f(1,2,2) was * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2]. * * Used in cocci_vs_c.ml, to have a more direct correspondance between * the ast_cocci of julia and ast_c. *) let rec (split_comma: 'a wrap2 list -> ('a, il) either list) = function | [] -> [] | (e, ii)::xs -> if null ii then (Left e)::split_comma xs else Right ii::Left e::split_comma xs let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) = function | [] -> [] | Right ii::Left e::xs -> (e, ii)::unsplit_comma xs | Left e::xs -> let empty_ii = [] in (e, empty_ii)::unsplit_comma xs | Right ii::_ -> raise (Impossible 59) (*****************************************************************************) (* Helpers, could also be put in lib_parsing_c.ml instead *) (*****************************************************************************) (* should maybe be in pretty_print_c ? *) let s_of_inc_file inc_file = match inc_file with | Local xs -> xs +> Common.join "/" | NonLocal xs -> xs +> Common.join "/" | Weird s -> s let s_of_inc_file_bis inc_file = match inc_file with | Local xs -> "\"" ^ xs +> Common.join "/" ^ "\"" | NonLocal xs -> "<" ^ xs +> Common.join "/" ^ ">" | Weird s -> s let fieldname_of_fieldkind fieldkind = match fieldkind with | Simple (sopt, ft) -> sopt | BitField (sopt, ft, info, expr) -> sopt let s_of_attr attr = attr +> List.map (fun (Attribute s, ii) -> s) +> Common.join "," (* ------------------------------------------------------------------------- *) let str_of_name ident = match ident with | RegularName (s,ii) -> s | CppConcatenatedName xs -> xs +> List.map (fun (x,iiop) -> unwrap x) +> Common.join "##" | CppVariadicName (s, ii) -> "##" ^ s | CppIdentBuilder ((s,iis), xs) -> s ^ "(" ^ (xs +> List.map (fun ((x,iix), iicomma) -> x) +> Common.join ",") ^ ")" let get_s_and_ii_of_name name = match name with | RegularName (s, iis) -> s, iis | CppIdentBuilder ((s, iis), xs) -> s, iis | CppVariadicName (s,iis) -> let (iop, iis) = Common.tuple_of_list2 iis in s, [iis] | CppConcatenatedName xs -> (match xs with | [] -> raise (Impossible 60) | ((s,iis),noiiop)::xs -> s, iis ) let get_s_and_info_of_name name = let (s,ii) = get_s_and_ii_of_name name in s, List.hd ii let info_of_name name = let (s,ii) = get_s_and_ii_of_name name in List.hd ii let ii_of_name name = let (s,ii) = get_s_and_ii_of_name name in ii let get_local_ii_of_expr_inlining_ii_of_name e = let (ebis,_),ii = e in match ebis, ii with | Ident name, noii -> assert(null noii); ii_of_name name | RecordAccess (e, name), ii -> ii @ ii_of_name name | RecordPtAccess (e, name), ii -> ii @ ii_of_name name | _, ii -> ii let get_local_ii_of_tybis_inlining_ii_of_name ty = match ty with | TypeName (name, _typ), [] -> ii_of_name name | _, ii -> ii (* the following is used to obtain the argument to LocalVar *) let info_of_type ft = let (qu, ty) = ft in (* bugfix: because of string->name, the ii can be deeper *) let ii = get_local_ii_of_tybis_inlining_ii_of_name ty in match ii with | ii::_ -> Some ii.pinfo | [] -> None (* only Label and Goto have name *) let get_local_ii_of_st_inlining_ii_of_name st = match st with | Labeled (Label (name, st)), ii -> ii_of_name name @ ii | Jump (Goto name), ii -> let (i1, i3) = Common.tuple_of_list2 ii in [i1] @ ii_of_name name @ [i3] | _, ii -> ii (* ------------------------------------------------------------------------- *) let name_of_parameter param = param.p_namei +> Common.map_option (str_of_name) (* ------------------------------------------------------------------------- *) (* Annotations on tokens *) (* ------------------------------------------------------------------------- *) (* to put a given annotation on a token *) let put_annot_info info key value = info.annots_tag <- Token_annot.put_annot key value info.annots_tag (* to check if an annotation has such a token *) let get_annot_info info key = Token_annot.get_annot info.annots_tag key