| 1 | (* Copyright (C) 2002-2008 Yoann Padioleau |
| 2 | * |
| 3 | * This program is free software; you can redistribute it and/or |
| 4 | * modify it under the terms of the GNU General Public License (GPL) |
| 5 | * version 2 as published by the Free Software Foundation. |
| 6 | * |
| 7 | * This program is distributed in the hope that it will be useful, |
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | * file license.txt for more details. |
| 11 | *) |
| 12 | open Common |
| 13 | |
| 14 | (*****************************************************************************) |
| 15 | (* The AST C related types *) |
| 16 | (*****************************************************************************) |
| 17 | |
| 18 | (* Cocci: Each token will be decorated in the future by the mcodekind |
| 19 | * of cocci. It is the job of the pretty printer to look at this |
| 20 | * information and decide to print or not the token (and also the |
| 21 | * pending '+' associated sometimes with the token). |
| 22 | * |
| 23 | * The first time that we parse the original C file, the mcodekind is |
| 24 | * empty, or more precisely all is tagged as a CONTEXT with NOTHING |
| 25 | * associated. This is what I call a "clean" expr/statement/.... |
| 26 | * |
| 27 | * Each token will also be decorated in the future with an environment, |
| 28 | * because the pending '+' may contain metavariables that refer to some |
| 29 | * C code. |
| 30 | * |
| 31 | * Update: Now I use a ref! so take care. |
| 32 | * |
| 33 | * Sometimes we want to add someting at the beginning or at the end |
| 34 | * of a construct. For 'function' and 'decl' we want add something |
| 35 | * to their left and for 'if' 'while' et 'for' and so on at their right. |
| 36 | * We want some kinds of "virtual placeholders" that represent the start or |
| 37 | * end of a construct. We use fakeInfo for that purpose. |
| 38 | * To identify those cases I have added a fakestart/fakeend comment. |
| 39 | * |
| 40 | * convention: I often use 'ii' for the name of a list of info. |
| 41 | * |
| 42 | *) |
| 43 | |
| 44 | (* forunparser: *) |
| 45 | |
| 46 | type posl = int * int (* lin-col, for MetaPosValList, for position variables *) |
| 47 | type virtual_position = Common.parse_info * int (* character offset *) |
| 48 | type parse_info = |
| 49 | (* Present both in ast and list of tokens *) |
| 50 | | OriginTok of Common.parse_info |
| 51 | (* Present only in ast and generated after parsing. Used mainly |
| 52 | * by Julia, to add stuff at virtual places, beginning of func or decl *) |
| 53 | | FakeTok of string * virtual_position |
| 54 | (* Present both in ast and list of tokens. *) |
| 55 | | ExpandedTok of Common.parse_info * virtual_position |
| 56 | (* Present neither in ast nor in list of tokens |
| 57 | * but only in the '+' of the mcode of some tokens. Those kind of tokens |
| 58 | * are used to be able to use '=' to compare big ast portions. |
| 59 | *) |
| 60 | | AbstractLineTok of Common.parse_info (* local to the abstracted thing *) |
| 61 | |
| 62 | type info = { |
| 63 | pinfo : parse_info; |
| 64 | cocci_tag: (Ast_cocci.mcodekind * metavars_binding) ref; |
| 65 | comments_tag: comments_around ref; (* set in comment_annotater.ml *) |
| 66 | (* todo? token_info : sometimes useful to know what token it was *) |
| 67 | } |
| 68 | and il = info list |
| 69 | |
| 70 | (* wrap2 is like wrap, except that I use it often for separator such |
| 71 | * as ','. In that case the info is associated to the argument that |
| 72 | * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *) |
| 73 | and 'a wrap = 'a * il |
| 74 | and 'a wrap2 = 'a * il |
| 75 | |
| 76 | (* ------------------------------------------------------------------------- *) |
| 77 | (* C Type *) |
| 78 | (* ------------------------------------------------------------------------- *) |
| 79 | (* Could have more precise type in fullType, in expression, etc, but |
| 80 | * it requires to do too much things in parsing such as checking no |
| 81 | * conflicting structname, computing value, etc. Better to separate |
| 82 | * concern, so I put '=>' to mean what we would really like. In fact |
| 83 | * what we really like is defining another fullType, expression, etc |
| 84 | * from scratch, because many stuff are just sugar. |
| 85 | * |
| 86 | * invariant: Array and FunctionType have also typeQualifier but they |
| 87 | * dont have sense. I put this to factorise some code. If you look in |
| 88 | * grammar, you see that we can never specify const for the array |
| 89 | * himself (but we can do it for pointer). |
| 90 | * |
| 91 | * |
| 92 | * Because of ExprStatement, we can have more 'new scope' events, but |
| 93 | * rare I think. For instance with 'array of constExpression' there can |
| 94 | * have an exprStatement and a new (local) struct defined. Same for |
| 95 | * Constructor. |
| 96 | * |
| 97 | * Some stuff are tagged semantic: which means that they are computed |
| 98 | * after parsing. *) |
| 99 | |
| 100 | |
| 101 | and fullType = typeQualifier * typeC |
| 102 | and typeC = typeCbis wrap |
| 103 | |
| 104 | and typeCbis = |
| 105 | | BaseType of baseType |
| 106 | |
| 107 | | Pointer of fullType |
| 108 | | Array of constExpression option * fullType |
| 109 | | FunctionType of functionType |
| 110 | |
| 111 | | Enum of string option * enumType |
| 112 | | StructUnion of structUnion * string option * structType (* new scope *) |
| 113 | |
| 114 | | EnumName of string |
| 115 | | StructUnionName of structUnion * string |
| 116 | |
| 117 | | TypeName of string * fullType option (* semantic: filled later *) |
| 118 | |
| 119 | | ParenType of fullType (* forunparser: *) |
| 120 | |
| 121 | (* gccext: TypeOfType may seems useless, why declare a __typeof__(int) |
| 122 | * x; ? But when used with macro, it allows to fix a problem of C which |
| 123 | * is that type declaration can be spread around the ident. Indeed it |
| 124 | * may be difficult to have a macro such as '#define macro(type, |
| 125 | * ident) type ident;' because when you want to do a macro(char[256], |
| 126 | * x), then it will generate invalid code, but with a '#define |
| 127 | * macro(type, ident) __typeof(type) ident;' it will work. *) |
| 128 | | TypeOfExpr of expression |
| 129 | | TypeOfType of fullType |
| 130 | |
| 131 | (* -------------------------------------- *) |
| 132 | and baseType = Void |
| 133 | | IntType of intType |
| 134 | | FloatType of floatType |
| 135 | |
| 136 | (* stdC: type section |
| 137 | * add a | SizeT ? |
| 138 | * note: char and signed char are semantically different!! |
| 139 | *) |
| 140 | and intType = CChar (* obsolete? | CWchar *) |
| 141 | | Si of signed |
| 142 | |
| 143 | and signed = sign * base |
| 144 | and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *) |
| 145 | and sign = Signed | UnSigned |
| 146 | |
| 147 | and floatType = CFloat | CDouble | CLongDouble |
| 148 | |
| 149 | |
| 150 | (* -------------------------------------- *) |
| 151 | and structUnion = Struct | Union |
| 152 | and structType = (field wrap) list (* ; *) |
| 153 | |
| 154 | (* before unparser, I didn't have a FieldDeclList but just a Field. *) |
| 155 | and field = FieldDeclList of fieldkind wrap2 list (* , *) |
| 156 | | EmptyField (* gccext: *) |
| 157 | |
| 158 | (* At first I thought that a bitfield could be only Signed/Unsigned. |
| 159 | * But it seems that gcc allow char i:4. C rule must say that you |
| 160 | * can cast into int so enum too, ... |
| 161 | *) |
| 162 | and fieldkind = fieldkindbis wrap (* s : *) |
| 163 | and fieldkindbis = |
| 164 | | Simple of string option * fullType |
| 165 | | BitField of string option * fullType * constExpression |
| 166 | (* fullType => BitFieldInt | BitFieldUnsigned *) |
| 167 | |
| 168 | |
| 169 | (* -------------------------------------- *) |
| 170 | and enumType = (string * constExpression option) wrap (* s = *) |
| 171 | wrap2 (* , *) list |
| 172 | (* => string * int list *) |
| 173 | |
| 174 | |
| 175 | (* -------------------------------------- *) |
| 176 | (* return * (params * has "...") *) |
| 177 | and functionType = fullType * (parameterType wrap2 list * bool wrap) |
| 178 | and parameterType = (bool * string option * fullType) wrap (* reg s *) |
| 179 | (* => (bool (register) * fullType) list * bool *) |
| 180 | |
| 181 | |
| 182 | and typeQualifier = typeQualifierbis wrap |
| 183 | and typeQualifierbis = {const: bool; volatile: bool} |
| 184 | |
| 185 | |
| 186 | (* ------------------------------------------------------------------------- *) |
| 187 | (* C expression *) |
| 188 | (* ------------------------------------------------------------------------- *) |
| 189 | and expression = (expressionbis * exp_info ref (* semantic: *)) wrap |
| 190 | and local = LocalVar of parse_info | NotLocalVar |
| 191 | and test = Test | NotTest |
| 192 | and exp_type = fullType * local |
| 193 | and exp_info = exp_type option * test |
| 194 | and expressionbis = |
| 195 | |
| 196 | (* Ident can be a enumeration constant, a simple variable, a name of a func. |
| 197 | * With cppext, Ident can also be the name of a macro. Sparse says |
| 198 | * "an identifier with a meaning is a symbol". |
| 199 | *) |
| 200 | | Ident of string (* todo? more semantic info such as LocalFunc *) |
| 201 | | Constant of constant |
| 202 | | FunCall of expression * argument wrap2 (* , *) list |
| 203 | (* gccext: x ? /* empty */ : y <=> x ? x : y; *) |
| 204 | | CondExpr of expression * expression option * expression |
| 205 | |
| 206 | (* should be considered as statements, bad C langage *) |
| 207 | | Sequence of expression * expression |
| 208 | | Assignment of expression * assignOp * expression |
| 209 | |
| 210 | | Postfix of expression * fixOp |
| 211 | | Infix of expression * fixOp |
| 212 | | Unary of expression * unaryOp |
| 213 | | Binary of expression * binaryOp * expression |
| 214 | |
| 215 | | ArrayAccess of expression * expression |
| 216 | | RecordAccess of expression * string |
| 217 | | RecordPtAccess of expression * string |
| 218 | (* redundant normally, could replace it by DeRef RecordAcces *) |
| 219 | |
| 220 | | SizeOfExpr of expression |
| 221 | | SizeOfType of fullType |
| 222 | | Cast of fullType * expression |
| 223 | |
| 224 | (* gccext: *) |
| 225 |