1 (* Copyright (C) 2002, 2006, 2007, 2008 Yoann Padioleau
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
14 (*****************************************************************************)
15 (* The AST C related types *)
16 (*****************************************************************************)
18 (* To allow some transformations over the AST, we keep as much information
19 * as possible in the AST such as the tokens content and their locations.
20 * Those info are called 'info' (how original) and can be tagged.
21 * For instance one tag may say that the unparser should remove this token.
23 * Update: Now I use a ref! in those 'info' so take care.
25 * Sometimes we want to add someting at the beginning or at the end
26 * of a construct. For 'function' and 'decl' we want to add something
27 * to their left and for 'if' 'while' et 'for' and so on at their right.
28 * We want some kinds of "virtual placeholders" that represent the start or
29 * end of a construct. We use fakeInfo for that purpose.
30 * To identify those cases I have added a fakestart/fakeend comment.
32 * convention: I often use 'ii' for the name of a list of info.
34 * update: I now allow ifdefs in the ast but there must be only between
35 * "sequencable" elements. They can be put in a type only if this type
36 * is used only in a list, like at toplevel, used in 'toplevel list',
37 * or inside compound, used in 'statement list'. I must not allow
38 * ifdef anywhere. For instance I can not make ifdef a statement
39 * cos some instruction like If accept only one statement and the
40 * ifdef directive must not take the place of a legitimate instruction.
41 * We had a similar phenomena in SmPL where we have the notion
42 * of statement and sequencable statement too. Once you have
43 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
44 * and introduce the ifdef.
46 * update: those ifdefs are either passed, or present in the AST but in
47 * a flat form. To structure those flat ifdefs you have to run
48 * a transformation that will put in a tree the statements inside
49 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
50 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
52 * Some stuff are tagged semantic: which means that they are computed
55 * cocci: Each token will be decorated in the future by the mcodekind
56 * of cocci. It is the job of the pretty printer to look at this
57 * information and decide to print or not the token (and also the
58 * pending '+' associated sometimes with the token).
60 * The first time that we parse the original C file, the mcodekind is
61 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
62 * associated. This is what I call a "clean" expr/statement/....
64 * Each token will also be decorated in the future with an environment,
65 * because the pending '+' may contain metavariables that refer to some
69 * All of this means that some elements in this AST are present only if
70 * some annotation/transformation has been done on the original AST returned
71 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
76 type posl
= int * int (* lin-col, for MetaPosValList, for position variables *)
78 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
79 type virtual_position
= Common.parse_info
* int (* character offset *)
82 (* Present both in ast and list of tokens *)
83 | OriginTok
of Common.parse_info
84 (* Present only in ast and generated after parsing. Used mainly
85 * by Julia, to add stuff at virtual places, beginning of func or decl *)
86 | FakeTok
of string * virtual_position
87 (* Present both in ast and list of tokens. *)
88 | ExpandedTok
of Common.parse_info
* virtual_position
89 (* Present neither in ast nor in list of tokens
90 * but only in the '+' of the mcode of some tokens. Those kind of tokens
91 * are used to be able to use '=' to compare big ast portions.
93 | AbstractLineTok
of Common.parse_info
(* local to the abstracted thing *)
97 (* this tag can be changed, which is how we can express some progra
98 * transformations by tagging the tokens involved in this transformation.
100 cocci_tag
: (Ast_cocci.mcodekind
* metavars_binding
) ref;
101 (* set in comment_annotater.ml *)
102 comments_tag
: comments_around
ref;
103 (* todo? token_info : sometimes useful to know what token it was *)
107 (* wrap2 is like wrap, except that I use it often for separator such
108 * as ','. In that case the info is associated to the argument that
109 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *)
110 and 'a wrap
= 'a
* il
111 and 'a wrap2
= 'a
* il
113 (* ------------------------------------------------------------------------- *)
115 (* ------------------------------------------------------------------------- *)
116 (* Could have more precise type in fullType, in expression, etc, but
117 * it requires to do too much things in parsing such as checking no
118 * conflicting structname, computing value, etc. Better to separate
119 * concern, so I put '=>' to mean what we would really like. In fact
120 * what we really like is defining another fullType, expression, etc
121 * from scratch, because many stuff are just sugar.
123 * invariant: Array and FunctionType have also typeQualifier but they
124 * dont have sense. I put this to factorise some code. If you look in
125 * grammar, you see that we can never specify const for the array
126 * himself (but we can do it for pointer).
129 * Because of ExprStatement, we can have more 'new scope' events, but
130 * rare I think. For instance with 'array of constExpression' there can
131 * have an exprStatement and a new (local) struct defined. Same for
134 * Some stuff are tagged semantic: which means that they are computed
139 and fullType
= typeQualifier
* typeC
140 and typeC
= typeCbis wrap
143 | BaseType
of baseType
145 | Pointer
of fullType
146 | Array
of constExpression
option * fullType
147 | FunctionType
of functionType
149 | Enum
of string option * enumType
150 | StructUnion
of structUnion
* string option * structType
(* new scope *)
153 | StructUnionName
of structUnion
* string
155 | TypeName
of string * fullType
option (* semantic: filled later *)
157 | ParenType
of fullType
(* forunparser: *)
159 (* gccext: TypeOfType may seems useless; why declare a
160 * __typeof__(int) x; ?
161 * But when used with macro, it allows to fix a problem of C which
162 * is that type declaration can be spread around the ident. Indeed it
163 * may be difficult to have a macro such as
164 * '#define macro(type, ident) type ident;'
165 * because when you want to do a
166 * macro(char[256], x),
167 * then it will generate invalid code, but with a
168 * '#define macro(type, ident) __typeof(type) ident;'
170 | TypeOfExpr
of expression
171 | TypeOfType
of fullType
173 (* cppext: IfdefType TODO *)
175 (* -------------------------------------- *)
178 | FloatType
of floatType
180 (* stdC: type section
182 * note: char and signed char are semantically different!!
184 and intType
= CChar
(* obsolete? | CWchar *)
187 and signed
= sign
* base
188 and base
= CChar2
| CShort
| CInt
| CLong
| CLongLong
(* gccext: *)
189 and sign
= Signed
| UnSigned
191 and floatType
= CFloat
| CDouble
| CLongDouble
194 (* -------------------------------------- *)
195 and structUnion
= Struct
| Union
196 and structType
= field list
197 and field
= fieldbis wrap
199 | DeclarationField
of field_declaration
200 | EmptyField
(* gccext: *)
202 | MacroStructDeclTodo
205 | CppDirectiveStruct
of cpp_directive
206 | IfdefStruct
of ifdef_directive
(* * field list list *)
209 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
210 and field_declaration
=
211 | FieldDeclList
of fieldkind wrap2 list
(* , *) wrap
(* ; *)
213 (* At first I thought that a bitfield could be only Signed/Unsigned.
214 * But it seems that gcc allow char i:4. C rule must say that you
215 * can cast into int so enum too, ...
217 and fieldkind
= fieldkindbis wrap
(* s : *)
219 | Simple
of string option * fullType
220 | BitField
of string option * fullType
* constExpression
221 (* fullType => BitFieldInt | BitFieldUnsigned *)
224 (* -------------------------------------- *)
225 and enumType
= (string * constExpression
option) wrap
(* s = *)
227 (* => string * int list *)
230 (* -------------------------------------- *)
231 (* return * (params * has "...") *)
232 and functionType
= fullType
* (parameterType wrap2 list
* bool wrap
)
233 and parameterType
= (bool * string option * fullType
) wrap
(* reg s *)
234 (* => (bool (register) * fullType) list * bool *)
237 and typeQualifier
= typeQualifierbis wrap
238 and typeQualifierbis
= {const
: bool; volatile
: bool}
240 (* gccext: cppext: *)
241 and attribute
= attributebis wrap
243 | Attribute
of string
245 (* ------------------------------------------------------------------------- *)
247 (* ------------------------------------------------------------------------- *)
248 and expression
= (expressionbis
* exp_info
ref (* semantic: *)) wrap
249 and exp_info
= exp_type
option * test
250 and exp_type
= fullType
* local
251 and local
= LocalVar
of parse_info
| NotLocalVar
(* cocci: *)
252 and test
= Test
| NotTest
(* cocci: *)
256 (* Ident can be a enumeration constant, a simple variable, a name of a func.
257 * With cppext, Ident can also be the name of a macro. Sparse says
258 * "an identifier with a meaning is a symbol".
260 | Ident
of string (* todo? more semantic info such as LocalFunc *)
261 | Constant
of constant
262 | FunCall
of expression
* argument wrap2
(* , *) list
263 (* gccext: x ? /* empty */ : y <=> x ? x : y; *)
264 | CondExpr
of expression
* expression
option * expression
266 (* should be considered as statements, bad C langage *)
267 | Sequence
of expression
* expression
268 | Assignment
of expression
* assignOp
* expression
270 | Postfix
of expression
* fixOp
271 | Infix
of expression
* fixOp
272 | Unary
of expression
* unaryOp
273 | Binary
of expression
* binaryOp
* expression
275 | ArrayAccess
of expression
* expression
276 | RecordAccess
of expression
* string
277 | RecordPtAccess
of expression
* string
278 (* redundant normally, could replace it by DeRef RecordAcces *)
280 | SizeOfExpr
of expression
281 | SizeOfType
of fullType
282 | Cast
of fullType
* expression
285 | StatementExpr
of compound wrap
(* (Â ) new scope *)
286 | Constructor
of fullType
* initialiser wrap2
(* , *) list
289 | ParenExpr
of expression
291 (* cppext: IfdefExpr TODO *)
293 (* cppext: normmally just expression *)
294 and argument
= (expression
, wierd_argument
) either
296 | ArgType
of parameterType
297 | ArgAction
of action_macro
299 (* todo: ArgStatement of statement, possibly have ghost token *)
303 (* I put string for Int and Float because int would not be enough because
304 * OCaml int are 31 bits. So simpler to do string. Same reason to have
305 * string instead of int list for the String case.
307 * note: that -2 is not a constant, it is the unary operator '-'
308 * applied to constant 2. So the string must represent a positive
312 | String
of (string * isWchar
)
313 | MultiString
(* can contain MacroString, todo: more info *)
314 | Char
of (string * isWchar
) (* normally it is equivalent to Int *)
315 | Int
of (string (* * intType*))
316 | Float
of (string * floatType
)
318 and isWchar
= IsWchar
| IsChar
321 and unaryOp
= GetRef
| DeRef
| UnPlus
| UnMinus
| Tilde
| Not
322 | GetRefLabel
(* gccext: GetRefLabel, via &&label notation *)
323 and assignOp
= SimpleAssign
| OpAssign
of arithOp
324 and fixOp
= Dec
| Inc
326 and binaryOp
= Arith
of arithOp
| Logical
of logicalOp
329 | Plus
| Minus
| Mul
| Div
| Mod
334 | Inf
| Sup
| InfEq
| SupEq
338 and constExpression
= expression
(* => int *)
342 (* ------------------------------------------------------------------------- *)
344 (* ------------------------------------------------------------------------- *)
345 (* note: that assignement is not a statement but an expression;
346 * wonderful C langage.
348 * note: I use 'and' for type definition cos gccext allow statement as
349 * expression, so need mutual recursive type definition. *)
351 and statement
= statementbis wrap
354 | Compound
of compound
(* new scope *)
355 | ExprStatement
of exprStatement
356 | Selection
of selection
(* have fakeend *)
357 | Iteration
of iteration
(* have fakeend *)
360 (* simplify cocci: only at the beginning of a compound normally *)
361 | Decl
of declaration
365 | NestedFunc
of definition
372 and labeled
= Label
of string * statement
373 | Case
of expression
* statement
374 | CaseRange
of expression
* expression
* statement
(* gccext: *)
375 | Default
of statement
378 * old: compound = (declaration list * statement list)
379 * old: (declaration, statement) either list
380 * Simplify cocci to just have statement list, by integrating Decl in stmt.
382 * update: now introduce also the _sequencable to allow ifdef in the middle.
384 and compound
= statement_sequencable list
386 (* cppext: easier to put at statement_list level than statement level *)
387 and statement_sequencable
=
388 | StmtElem
of statement
390 | CppDirectiveStmt
of cpp_directive
391 | IfdefStmt
of ifdef_directive
393 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
394 | IfdefStmt2
of ifdef_directive list
* (statement_sequencable list
) list
396 and exprStatement
= expression
option
398 (* for Switch, need check that all elements in the compound start
399 * with a case:, otherwise unreachable code.
402 | If
of expression
* statement
* statement
403 | Switch
of expression
* statement
407 | While
of expression
* statement
408 | DoWhile
of statement
* expression
409 | For
of exprStatement wrap
* exprStatement wrap
* exprStatement wrap
*
412 | MacroIteration
of string * argument wrap2 list
* statement
414 and jump
= Goto
of string
416 | Return
| ReturnExpr
of expression
417 | GotoComputed
of expression
(* gccext: goto *exp ';' *)
421 and asmbody
= il
(* string list *) * colon wrap
(* : *) list
422 and colon
= Colon
of colon_option wrap2 list
423 and colon_option
= colon_option_bis wrap
424 and colon_option_bis
= ColonMisc
| ColonExpr
of expression
427 (* ------------------------------------------------------------------------- *)
429 (* ------------------------------------------------------------------------- *)
430 (* (string * ...) option cos can have empty declaration or struct tag
433 * Before I had Typedef constructor, but why make this special case and not
434 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
435 * declarations ? So I try to generalise and not have Typedef either. This
436 * requires more work in parsing. Better to separate concern.
438 * Before the need for unparser, I didn't have a DeclList but just a Decl.
440 * I am not sure what it means to declare a prototype inline, but gcc
444 and local_decl
= LocalDecl
| NotLocalDecl
447 | DeclList
of onedecl wrap2
(* , *) list wrap
(* ; fakestart sto *)
449 | MacroDecl
of (string * argument wrap2 list
) wrap
452 { v_namei
: (string * initialiser
option) wrap
(* s = *) option;
455 v_local
: local_decl
; (* cocci: *)
456 v_attr
: attribute list
; (* gccext: *)
458 and storage
= storagebis
* bool (* gccext: inline or not *)
459 and storagebis
= NoSto
| StoTypedef
| Sto
of storageClass
460 and storageClass
= Auto
| Static
| Register
| Extern
462 and initialiser
= initialiserbis wrap
464 | InitExpr
of expression
465 | InitList
of initialiser wrap2
(* , *) list
467 | InitDesignators
of designator list
* initialiser
468 | InitFieldOld
of string * initialiser
469 | InitIndexOld
of expression
* initialiser
471 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
472 and designator
= designatorbis wrap
474 | DesignatorField
of string
475 | DesignatorIndex
of expression
476 | DesignatorRange
of expression
* expression
478 (* ------------------------------------------------------------------------- *)
479 (* Function definition *)
480 (* ------------------------------------------------------------------------- *)
481 (* Normally we should define another type functionType2 because there
482 * are more restrictions on what can define a function than a pointer
483 * function. For instance a function declaration can omit the name of the
484 * parameter wheras a function definition can not. But, in some cases such
485 * as 'f(void) {', there is no name too, so I simplified and reused the
486 * same functionType type for both declaration and function definition.
488 and definition
= definitionbis wrap
(* s ( ) { } fakestart sto *)
491 f_type
: functionType
;
494 f_attr
: attribute list
; (* gccext: *)
496 (* cppext: IfdefFunHeader TODO *)
498 (* ------------------------------------------------------------------------- *)
499 (* cppext: cpp directives, #ifdef, #define and #include body *)
500 (* ------------------------------------------------------------------------- *)
504 | Undef
of string wrap
507 (* to specialize if someone need more info *)
508 and ifdef_directive
= (* or and 'a ifdefed = 'a list wrap *)
509 | IfdefDirective
of (ifdefkind
* matching_tag
) wrap
511 | Ifdef
(* todo? of string ? of formula_cpp *)
512 | IfdefElseif
(* same *)
513 | IfdefElse
(* same *)
515 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
516 * a global so it means if you parse same file twice you may get
517 * different id. I try now to avoid this pb by resetting it each
518 * time I parse a file.
521 IfdefTag
of (int (* tag *) * int (* total with this tag *))
523 and define
= string wrap
* define_body
(* #define s *)
524 and define_body
= define_kind
* define_val
527 | DefineFunc
of ((string wrap
) wrap2 list
) wrap
(* () *)
529 | DefineExpr
of expression
530 | DefineStmt
of statement
531 | DefineType
of fullType
532 | DefineDoWhileZero
of (statement
* expression
) wrap
(* do { } while(0) *)
533 | DefineFunction
of definition
534 | DefineInit
of initialiser
(* in practice only { } with possible ',' *)
535 (* TODO DefineMulti of define_val list *)
537 | DefineText
of string wrap
545 { i_include
: inc_file wrap
; (* #include s *)
546 (* cocci: computed in ? *)
547 i_rel_pos
: include_rel_pos
option ref;
548 (* cocci: cf -test incl *)
550 (* cf cpp_ast_c.ml. set to None at parsing time. *)
551 i_content
: (Common.filename
(* full path *) * program
) option;
554 | Local
of inc_elem list
555 | NonLocal
of inc_elem list
556 | Wierd
of string (* ex: #include SYSTEM_H *)
557 and inc_elem
= string
559 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
561 * The first_of and last_of store the list of prefixes that was
562 * introduced by the include. On #include <a/b/x>, if the include was
563 * the first in the file, it would give in first_of the following
564 * prefixes a/b/c; a/b/; a/ ; <empty>
566 * This is set after parsing, in cocci.ml, in update_rel_pos.
568 and include_rel_pos
= {
569 first_of
: string list list
;
570 last_of
: string list list
;
578 (* ------------------------------------------------------------------------- *)
579 (* The toplevels elements *)
580 (* ------------------------------------------------------------------------- *)
582 | Declaration
of declaration
583 | Definition
of definition
586 | CppTop
of cpp_directive
587 | IfdefTop
of ifdef_directive
(* * toplevel list *)
590 | MacroTop
of string * argument wrap2 list
* il
592 | EmptyDef
of il
(* gccext: allow redundant ';' *)
593 | NotParsedCorrectly
of il
595 | FinalDef
of info
(* EOF *)
597 (* ------------------------------------------------------------------------- *)
598 and program
= toplevel list
601 (*****************************************************************************)
603 (*****************************************************************************)
604 (* Was previously in pattern.ml, but because of the transformer,
605 * we need to decorate each token with some cocci code AND the environment
606 * for this cocci code.
608 and metavars_binding
= (Ast_cocci.meta_name
, metavar_binding_kind
) assoc
609 and metavar_binding_kind
=
610 | MetaIdVal
of string
611 | MetaFuncVal
of string
612 | MetaLocalFuncVal
of string
614 | MetaExprVal
of expression
(* a "clean expr" *)
615 | MetaExprListVal
of argument wrap2 list
616 | MetaParamVal
of parameterType
617 | MetaParamListVal
of parameterType wrap2 list
619 | MetaTypeVal
of fullType
620 | MetaStmtVal
of statement
622 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
623 * because don't need to have the value for a position in the env of
624 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
625 * variables accessible via SmPL whereas the position can be one day
626 * so I think it's better to put MetaPosVal here *)
627 | MetaPosVal
of (Ast_cocci.fixpos
* Ast_cocci.fixpos
) (* max, min *)
629 (Common.filename
* string (*element*) * posl
* posl
) list
(* min, max *)
630 | MetaListlenVal
of int
633 (*****************************************************************************)
635 (*****************************************************************************)
637 (* convention: I often use "m" for comments as I can not use "c"
638 * (already use for c stuff) and "com" is too long.
641 (* this type will be associated to each token *)
642 and comments_around
= {
643 mbefore
: comment_and_relative_pos list
;
644 mafter
: comment_and_relative_pos list
;
646 and comment_and_relative_pos
= {
648 minfo
: Common.parse_info
;
649 (* the int represent the number of lines of difference between the
650 * current token and the comment. When on same line, this number is 0.
651 * When previous line, -1. In some way the after/before in previous
652 * record is useless because the sign of the integer can helps
653 * do the difference too, but I keep it that way.
657 * cppbetween: bool; touse? if false positive
658 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
662 and comment
= Common.parse_info
663 and com
= comment list
ref
666 (*****************************************************************************)
667 (* Cpp constructs put it comments in lexer or parsing_hack *)
668 (*****************************************************************************)
670 (* This type is not in the Ast but is associated with the TCommentCpp token.
671 * I put this enum here because parser_c.mly need it. I could have put
672 * it also in lexer_parser.
674 type cppcommentkind
=
678 | CppPassingNormal
(* ifdef 0, cplusplus, etc *)
679 | CppPassingCosWouldGetError
(* expr passsing *)
684 (*****************************************************************************)
685 (* Some constructors *)
686 (*****************************************************************************)
687 let nullQualif = ({const
=false; volatile
= false}, [])
690 let defaultInt = (BaseType
(IntType
(Si
(Signed
, CInt
))))
692 let noType () = ref (None
,NotTest
)
693 let noInstr = (ExprStatement
(None
), [])
694 let noTypedefDef () = None
696 let emptyMetavarsBinding =
697 ([]: metavars_binding
)
700 (Ast_cocci.CONTEXT
(Ast_cocci.NoPos
,Ast_cocci.NOTHING
),
701 emptyMetavarsBinding)
709 (* for include, some meta information needed by cocci *)
711 ref (None
: include_rel_pos
option)
716 (* When want add some info in ast that does not correspond to
717 * an existing C element.
718 * old: or when don't want 'synchronize' on it in unparse_c.ml
719 * (now have other mark for tha matter).
721 let no_virt_pos = ({str
="";charpos
=0;line
=0;column
=0;file
=""},-1)
724 { pinfo
= FakeTok
("",no_virt_pos);
725 cocci_tag
= ref emptyAnnot;
726 comments_tag
= ref emptyComments;
731 let noi_content = (None
: ((Common.filename
* program
) option))
733 (*****************************************************************************)
735 (*****************************************************************************)
739 let unwrap_expr ((unwrap_e
, typ
), iie
) = unwrap_e
740 let rewrap_expr ((_old_unwrap_e
, typ
), iie
) newe
= ((newe
, typ
), iie
)
742 let get_type_expr ((unwrap_e
, typ
), iie
) = !typ
743 let set_type_expr ((unwrap_e
, oldtyp
), iie
) newtyp
=
745 (* old: (unwrap_e, newtyp), iie *)
748 let unwrap_typeC (qu
, (typeC
, ii
)) = typeC
749 let rewrap_typeC (qu
, (typeC
, ii
)) newtypeC
= (qu
, (newtypeC
, ii
))
752 let rewrap_str s ii
=
755 OriginTok pi
-> OriginTok
{ pi
with Common.str
= s
;}
756 | ExpandedTok
(pi
,vpi
) -> ExpandedTok
({ pi
with Common.str
= s
;},vpi
)
757 | FakeTok
(_
,vpi
) -> FakeTok
(s
,vpi
)
758 | AbstractLineTok pi
-> OriginTok
{ pi
with Common.str
= s
;})}
760 let rewrap_pinfo pi ii
=
763 (* info about the current location *)
764 let get_pi = function
766 | ExpandedTok
(_
,(pi
,_
)) -> pi
767 | FakeTok
(_
,(pi
,_
)) -> pi
768 | AbstractLineTok pi
-> pi
771 let get_opi = function
773 | ExpandedTok
(pi
,_
) -> pi
774 | FakeTok
(_
,_
) -> failwith
"no position information"
775 | AbstractLineTok pi
-> pi
779 OriginTok pi
-> pi
.Common.str
780 | ExpandedTok
(pi
,_
) -> pi
.Common.str
782 | AbstractLineTok pi
-> pi
.Common.str
787 | ExpandedTok
(_
,(pi
,_
)) -> f pi
788 | FakeTok
(_
,(pi
,_
)) -> f pi
789 | AbstractLineTok pi
-> f pi
791 let get_orig_info f ii
=
794 | ExpandedTok
(pi
,_
) -> f pi
795 | FakeTok
(_
,(pi
,_
)) -> f pi
796 | AbstractLineTok pi
-> f pi
798 let make_expanded ii
=
799 {ii
with pinfo
= ExpandedTok
(get_opi ii
.pinfo
,no_virt_pos)}
801 let pos_of_info ii
= get_info (function x
-> x
.Common.charpos
) ii
802 let opos_of_info ii
= get_orig_info (function x
-> x
.Common.charpos
) ii
803 let line_of_info ii
= get_orig_info (function x
-> x
.Common.line
) ii
804 let col_of_info ii
= get_orig_info (function x
-> x
.Common.column
) ii
805 let file_of_info ii
= get_orig_info (function x
-> x
.Common.file
) ii
806 let mcode_of_info ii
= fst
(!(ii
.cocci_tag
))
807 let pinfo_of_info ii
= ii
.pinfo
808 let parse_info_of_info ii
= get_pi ii
.pinfo
812 FakeTok
(_
,_
) -> true
815 let is_origintok ii
=
817 | OriginTok pi
-> true
820 type posrv
= Real
of Common.parse_info
| Virt
of virtual_position
822 let compare_pos ii1 ii2
=
823 let get_pos = function
824 OriginTok pi
-> Real pi
825 | FakeTok
(s
,vpi
) -> Virt vpi
826 | ExpandedTok
(pi
,vpi
) -> Virt vpi
827 | AbstractLineTok pi
-> Real pi
in (* used for printing *)
828 let pos1 = get_pos (pinfo_of_info ii1
) in
829 let pos2 = get_pos (pinfo_of_info ii2
) in
830 match (pos1,pos2) with
831 (Real p1
, Real p2
) -> compare p1
.Common.charpos p2
.Common.charpos
832 | (Virt
(p1
,_
), Real p2
) ->
833 if (compare p1
.Common.charpos p2
.Common.charpos
) = (-1) then (-1) else 1
834 | (Real p1
, Virt
(p2
,_
)) ->
835 if (compare p1
.Common.charpos p2
.Common.charpos
) = 1 then 1 else (-1)
836 | (Virt
(p1
,o1
), Virt
(p2
,o2
)) ->
837 let poi1 = p1
.Common.charpos
in
838 let poi2 = p2
.Common.charpos
in
839 match compare
poi1 poi2 with
844 let equal_posl (l1
,c1
) (l2
,c2
) =
845 (l1
=|= l2
) && (c1
=|= c2
)
847 let info_to_fixpos ii
=
848 match pinfo_of_info ii
with
849 OriginTok pi
-> Ast_cocci.Real pi
.Common.charpos
850 | ExpandedTok
(_
,(pi
,offset
)) ->
851 Ast_cocci.Virt
(pi
.Common.charpos
,offset
)
852 | FakeTok
(_
,(pi
,offset
)) ->
853 Ast_cocci.Virt
(pi
.Common.charpos
,offset
)
854 | AbstractLineTok pi
-> failwith
"unexpected abstract"
857 let is_test (e
: expression
) =
858 let (_
,info
) = unwrap e
in
859 let (_
,test
) = !info
in
862 (*****************************************************************************)
864 (*****************************************************************************)
866 (* When we have extended the C Ast to add some info to the tokens,
867 * such as its line number in the file, we can not use anymore the
868 * ocaml '=' to compare Ast elements. To overcome this problem, to be
869 * able to use again '=', we just have to get rid of all those extra
870 * information, to "abstract those line" (al) information.
873 let al_info tokenindex x
=
876 {charpos
= tokenindex
;
880 str
= str_of_info x
});
881 cocci_tag
= ref emptyAnnot;
882 comments_tag
= ref emptyComments;
887 cocci_tag
= ref emptyAnnot;
888 comments_tag
= ref emptyComments;
891 (*****************************************************************************)
893 (*****************************************************************************)
895 (* Transform a list of arguments (or parameters) where the commas are
896 * represented via the wrap2 and associated with an element, with
897 * a list where the comma are on their own. f(1,2,2) was
898 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
900 * Used in cocci_vs_c.ml, to have a more direct correspondance between
901 * the ast_cocci of julia and ast_c.
903 let rec (split_comma
: 'a wrap2 list
-> ('a
, il
) either list
) =
908 then (Left e
)::split_comma xs
909 else Right ii
::Left e
::split_comma xs
911 let rec (unsplit_comma
: ('a
, il
) either list
-> 'a wrap2 list
) =
914 | Right ii
::Left e
::xs
->
915 (e
, ii
)::unsplit_comma xs
918 (e
, empty_ii)::unsplit_comma xs
925 let split_register_param = fun (hasreg
, idb
, ii_b_s
) ->
926 match hasreg
, idb
, ii_b_s
with
927 | false, Some s
, [i1
] -> Left
(s
, [], i1
)
928 | true, Some s
, [i1
;i2
] -> Left
(s
, [i1
], i2
)
929 | _
, None
, ii
-> Right ii
930 | _
-> raise Impossible
934 (*****************************************************************************)
935 (* Helpers, could also be put in lib_parsing_c.ml instead *)
936 (*****************************************************************************)
938 let rec stmt_elems_of_sequencable xs
=
939 xs
+> Common.map
(fun x
->
945 pr2
("stmt_elems_of_sequencable: filter a directive");
947 | IfdefStmt2
(_ifdef
, xxs
) ->
948 pr2
("stmt_elems_of_sequencable: IfdefStm2 TODO?");
949 xxs
+> List.map
(fun xs
->
950 let xs'
= stmt_elems_of_sequencable xs in
958 let s_of_inc_file inc_file
=
960 | Local
xs -> xs +> Common.join
"/"
961 | NonLocal
xs -> xs +> Common.join
"/"
964 let s_of_inc_file_bis inc_file
=
966 | Local
xs -> "\"" ^
xs +> Common.join
"/" ^
"\""
967 | NonLocal
xs -> "<" ^
xs +> Common.join
"/" ^
">"
970 let fieldname_of_fieldkind fieldkind
=
971 match unwrap fieldkind
with
972 | Simple
(sopt
, ft
) -> sopt
973 | BitField
(sopt
, ft
, expr
) -> sopt