Release coccinelle-0.1.2
[bpt/coccinelle.git] / parsing_c / ast_c.ml
1 (* Copyright (C) 2002, 2006, 2007, 2008 Yoann Padioleau
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
11 *)
12 open Common
13
14 (*****************************************************************************)
15 (* The AST C related types *)
16 (*****************************************************************************)
17
18 (* To allow some transformations over the AST, we keep as much information
19 * as possible in the AST such as the tokens content and their locations.
20 * Those info are called 'info' (how original) and can be tagged.
21 * For instance one tag may say that the unparser should remove this token.
22 *
23 * Update: Now I use a ref! in those 'info' so take care.
24 *
25 * Sometimes we want to add someting at the beginning or at the end
26 * of a construct. For 'function' and 'decl' we want to add something
27 * to their left and for 'if' 'while' et 'for' and so on at their right.
28 * We want some kinds of "virtual placeholders" that represent the start or
29 * end of a construct. We use fakeInfo for that purpose.
30 * To identify those cases I have added a fakestart/fakeend comment.
31 *
32 * convention: I often use 'ii' for the name of a list of info.
33 *
34 * update: I now allow ifdefs in the ast but there must be only between
35 * "sequencable" elements. They can be put in a type only if this type
36 * is used only in a list, like at toplevel, used in 'toplevel list',
37 * or inside compound, used in 'statement list'. I must not allow
38 * ifdef anywhere. For instance I can not make ifdef a statement
39 * cos some instruction like If accept only one statement and the
40 * ifdef directive must not take the place of a legitimate instruction.
41 * We had a similar phenomena in SmPL where we have the notion
42 * of statement and sequencable statement too. Once you have
43 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
44 * and introduce the ifdef.
45 *
46 * update: those ifdefs are either passed, or present in the AST but in
47 * a flat form. To structure those flat ifdefs you have to run
48 * a transformation that will put in a tree the statements inside
49 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
50 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
51 *
52 * Some stuff are tagged semantic: which means that they are computed
53 * after parsing.
54 *
55 * cocci: Each token will be decorated in the future by the mcodekind
56 * of cocci. It is the job of the pretty printer to look at this
57 * information and decide to print or not the token (and also the
58 * pending '+' associated sometimes with the token).
59 *
60 * The first time that we parse the original C file, the mcodekind is
61 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
62 * associated. This is what I call a "clean" expr/statement/....
63 *
64 * Each token will also be decorated in the future with an environment,
65 * because the pending '+' may contain metavariables that refer to some
66 * C code.
67 *
68 *
69 * All of this means that some elements in this AST are present only if
70 * some annotation/transformation has been done on the original AST returned
71 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
72 *)
73
74 (* forunparser: *)
75
76 type posl = int * int (* lin-col, for MetaPosValList, for position variables *)
77
78 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
79 type virtual_position = Common.parse_info * int (* character offset *)
80
81 type parse_info =
82 (* Present both in ast and list of tokens *)
83 | OriginTok of Common.parse_info
84 (* Present only in ast and generated after parsing. Used mainly
85 * by Julia, to add stuff at virtual places, beginning of func or decl *)
86 | FakeTok of string * virtual_position
87 (* Present both in ast and list of tokens. *)
88 | ExpandedTok of Common.parse_info * virtual_position
89 (* Present neither in ast nor in list of tokens
90 * but only in the '+' of the mcode of some tokens. Those kind of tokens
91 * are used to be able to use '=' to compare big ast portions.
92 *)
93 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
94
95 type info = {
96 pinfo : parse_info;
97 (* this tag can be changed, which is how we can express some progra
98 * transformations by tagging the tokens involved in this transformation.
99 *)
100 cocci_tag: (Ast_cocci.mcodekind * metavars_binding) ref;
101 (* set in comment_annotater.ml *)
102 comments_tag: comments_around ref;
103 (* todo? token_info : sometimes useful to know what token it was *)
104 }
105 and il = info list
106
107 (* wrap2 is like wrap, except that I use it often for separator such
108 * as ','. In that case the info is associated to the argument that
109 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *)
110 and 'a wrap = 'a * il
111 and 'a wrap2 = 'a * il
112
113 (* ------------------------------------------------------------------------- *)
114 (* C Type *)
115 (* ------------------------------------------------------------------------- *)
116 (* Could have more precise type in fullType, in expression, etc, but
117 * it requires to do too much things in parsing such as checking no
118 * conflicting structname, computing value, etc. Better to separate
119 * concern, so I put '=>' to mean what we would really like. In fact
120 * what we really like is defining another fullType, expression, etc
121 * from scratch, because many stuff are just sugar.
122 *
123 * invariant: Array and FunctionType have also typeQualifier but they
124 * dont have sense. I put this to factorise some code. If you look in
125 * grammar, you see that we can never specify const for the array
126 * himself (but we can do it for pointer).
127 *
128 *
129 * Because of ExprStatement, we can have more 'new scope' events, but
130 * rare I think. For instance with 'array of constExpression' there can
131 * have an exprStatement and a new (local) struct defined. Same for
132 * Constructor.
133 *
134 * Some stuff are tagged semantic: which means that they are computed
135 * after parsing.
136 *)
137
138
139 and fullType = typeQualifier * typeC
140 and typeC = typeCbis wrap
141
142 and typeCbis =
143 | BaseType of baseType
144
145 | Pointer of fullType
146 | Array of constExpression option * fullType
147 | FunctionType of functionType
148
149 | Enum of string option * enumType
150 | StructUnion of structUnion * string option * structType (* new scope *)
151
152 | EnumName of string
153 | StructUnionName of structUnion * string
154
155 | TypeName of string * fullType option (* semantic: filled later *)
156
157 | ParenType of fullType (* forunparser: *)
158
159 (* gccext: TypeOfType may seems useless; why declare a
160 * __typeof__(int) x; ?
161 * But when used with macro, it allows to fix a problem of C which
162 * is that type declaration can be spread around the ident. Indeed it
163 * may be difficult to have a macro such as
164 * '#define macro(type, ident) type ident;'
165 * because when you want to do a
166 * macro(char[256], x),
167 * then it will generate invalid code, but with a
168 * '#define macro(type, ident) __typeof(type) ident;'
169 * it will work. *)
170 | TypeOfExpr of expression
171 | TypeOfType of fullType
172
173 (* cppext: IfdefType TODO *)
174
175 (* -------------------------------------- *)
176 and baseType = Void
177 | IntType of intType
178 | FloatType of floatType
179
180 (* stdC: type section
181 * add a | SizeT ?
182 * note: char and signed char are semantically different!!
183 *)
184 and intType = CChar (* obsolete? | CWchar *)
185 | Si of signed
186
187 and signed = sign * base
188 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
189 and sign = Signed | UnSigned
190
191 and floatType = CFloat | CDouble | CLongDouble
192
193
194 (* -------------------------------------- *)
195 and structUnion = Struct | Union
196 and structType = field list
197 and field = fieldbis wrap
198 and fieldbis =
199 | DeclarationField of field_declaration
200 | EmptyField (* gccext: *)
201 (* cppext: *)
202 | MacroStructDeclTodo
203
204 (* cppext: *)
205 | CppDirectiveStruct of cpp_directive
206 | IfdefStruct of ifdef_directive (* * field list list *)
207
208
209 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
210 and field_declaration =
211 | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *)
212
213 (* At first I thought that a bitfield could be only Signed/Unsigned.
214 * But it seems that gcc allow char i:4. C rule must say that you
215 * can cast into int so enum too, ...
216 *)
217 and fieldkind = fieldkindbis wrap (* s : *)
218 and fieldkindbis =
219 | Simple of string option * fullType
220 | BitField of string option * fullType * constExpression
221 (* fullType => BitFieldInt | BitFieldUnsigned *)
222
223
224 (* -------------------------------------- *)
225 and enumType = (string * constExpression option) wrap (* s = *)
226 wrap2 (* , *) list
227 (* => string * int list *)
228
229
230 (* -------------------------------------- *)
231 (* return * (params * has "...") *)
232 and functionType = fullType * (parameterType wrap2 list * bool wrap)
233 and parameterType = (bool * string option * fullType) wrap (* reg s *)
234 (* => (bool (register) * fullType) list * bool *)
235
236
237 and typeQualifier = typeQualifierbis wrap
238 and typeQualifierbis = {const: bool; volatile: bool}
239
240 (* gccext: cppext: *)
241 and attribute = attributebis wrap
242 and attributebis =
243 | Attribute of string
244
245 (* ------------------------------------------------------------------------- *)
246 (* C expression *)
247 (* ------------------------------------------------------------------------- *)
248 and expression = (expressionbis * exp_info ref (* semantic: *)) wrap
249 and exp_info = exp_type option * test
250 and exp_type = fullType * local
251 and local = LocalVar of parse_info | NotLocalVar (* cocci: *)
252 and test = Test | NotTest (* cocci: *)
253
254 and expressionbis =
255
256 (* Ident can be a enumeration constant, a simple variable, a name of a func.
257 * With cppext, Ident can also be the name of a macro. Sparse says
258 * "an identifier with a meaning is a symbol".
259 *)
260 | Ident of string (* todo? more semantic info such as LocalFunc *)
261 | Constant of constant
262 | FunCall of expression * argument wrap2 (* , *) list
263 (* gccext: x ? /* empty */ : y <=> x ? x : y; *)
264 | CondExpr of expression * expression option * expression
265
266 (* should be considered as statements, bad C langage *)
267 | Sequence of expression * expression
268 | Assignment of expression * assignOp * expression
269
270 | Postfix of expression * fixOp
271 | Infix of expression * fixOp
272 | Unary of expression * unaryOp
273 | Binary of expression * binaryOp * expression
274
275 | ArrayAccess of expression * expression
276 | RecordAccess of expression * string
277 | RecordPtAccess of expression * string
278 (* redundant normally, could replace it by DeRef RecordAcces *)
279
280 | SizeOfExpr of expression
281 | SizeOfType of fullType
282 | Cast of fullType * expression
283
284 (* gccext: *)
285 | StatementExpr of compound wrap (* ( ) new scope *)
286 | Constructor of fullType * initialiser wrap2 (* , *) list
287
288 (* forunparser: *)
289 | ParenExpr of expression
290
291 (* cppext: IfdefExpr TODO *)
292
293 (* cppext: normmally just expression *)
294 and argument = (expression, wierd_argument) either
295 and wierd_argument =
296 | ArgType of parameterType
297 | ArgAction of action_macro
298 and action_macro =
299 (* todo: ArgStatement of statement, possibly have ghost token *)
300 | ActMisc of il
301
302
303 (* I put string for Int and Float because int would not be enough because
304 * OCaml int are 31 bits. So simpler to do string. Same reason to have
305 * string instead of int list for the String case.
306 *
307 * note: that -2 is not a constant, it is the unary operator '-'
308 * applied to constant 2. So the string must represent a positive
309 * integer only. *)
310
311 and constant =
312 | String of (string * isWchar)
313 | MultiString (* can contain MacroString, todo: more info *)
314 | Char of (string * isWchar) (* normally it is equivalent to Int *)
315 | Int of (string (* * intType*))
316 | Float of (string * floatType)
317
318 and isWchar = IsWchar | IsChar
319
320
321 and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not
322 | GetRefLabel (* gccext: GetRefLabel, via &&label notation *)
323 and assignOp = SimpleAssign | OpAssign of arithOp
324 and fixOp = Dec | Inc
325
326 and binaryOp = Arith of arithOp | Logical of logicalOp
327
328 and arithOp =
329 | Plus | Minus | Mul | Div | Mod
330 | DecLeft | DecRight
331 | And | Or | Xor
332
333 and logicalOp =
334 | Inf | Sup | InfEq | SupEq
335 | Eq | NotEq
336 | AndLog | OrLog
337
338 and constExpression = expression (* => int *)
339
340
341
342 (* ------------------------------------------------------------------------- *)
343 (* C statement *)
344 (* ------------------------------------------------------------------------- *)
345 (* note: that assignement is not a statement but an expression;
346 * wonderful C langage.
347 *
348 * note: I use 'and' for type definition cos gccext allow statement as
349 * expression, so need mutual recursive type definition. *)
350
351 and statement = statementbis wrap
352 and statementbis =
353 | Labeled of labeled
354 | Compound of compound (* new scope *)
355 | ExprStatement of exprStatement
356 | Selection of selection (* have fakeend *)
357 | Iteration of iteration (* have fakeend *)
358 | Jump of jump
359
360 (* simplify cocci: only at the beginning of a compound normally *)
361 | Decl of declaration
362
363 (* gccext: *)
364 | Asm of asmbody
365 | NestedFunc of definition
366
367 (* cppext: *)
368 | MacroStmt
369
370
371
372 and labeled = Label of string * statement
373 | Case of expression * statement
374 | CaseRange of expression * expression * statement (* gccext: *)
375 | Default of statement
376
377 (* cppext:
378 * old: compound = (declaration list * statement list)
379 * old: (declaration, statement) either list
380 * Simplify cocci to just have statement list, by integrating Decl in stmt.
381 *
382 * update: now introduce also the _sequencable to allow ifdef in the middle.
383 *)
384 and compound = statement_sequencable list
385
386 (* cppext: easier to put at statement_list level than statement level *)
387 and statement_sequencable =
388 | StmtElem of statement
389 (* cppext: *)
390 | CppDirectiveStmt of cpp_directive
391 | IfdefStmt of ifdef_directive
392
393 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
394 | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list
395
396 and exprStatement = expression option
397
398 (* for Switch, need check that all elements in the compound start
399 * with a case:, otherwise unreachable code.
400 *)
401 and selection =
402 | If of expression * statement * statement
403 | Switch of expression * statement
404
405
406 and iteration =
407 | While of expression * statement
408 | DoWhile of statement * expression
409 | For of exprStatement wrap * exprStatement wrap * exprStatement wrap *
410 statement
411 (* cppext: *)
412 | MacroIteration of string * argument wrap2 list * statement
413
414 and jump = Goto of string
415 | Continue | Break
416 | Return | ReturnExpr of expression
417 | GotoComputed of expression (* gccext: goto *exp ';' *)
418
419
420 (* gccext: *)
421 and asmbody = il (* string list *) * colon wrap (* : *) list
422 and colon = Colon of colon_option wrap2 list
423 and colon_option = colon_option_bis wrap
424 and colon_option_bis = ColonMisc | ColonExpr of expression
425
426
427 (* ------------------------------------------------------------------------- *)
428 (* Declaration *)
429 (* ------------------------------------------------------------------------- *)
430 (* (string * ...) option cos can have empty declaration or struct tag
431 * declaration.
432 *
433 * Before I had Typedef constructor, but why make this special case and not
434 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
435 * declarations ? So I try to generalise and not have Typedef either. This
436 * requires more work in parsing. Better to separate concern.
437 *
438 * Before the need for unparser, I didn't have a DeclList but just a Decl.
439 *
440 * I am not sure what it means to declare a prototype inline, but gcc
441 * accepts it.
442 *)
443
444 and local_decl = LocalDecl | NotLocalDecl
445
446 and declaration =
447 | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *)
448 (* cppext: *)
449 | MacroDecl of (string * argument wrap2 list) wrap
450
451 and onedecl =
452 { v_namei: (string * initialiser option) wrap (* s = *) option;
453 v_type: fullType;
454 v_storage: storage;
455 v_local: local_decl; (* cocci: *)
456 v_attr: attribute list; (* gccext: *)
457 }
458 and storage = storagebis * bool (* gccext: inline or not *)
459 and storagebis = NoSto | StoTypedef | Sto of storageClass
460 and storageClass = Auto | Static | Register | Extern
461
462 and initialiser = initialiserbis wrap
463 and initialiserbis =
464 | InitExpr of expression
465 | InitList of initialiser wrap2 (* , *) list
466 (* gccext: *)
467 | InitDesignators of designator list * initialiser
468 | InitFieldOld of string * initialiser
469 | InitIndexOld of expression * initialiser
470
471 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
472 and designator = designatorbis wrap
473 and designatorbis =
474 | DesignatorField of string
475 | DesignatorIndex of expression
476 | DesignatorRange of expression * expression
477
478 (* ------------------------------------------------------------------------- *)
479 (* Function definition *)
480 (* ------------------------------------------------------------------------- *)
481 (* Normally we should define another type functionType2 because there
482 * are more restrictions on what can define a function than a pointer
483 * function. For instance a function declaration can omit the name of the
484 * parameter wheras a function definition can not. But, in some cases such
485 * as 'f(void) {', there is no name too, so I simplified and reused the
486 * same functionType type for both declaration and function definition.
487 *)
488 and definition = definitionbis wrap (* s ( ) { } fakestart sto *)
489 and definitionbis =
490 { f_name: string;
491 f_type: functionType;
492 f_storage: storage;
493 f_body: compound;
494 f_attr: attribute list; (* gccext: *)
495 }
496 (* cppext: IfdefFunHeader TODO *)
497
498 (* ------------------------------------------------------------------------- *)
499 (* cppext: cpp directives, #ifdef, #define and #include body *)
500 (* ------------------------------------------------------------------------- *)
501 and cpp_directive =
502 | Include of includ
503 | Define of define
504 | Undef of string wrap
505 | PragmaAndCo of il
506
507 (* to specialize if someone need more info *)
508 and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *)
509 | IfdefDirective of (ifdefkind * matching_tag) wrap
510 and ifdefkind =
511 | Ifdef (* todo? of string ? of formula_cpp *)
512 | IfdefElseif (* same *)
513 | IfdefElse (* same *)
514 | IfdefEndif
515 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
516 * a global so it means if you parse same file twice you may get
517 * different id. I try now to avoid this pb by resetting it each
518 * time I parse a file.
519 *)
520 and matching_tag =
521 IfdefTag of (int (* tag *) * int (* total with this tag *))
522
523 and define = string wrap * define_body (* #define s *)
524 and define_body = define_kind * define_val
525 and define_kind =
526 | DefineVar
527 | DefineFunc of ((string wrap) wrap2 list) wrap (* () *)
528 and define_val =
529 | DefineExpr of expression
530 | DefineStmt of statement
531 | DefineType of fullType
532 | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *)
533 | DefineFunction of definition
534 | DefineInit of initialiser (* in practice only { } with possible ',' *)
535 (* TODO DefineMulti of define_val list *)
536
537 | DefineText of string wrap
538 | DefineEmpty
539
540 | DefineTodo
541
542
543
544 and includ =
545 { i_include: inc_file wrap; (* #include s *)
546 (* cocci: computed in ? *)
547 i_rel_pos: include_rel_pos option ref;
548 (* cocci: cf -test incl *)
549 i_is_in_ifdef: bool;
550 (* cf cpp_ast_c.ml. set to None at parsing time. *)
551 i_content: (Common.filename (* full path *) * program) option;
552 }
553 and inc_file =
554 | Local of inc_elem list
555 | NonLocal of inc_elem list
556 | Wierd of string (* ex: #include SYSTEM_H *)
557 and inc_elem = string
558
559 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
560 *
561 * The first_of and last_of store the list of prefixes that was
562 * introduced by the include. On #include <a/b/x>, if the include was
563 * the first in the file, it would give in first_of the following
564 * prefixes a/b/c; a/b/; a/ ; <empty>
565 *
566 * This is set after parsing, in cocci.ml, in update_rel_pos.
567 *)
568 and include_rel_pos = {
569 first_of : string list list;
570 last_of : string list list;
571 }
572
573
574
575
576
577
578 (* ------------------------------------------------------------------------- *)
579 (* The toplevels elements *)
580 (* ------------------------------------------------------------------------- *)
581 and toplevel =
582 | Declaration of declaration
583 | Definition of definition
584
585 (* cppext: *)
586 | CppTop of cpp_directive
587 | IfdefTop of ifdef_directive (* * toplevel list *)
588
589 (* cppext: *)
590 | MacroTop of string * argument wrap2 list * il
591
592 | EmptyDef of il (* gccext: allow redundant ';' *)
593 | NotParsedCorrectly of il
594
595 | FinalDef of info (* EOF *)
596
597 (* ------------------------------------------------------------------------- *)
598 and program = toplevel list
599
600
601 (*****************************************************************************)
602 (* Cocci Bindings *)
603 (*****************************************************************************)
604 (* Was previously in pattern.ml, but because of the transformer,
605 * we need to decorate each token with some cocci code AND the environment
606 * for this cocci code.
607 *)
608 and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc
609 and metavar_binding_kind =
610 | MetaIdVal of string
611 | MetaFuncVal of string
612 | MetaLocalFuncVal of string
613
614 | MetaExprVal of expression (* a "clean expr" *)
615 | MetaExprListVal of argument wrap2 list
616 | MetaParamVal of parameterType
617 | MetaParamListVal of parameterType wrap2 list
618
619 | MetaTypeVal of fullType
620 | MetaStmtVal of statement
621
622 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
623 * because don't need to have the value for a position in the env of
624 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
625 * variables accessible via SmPL whereas the position can be one day
626 * so I think it's better to put MetaPosVal here *)
627 | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *)
628 | MetaPosValList of
629 (Common.filename * string (*element*) * posl * posl) list (* min, max *)
630 | MetaListlenVal of int
631
632
633 (*****************************************************************************)
634 (* C comments *)
635 (*****************************************************************************)
636
637 (* convention: I often use "m" for comments as I can not use "c"
638 * (already use for c stuff) and "com" is too long.
639 *)
640
641 (* this type will be associated to each token *)
642 and comments_around = {
643 mbefore: comment_and_relative_pos list;
644 mafter: comment_and_relative_pos list;
645 }
646 and comment_and_relative_pos = {
647
648 minfo: Common.parse_info;
649 (* the int represent the number of lines of difference between the
650 * current token and the comment. When on same line, this number is 0.
651 * When previous line, -1. In some way the after/before in previous
652 * record is useless because the sign of the integer can helps
653 * do the difference too, but I keep it that way.
654 *)
655 mpos: int;
656 (* todo?
657 * cppbetween: bool; touse? if false positive
658 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
659 *)
660 }
661
662 and comment = Common.parse_info
663 and com = comment list ref
664
665
666 (*****************************************************************************)
667 (* Cpp constructs put it comments in lexer or parsing_hack *)
668 (*****************************************************************************)
669
670 (* This type is not in the Ast but is associated with the TCommentCpp token.
671 * I put this enum here because parser_c.mly need it. I could have put
672 * it also in lexer_parser.
673 *)
674 type cppcommentkind =
675 | CppDirective
676 | CppAttr
677 | CppMacro
678 | CppPassingNormal (* ifdef 0, cplusplus, etc *)
679 | CppPassingCosWouldGetError (* expr passsing *)
680
681
682
683
684 (*****************************************************************************)
685 (* Some constructors *)
686 (*****************************************************************************)
687 let nullQualif = ({const=false; volatile= false}, [])
688 let nQ = nullQualif
689
690 let defaultInt = (BaseType (IntType (Si (Signed, CInt))))
691
692 let noType () = ref (None,NotTest)
693 let noInstr = (ExprStatement (None), [])
694 let noTypedefDef () = None
695
696 let emptyMetavarsBinding =
697 ([]: metavars_binding)
698
699 let emptyAnnot =
700 (Ast_cocci.CONTEXT (Ast_cocci.NoPos,Ast_cocci.NOTHING),
701 emptyMetavarsBinding)
702
703 let emptyComments= {
704 mbefore = [];
705 mafter = [];
706 }
707
708
709 (* for include, some meta information needed by cocci *)
710 let noRelPos () =
711 ref (None: include_rel_pos option)
712 let noInIfdef () =
713 ref false
714
715
716 (* When want add some info in ast that does not correspond to
717 * an existing C element.
718 * old: or when don't want 'synchronize' on it in unparse_c.ml
719 * (now have other mark for tha matter).
720 *)
721 let no_virt_pos = ({str="";charpos=0;line=0;column=0;file=""},-1)
722
723 let fakeInfo pi =
724 { pinfo = FakeTok ("",no_virt_pos);
725 cocci_tag = ref emptyAnnot;
726 comments_tag = ref emptyComments;
727 }
728
729 let noii = []
730 let noattr = []
731 let noi_content = (None: ((Common.filename * program) option))
732
733 (*****************************************************************************)
734 (* Wrappers *)
735 (*****************************************************************************)
736 let unwrap = fst
737
738
739 let unwrap_expr ((unwrap_e, typ), iie) = unwrap_e
740 let rewrap_expr ((_old_unwrap_e, typ), iie) newe = ((newe, typ), iie)
741
742 let get_type_expr ((unwrap_e, typ), iie) = !typ
743 let set_type_expr ((unwrap_e, oldtyp), iie) newtyp =
744 oldtyp := newtyp
745 (* old: (unwrap_e, newtyp), iie *)
746
747
748 let unwrap_typeC (qu, (typeC, ii)) = typeC
749 let rewrap_typeC (qu, (typeC, ii)) newtypeC = (qu, (newtypeC, ii))
750
751
752 let rewrap_str s ii =
753 {ii with pinfo =
754 (match ii.pinfo with
755 OriginTok pi -> OriginTok { pi with Common.str = s;}
756 | ExpandedTok (pi,vpi) -> ExpandedTok ({ pi with Common.str = s;},vpi)
757 | FakeTok (_,vpi) -> FakeTok (s,vpi)
758 | AbstractLineTok pi -> OriginTok { pi with Common.str = s;})}
759
760 let rewrap_pinfo pi ii =
761 {ii with pinfo = pi}
762
763 (* info about the current location *)
764 let get_pi = function
765 OriginTok pi -> pi
766 | ExpandedTok (_,(pi,_)) -> pi
767 | FakeTok (_,(pi,_)) -> pi
768 | AbstractLineTok pi -> pi
769
770 (* original info *)
771 let get_opi = function
772 OriginTok pi -> pi
773 | ExpandedTok (pi,_) -> pi
774 | FakeTok (_,_) -> failwith "no position information"
775 | AbstractLineTok pi -> pi
776
777 let str_of_info ii =
778 match ii.pinfo with
779 OriginTok pi -> pi.Common.str
780 | ExpandedTok (pi,_) -> pi.Common.str
781 | FakeTok (s,_) -> s
782 | AbstractLineTok pi -> pi.Common.str
783
784 let get_info f ii =
785 match ii.pinfo with
786 OriginTok pi -> f pi
787 | ExpandedTok (_,(pi,_)) -> f pi
788 | FakeTok (_,(pi,_)) -> f pi
789 | AbstractLineTok pi -> f pi
790
791 let get_orig_info f ii =
792 match ii.pinfo with
793 OriginTok pi -> f pi
794 | ExpandedTok (pi,_) -> f pi
795 | FakeTok (_,(pi,_)) -> f pi
796 | AbstractLineTok pi -> f pi
797
798 let make_expanded ii =
799 {ii with pinfo = ExpandedTok (get_opi ii.pinfo,no_virt_pos)}
800
801 let pos_of_info ii = get_info (function x -> x.Common.charpos) ii
802 let opos_of_info ii = get_orig_info (function x -> x.Common.charpos) ii
803 let line_of_info ii = get_orig_info (function x -> x.Common.line) ii
804 let col_of_info ii = get_orig_info (function x -> x.Common.column) ii
805 let file_of_info ii = get_orig_info (function x -> x.Common.file) ii
806 let mcode_of_info ii = fst (!(ii.cocci_tag))
807 let pinfo_of_info ii = ii.pinfo
808 let parse_info_of_info ii = get_pi ii.pinfo
809
810 let is_fake ii =
811 match ii.pinfo with
812 FakeTok (_,_) -> true
813 | _ -> false
814
815 let is_origintok ii =
816 match ii.pinfo with
817 | OriginTok pi -> true
818 | _ -> false
819
820 type posrv = Real of Common.parse_info | Virt of virtual_position
821
822 let compare_pos ii1 ii2 =
823 let get_pos = function
824 OriginTok pi -> Real pi
825 | FakeTok (s,vpi) -> Virt vpi
826 | ExpandedTok (pi,vpi) -> Virt vpi
827 | AbstractLineTok pi -> Real pi in (* used for printing *)
828 let pos1 = get_pos (pinfo_of_info ii1) in
829 let pos2 = get_pos (pinfo_of_info ii2) in
830 match (pos1,pos2) with
831 (Real p1, Real p2) -> compare p1.Common.charpos p2.Common.charpos
832 | (Virt (p1,_), Real p2) ->
833 if (compare p1.Common.charpos p2.Common.charpos) = (-1) then (-1) else 1
834 | (Real p1, Virt (p2,_)) ->
835 if (compare p1.Common.charpos p2.Common.charpos) = 1 then 1 else (-1)
836 | (Virt (p1,o1), Virt (p2,o2)) ->
837 let poi1 = p1.Common.charpos in
838 let poi2 = p2.Common.charpos in
839 match compare poi1 poi2 with
840 -1 -> -1
841 | 0 -> compare o1 o2
842 | x -> x
843
844 let equal_posl (l1,c1) (l2,c2) =
845 (l1 =|= l2) && (c1 =|= c2)
846
847 let info_to_fixpos ii =
848 match pinfo_of_info ii with
849 OriginTok pi -> Ast_cocci.Real pi.Common.charpos
850 | ExpandedTok (_,(pi,offset)) ->
851 Ast_cocci.Virt (pi.Common.charpos,offset)
852 | FakeTok (_,(pi,offset)) ->
853 Ast_cocci.Virt (pi.Common.charpos,offset)
854 | AbstractLineTok pi -> failwith "unexpected abstract"
855
856 (* cocci: *)
857 let is_test (e : expression) =
858 let (_,info) = unwrap e in
859 let (_,test) = !info in
860 test = Test
861
862 (*****************************************************************************)
863 (* Abstract line *)
864 (*****************************************************************************)
865
866 (* When we have extended the C Ast to add some info to the tokens,
867 * such as its line number in the file, we can not use anymore the
868 * ocaml '=' to compare Ast elements. To overcome this problem, to be
869 * able to use again '=', we just have to get rid of all those extra
870 * information, to "abstract those line" (al) information.
871 *)
872
873 let al_info tokenindex x =
874 { pinfo =
875 (AbstractLineTok
876 {charpos = tokenindex;
877 line = tokenindex;
878 column = tokenindex;
879 file = "";
880 str = str_of_info x});
881 cocci_tag = ref emptyAnnot;
882 comments_tag = ref emptyComments;
883 }
884
885 let semi_al_info x =
886 { x with
887 cocci_tag = ref emptyAnnot;
888 comments_tag = ref emptyComments;
889 }
890
891 (*****************************************************************************)
892 (* Views *)
893 (*****************************************************************************)
894
895 (* Transform a list of arguments (or parameters) where the commas are
896 * represented via the wrap2 and associated with an element, with
897 * a list where the comma are on their own. f(1,2,2) was
898 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
899 *
900 * Used in cocci_vs_c.ml, to have a more direct correspondance between
901 * the ast_cocci of julia and ast_c.
902 *)
903 let rec (split_comma: 'a wrap2 list -> ('a, il) either list) =
904 function
905 | [] -> []
906 | (e, ii)::xs ->
907 if null ii
908 then (Left e)::split_comma xs
909 else Right ii::Left e::split_comma xs
910
911 let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) =
912 function
913 | [] -> []
914 | Right ii::Left e::xs ->
915 (e, ii)::unsplit_comma xs
916 | Left e::xs ->
917 let empty_ii = [] in
918 (e, empty_ii)::unsplit_comma xs
919 | Right ii::_ ->
920 raise Impossible
921
922
923
924
925 let split_register_param = fun (hasreg, idb, ii_b_s) ->
926 match hasreg, idb, ii_b_s with
927 | false, Some s, [i1] -> Left (s, [], i1)
928 | true, Some s, [i1;i2] -> Left (s, [i1], i2)
929 | _, None, ii -> Right ii
930 | _ -> raise Impossible
931
932
933
934 (*****************************************************************************)
935 (* Helpers, could also be put in lib_parsing_c.ml instead *)
936 (*****************************************************************************)
937
938 let rec stmt_elems_of_sequencable xs =
939 xs +> Common.map (fun x ->
940 match x with
941 | StmtElem e -> [e]
942 | CppDirectiveStmt _
943 | IfdefStmt _
944 ->
945 pr2 ("stmt_elems_of_sequencable: filter a directive");
946 []
947 | IfdefStmt2 (_ifdef, xxs) ->
948 pr2 ("stmt_elems_of_sequencable: IfdefStm2 TODO?");
949 xxs +> List.map (fun xs ->
950 let xs' = stmt_elems_of_sequencable xs in
951 xs'
952 ) +> List.flatten
953 ) +> List.flatten
954
955
956
957
958 let s_of_inc_file inc_file =
959 match inc_file with
960 | Local xs -> xs +> Common.join "/"
961 | NonLocal xs -> xs +> Common.join "/"
962 | Wierd s -> s
963
964 let s_of_inc_file_bis inc_file =
965 match inc_file with
966 | Local xs -> "\"" ^ xs +> Common.join "/" ^ "\""
967 | NonLocal xs -> "<" ^ xs +> Common.join "/" ^ ">"
968 | Wierd s -> s
969
970 let fieldname_of_fieldkind fieldkind =
971 match unwrap fieldkind with
972 | Simple (sopt, ft) -> sopt
973 | BitField (sopt, ft, expr) -> sopt
974