permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / ast_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15 open Common
16
17 (*****************************************************************************)
18 (* The AST C related types *)
19 (*****************************************************************************)
20 (*
21 * Some stuff are tagged semantic: which means that they are computed
22 * after parsing.
23 *
24 * This means that some elements in this AST are present only if
25 * some annotation/transformation has been done on the original AST returned
26 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
27 *)
28
29
30 (* ------------------------------------------------------------------------- *)
31 (* Token/info *)
32 (* ------------------------------------------------------------------------- *)
33
34 (* To allow some transformations over the AST, we keep as much information
35 * as possible in the AST such as the tokens content and their locations.
36 * Those info are called 'info' (how original) and can be tagged.
37 * For instance one tag may say that the unparser should remove this token.
38 *
39 * Update: Now I use a ref! in those 'info' so take care.
40 * That means that modifications of the info of tokens can have
41 * an effect on the info stored in the ast (which is sometimes
42 * convenient, cf unparse_c.ml or comment_annotater_c.ml)
43 *
44 * convention: I often use 'ii' for the name of a list of info.
45 *
46 * Sometimes we want to add someting at the beginning or at the end
47 * of a construct. For 'function' and 'decl' we want to add something
48 * to their left and for 'if' 'while' et 'for' and so on at their right.
49 * We want some kinds of "virtual placeholders" that represent the start or
50 * end of a construct. We use fakeInfo for that purpose.
51 * To identify those cases I have added a fakestart/fakeend comment.
52 *
53 * cocci: Each token will be decorated in the future by the mcodekind
54 * of cocci. It is the job of the pretty printer to look at this
55 * information and decide to print or not the token (and also the
56 * pending '+' associated sometimes with the token).
57 *
58 * The first time that we parse the original C file, the mcodekind is
59 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
60 * associated. This is what I call a "clean" expr/statement/....
61 *
62 * Each token will also be decorated in the future with an environment,
63 * because the pending '+' may contain metavariables that refer to some
64 * C code.
65 *
66 *)
67
68 (* for unparser: *)
69
70 type posl = int * int (* line-col, for MetaPosValList, for position variables *)
71 (* with sexp *)
72
73 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
74 type virtual_position = Common.parse_info * int (* character offset *)
75 (* with sexp *)
76
77 type parse_info =
78 (* Present both in ast and list of tokens *)
79 | OriginTok of Common.parse_info
80 (* Present only in ast and generated after parsing. Used mainly
81 * by Julia, to add stuff at virtual places, beginning of func or decl *)
82 | FakeTok of string * virtual_position
83 (* Present both in ast and list of tokens. *)
84 | ExpandedTok of Common.parse_info * virtual_position
85
86 (* Present neither in ast nor in list of tokens
87 * but only in the '+' of the mcode of some tokens. Those kind of tokens
88 * are used to be able to use '=' to compare big ast portions.
89 *)
90 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
91 (* with sexp *)
92
93 type info = {
94 pinfo : parse_info;
95
96 (* this cocci_tag can be changed, which is how we can express some program
97 * transformations by tagging the tokens involved in this transformation.
98 *)
99 cocci_tag: (Ast_cocci.mcodekind * metavars_binding list) option ref;
100 (* set in comment_annotater_c.ml *)
101 comments_tag: comments_around ref;
102
103 (* annotations on the token (mutable) *)
104 mutable annots_tag: Token_annot.annots
105
106 (* todo? token_info : sometimes useful to know what token it was *)
107 }
108 and il = info list
109
110 (* wrap2 is like wrap, except that I use it often for separator such
111 * as ','. In that case the info is associated to the argument that
112 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])].
113 *
114 * wrap3 is like wrap, except that I use it in case sometimes it
115 * will be empty because the info will be included in a nested
116 * entity (e.g. for Ident in expr because it's inlined in the name)
117 * so user should never assume List.length wrap3 > 0.
118 *)
119 and 'a wrap = 'a * il
120 and 'a wrap2 = 'a * il
121 and 'a wrap3 = 'a * il (* * evotype*)
122
123 (* ------------------------------------------------------------------------- *)
124 (* Name *)
125 (* ------------------------------------------------------------------------- *)
126
127 (* was called 'ident' before, but 'name' is I think better
128 * as concatenated strings can be used not only for identifiers and for
129 * declarators, but also for fields, for labels, etc.
130 *
131 * Note: because now the info is embeded in the name, the info for
132 * expression like Ident, or types like Typename, are not anymore
133 * stored in the expression or type. Hence if you assume this,
134 * which was true before, you are now wrong. So never write code like
135 * let (unwrape,_), ii = e and use 'ii' believing it contains
136 * the local ii to e. If you want to do that, use the appropiate
137 * wrapper get_local_ii_of_expr_inlining_ii_of_name.
138 *)
139 and name =
140 | RegularName of string wrap
141 | CppConcatenatedName of (string wrap) wrap2 (* the ## separators *) list
142 (* normally only used inside list of things, as in parameters or arguments
143 * in which case, cf cpp-manual, it has a special meaning *)
144 | CppVariadicName of string wrap (* ## s *)
145 | CppIdentBuilder of string wrap (* s ( ) *) *
146 ((string wrap) wrap2 list) (* arguments *)
147
148
149 (* ------------------------------------------------------------------------- *)
150 (* C Type *)
151 (* ------------------------------------------------------------------------- *)
152 (* Could have more precise type in fullType, in expression, etc, but
153 * it requires to do too much things in parsing such as checking no
154 * conflicting structname, computing value, etc. Better to separate
155 * concern. So I put '=>' to mean what we would really like. In fact
156 * what we really like is defining another fullType, expression, etc
157 * from scratch, because many stuff are just sugar.
158 *
159 * invariant: Array and FunctionType have also typeQualifier but they
160 * dont have sense. I put this to factorise some code. If you look in
161 * the grammar, you see that we can never specify const for the array
162 * himself (but we can do it for pointer) or function, we always
163 * have in the action rule of the grammar a { (nQ, FunctionType ...) }.
164 *
165 *
166 * Because of ExprStatement, we can have more 'new scope' events, but
167 * rare I think. For instance with 'array of constExpression' there can
168 * have an exprStatement and a new (local) struct defined. Same for
169 * Constructor.
170 *
171 *)
172
173
174 and fullType = typeQualifier * typeC
175 and typeC = typeCbis wrap (* todo reput wrap3 *)
176
177 and typeCbis =
178 NoType (* for c++ only, and for K&R C *)
179 | BaseType of baseType
180
181 | Pointer of fullType
182 | Array of constExpression option * fullType
183 | FunctionType of functionType
184
185 | Enum of string option * enumType
186 | StructUnion of structUnion * string option * structType (* new scope *)
187
188 | EnumName of string
189 | StructUnionName of structUnion * string
190
191 | TypeName of name * fullType option (* semantic: filled later *)
192
193 | ParenType of fullType (* for unparser: *)
194
195 (* gccext: TypeOfType below may seems useless; Why declare a
196 * __typeof__(int) x; ?
197 * When used with macros, it allows to fix a problem of C which
198 * is that type declaration can be spread around the ident. Indeed it
199 * may be difficult to have a macro such as
200 * '#define macro(type, ident) type ident;'
201 * because when you want to do a
202 * macro(char[256], x),
203 * then it will generate invalid code, but with a
204 * '#define macro(type, ident) __typeof(type) ident;'
205 * it will work.
206 *)
207 | TypeOfExpr of expression
208 | TypeOfType of fullType
209
210 (* cppext: IfdefType TODO *)
211
212 (* -------------------------------------- *)
213 and baseType = Void
214 | IntType of intType
215 | FloatType of floatType
216 | SizeType
217 | SSizeType
218 | PtrDiffType
219
220 (* stdC: type section
221 * add a | SizeT ?
222 * note: char and signed char are semantically different!!
223 *)
224 and intType = CChar (* obsolete? | CWchar *)
225 | Si of signed
226
227 and signed = sign * base
228 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
229 and sign = Signed | UnSigned
230
231 and floatType = CFloat | CDouble | CLongDouble
232
233
234 (* -------------------------------------- *)
235 and structUnion = Struct | Union
236 and structType = field list
237 and field =
238 | DeclarationField of field_declaration
239 (* gccext: *)
240 | EmptyField of info
241
242 (* cppext: *)
243 | MacroDeclField of (string * argument wrap2 list)
244 wrap (* optional ';'*)
245
246 (* cppext: *)
247 | CppDirectiveStruct of cpp_directive
248 | IfdefStruct of ifdef_directive (* * field list list *)
249
250
251 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
252 and field_declaration =
253 | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *)
254
255 (* At first I thought that a bitfield could be only Signed/Unsigned.
256 * But it seems that gcc allow char i:4. C rule must say that you
257 * can cast into int so enum too, ...
258 *)
259 and fieldkind =
260 | Simple of name option * fullType
261 | BitField of name option * fullType *
262 info (* : *) * constExpression
263 (* fullType => BitFieldInt | BitFieldUnsigned *)
264
265
266 (* -------------------------------------- *)
267 and enumType = oneEnumType wrap2 (* , *) list
268 (* => string * int list *)
269
270 and oneEnumType = name * (info (* = *) * constExpression) option
271
272 (* -------------------------------------- *)
273 (* return * (params * has "...") *)
274 and functionType = fullType * (parameterType wrap2 list * bool wrap)
275 and parameterType =
276 { p_namei: name option;
277 p_register: bool wrap;
278 p_type: fullType;
279 }
280 (* => (bool (register) * fullType) list * bool *)
281
282
283 and typeQualifier = typeQualifierbis wrap
284 and typeQualifierbis = {const: bool; volatile: bool}
285
286 (* gccext: cppext: *)
287 and attribute = attributebis wrap
288 and attributebis =
289 | Attribute of string
290
291 (* ------------------------------------------------------------------------- *)
292 (* C expression *)
293 (* ------------------------------------------------------------------------- *)
294 and expression = (expressionbis * exp_info ref (* semantic: *)) wrap3
295 and exp_info = exp_type option * test
296 and exp_type = fullType (* Type_c.completed_and_simplified *) * local
297 and local = LocalVar of parse_info | StaticLocalVar of parse_info
298 | NotLocalVar (* cocci: *)
299 and test = Test | NotTest (* cocci: *)
300
301 and expressionbis =
302
303 (* Ident can be a enumeration constant, a simple variable, a name of a func.
304 * With cppext, Ident can also be the name of a macro. Sparse says
305 * "an identifier with a meaning is a symbol" *)
306 | Ident of name (* todo? more semantic info such as LocalFunc *)
307
308 | Constant of constant
309 | FunCall of expression * argument wrap2 (* , *) list
310 (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *)
311 | CondExpr of expression * expression option * expression
312
313 (* should be considered as statements, bad C langage *)
314 | Sequence of expression * expression
315 | Assignment of expression * assignOp * expression
316
317
318 | Postfix of expression * fixOp
319 | Infix of expression * fixOp
320
321 | Unary of expression * unaryOp
322 | Binary of expression * binaryOp * expression
323
324 | ArrayAccess of expression * expression
325
326 (* field ident access *)
327 | RecordAccess of expression * name
328 | RecordPtAccess of expression * name
329 (* redundant normally, could replace it by DeRef RecordAcces *)
330
331 | SizeOfExpr of expression
332 | SizeOfType of fullType
333 | Cast of fullType * expression
334
335 (* gccext: *)
336 | StatementExpr of compound wrap (* ( ) new scope *)
337 | Constructor of fullType * initialiser
338
339 (* for unparser: *)
340 | ParenExpr of expression
341
342 (* for C++: *)
343 | New of (argument wrap2 (* , *) list) option * argument
344 | Delete of expression
345
346 (* cppext: IfdefExpr TODO *)
347
348 (* cppext: normally just expression *)
349 and argument = (expression, weird_argument) Common.either
350 and weird_argument =
351 | ArgType of parameterType
352 | ArgAction of action_macro
353 and action_macro =
354 (* todo: ArgStatement of statement, possibly have ghost token *)
355 | ActMisc of il
356
357
358 (* I put string for Int and Float because int would not be enough because
359 * OCaml int are 31 bits. So simpler to do string. Same reason to have
360 * string instead of int list for the String case.
361 *
362 * note: -2 is not a constant, it is the unary operator '-'
363 * applied to constant 2. So the string must represent a positive
364 * integer only. *)
365
366 and constant =
367 | String of (string * isWchar)
368 | MultiString of string list (* can contain MacroString, todo: more info *)
369 | Char of (string * isWchar) (* normally it is equivalent to Int *)
370 | Int of (string * intType)
371 | Float of (string * floatType)
372
373 and isWchar = IsWchar | IsChar
374
375
376 and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not
377 | GetRefLabel (* gccext: GetRefLabel, via &&label notation *)
378 and assignOp = SimpleAssign | OpAssign of arithOp
379 and fixOp = Dec | Inc
380
381 and binaryOp = Arith of arithOp | Logical of logicalOp
382
383 and arithOp =
384 | Plus | Minus | Mul | Div | Mod
385 | DecLeft | DecRight
386 | And | Or | Xor | Max | Min
387
388 and logicalOp =
389 | Inf | Sup | InfEq | SupEq
390 | Eq | NotEq
391 | AndLog | OrLog
392
393 and constExpression = expression (* => int *)
394
395 (* ------------------------------------------------------------------------- *)
396 (* C statement *)
397 (* ------------------------------------------------------------------------- *)
398 (* note: that assignement is not a statement but an expression;
399 * wonderful C langage.
400 *
401 * note: I use 'and' for type definition cos gccext allow statement as
402 * expression, so need mutual recursive type definition.
403 *
404 *)
405
406 and statement = statementbis wrap3
407 and statementbis =
408 | Labeled of labeled
409 | Compound of compound (* new scope *)
410 | ExprStatement of exprStatement
411 | Selection of selection (* have fakeend *)
412 | Iteration of iteration (* have fakeend *)
413 | Jump of jump
414
415 (* simplify cocci: only at the beginning of a compound normally *)
416 | Decl of declaration
417
418 (* gccext: *)
419 | Asm of asmbody
420 | NestedFunc of definition
421
422 (* cppext: *)
423 | MacroStmt
424
425
426
427 and labeled = Label of name * statement
428 | Case of expression * statement
429 | CaseRange of expression * expression * statement (* gccext: *)
430 | Default of statement
431
432 (* cppext:
433 * old: compound = (declaration list * statement list)
434 * old: (declaration, statement) either list
435 * Simplify cocci to just have statement list, by integrating Decl in stmt.
436 *
437 * update: now introduce also the _sequencable to allow ifdef in the middle.
438 * Indeed, I now allow ifdefs in the ast but they must be only between
439 * "sequencable" elements. They can be put in a type only if this type
440 * is used in a list, like at the toplevel, used in a 'toplevel list',
441 * or inside a compound, used in a 'statement list'. I must not allow
442 * ifdef anywhere. For instance I can not make ifdef a statement
443 * cos some instruction like If accept only one statement and the
444 * ifdef directive must not take the place of a legitimate instruction.
445 * We had a similar phenomena in SmPL where we have the notion
446 * of statement and sequencable statement too. Once you have
447 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
448 * and introduce the ifdef.
449 *
450 * update: those ifdefs are either passed, or present in the AST but in
451 * a flat form. To structure those flat ifdefs you have to run
452 * a transformation that will put in a tree the statements inside
453 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
454 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
455 *
456 *)
457 and compound = statement_sequencable list
458
459 (* cppext: easier to put at statement_list level than statement level *)
460 and statement_sequencable =
461 | StmtElem of statement
462
463 (* cppext: *)
464 | CppDirectiveStmt of cpp_directive
465 | IfdefStmt of ifdef_directive
466
467 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
468 | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list
469
470 and exprStatement = expression option
471
472 and declOrExpr = ForDecl of declaration | ForExp of expression option wrap
473
474 (* for Switch, need check that all elements in the compound start
475 * with a case:, otherwise unreachable code.
476 *)
477 and selection =
478 | If of expression * statement * statement
479 | Switch of expression * statement
480
481
482 and iteration =
483 | While of expression * statement
484 | DoWhile of statement * expression
485 | For of declOrExpr * exprStatement wrap * exprStatement wrap *
486 statement
487 (* cppext: *)
488 | MacroIteration of string * argument wrap2 list * statement
489
490 and jump = Goto of name
491 | Continue | Break
492 | Return | ReturnExpr of expression
493 | GotoComputed of expression (* gccext: goto *exp ';' *)
494
495
496 (* gccext: *)
497 and asmbody = il (* string list *) * colon wrap (* : *) list
498 and colon = Colon of colon_option wrap2 list
499 and colon_option = colon_option_bis wrap
500 and colon_option_bis = ColonMisc | ColonExpr of expression
501
502
503 (* ------------------------------------------------------------------------- *)
504 (* Declaration *)
505 (* ------------------------------------------------------------------------- *)
506 (* (string * ...) option cos can have empty declaration or struct tag
507 * declaration.
508 *
509 * Before I had a Typedef constructor, but why make this special case and not
510 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
511 * declarations ? So I try to generalise and not have Typedef either. This
512 * requires more work in parsing. Better to separate concern.
513 *
514 * Before the need for unparser, I didn't have a DeclList but just a Decl.
515 *
516 * I am not sure what it means to declare a prototype inline, but gcc
517 * accepts it.
518 *)
519
520 and declaration =
521 | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *)
522 (* cppext: *)
523 (* bool is true if there is a ; at the end *)
524 | MacroDecl of (string * argument wrap2 list * bool) wrap (* fakestart *)
525 | MacroDeclInit of
526 (string * argument wrap2 list * initialiser) wrap (* fakestart *)
527
528 and onedecl =
529 { v_namei: (name * v_init) option;
530 v_type: fullType;
531 (* semantic: set in type annotated and used in cocci_vs_c
532 * when we transform some initialisation into affectation
533 *)
534 v_type_bis: fullType (* Type_c.completed_and_simplified *) option ref;
535 v_storage: storage;
536 v_local: local_decl; (* cocci: *)
537 v_attr: attribute list; (* gccext: *)
538 }
539 and v_init =
540 NoInit | ValInit of info * initialiser
541 | ConstrInit of argument wrap2 (* , *) list wrap
542 and storage = storagebis * bool (* gccext: inline or not *)
543 and storagebis = NoSto | StoTypedef | Sto of storageClass
544 and storageClass = Auto | Static | Register | Extern
545
546 and local_decl = LocalDecl | NotLocalDecl
547
548 (* fullType is the type used if the type should be converted to
549 an assignment. It can be adjusted in the type annotation
550 phase when typedef information is availalble *)
551 and initialiser = initialiserbis wrap
552 and initialiserbis =
553 | InitExpr of expression
554 | InitList of initialiser wrap2 (* , *) list
555 (* gccext: *)
556 | InitDesignators of designator list * initialiser
557 | InitFieldOld of string * initialiser
558 | InitIndexOld of expression * initialiser
559
560 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
561 and designator = designatorbis wrap
562 and designatorbis =
563 | DesignatorField of string
564 | DesignatorIndex of expression
565 | DesignatorRange of expression * expression
566
567 (* ------------------------------------------------------------------------- *)
568 (* Function definition *)
569 (* ------------------------------------------------------------------------- *)
570 (* Normally we should define another type functionType2 because there
571 * are more restrictions on what can define a function than a pointer
572 * function. For instance a function declaration can omit the name of the
573 * parameter whereas a function definition can not. But, in some cases such
574 * as 'f(void) {', there is no name too, so I simplified and reused the
575 * same functionType type for both declaration and function definition.
576 *
577 * Also old style C does not have type in the parameter, so again simpler
578 * to abuse the functionType and allow missing type.
579 *)
580 and definition = definitionbis wrap (* ( ) { } fakestart sto *)
581 and definitionbis =
582 { f_name: name;
583 f_type: functionType; (* less? a functionType2 ? *)
584 f_storage: storage;
585 f_body: compound;
586 f_attr: attribute list; (* gccext: *)
587 f_old_c_style: declaration list option;
588 }
589 (* cppext: IfdefFunHeader TODO *)
590
591 (* ------------------------------------------------------------------------- *)
592 (* cppext: cpp directives, #ifdef, #define and #include body *)
593 (* ------------------------------------------------------------------------- *)
594 and cpp_directive =
595 | Define of define
596 | Include of includ
597 | PragmaAndCo of il
598 (*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *)
599
600 and define = string wrap (* #define s eol *) * (define_kind * define_val)
601 and define_kind =
602 | DefineVar
603 | DefineFunc of ((string wrap) wrap2 list) wrap (* () *)
604 | Undef
605 and define_val =
606 (* most common case; e.g. to define int constant *)
607 | DefineExpr of expression
608
609 | DefineStmt of statement
610 | DefineType of fullType
611 | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *)
612
613 | DefineFunction of definition
614 | DefineInit of initialiser (* in practice only { } with possible ',' *)
615
616 | DefineMulti of statement list
617
618 | DefineText of string wrap
619 | DefineEmpty
620
621 | DefineTodo
622
623
624
625 and includ =
626 { i_include: inc_file wrap; (* #include s *)
627 (* cocci: computed in ? *)
628 i_rel_pos: include_rel_pos option ref;
629 (* cocci: cf -test incl *)
630 i_is_in_ifdef: bool;
631 (* cf cpp_ast_c.ml. set to None at parsing time. *)
632 i_content: (Common.filename (* full path *) * program) option;
633 }
634 and inc_file =
635 | Local of inc_elem list
636 | NonLocal of inc_elem list
637 | Weird of string (* ex: #include SYSTEM_H *)
638 and inc_elem = string
639
640 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
641 *
642 * The first_of and last_of store the list of prefixes that was
643 * introduced by the include. On #include <a/b/x>, if the include was
644 * the first in the file, it would give in first_of the following
645 * prefixes a/b/c; a/b/; a/ ; <empty>
646 *
647 * This is set after parsing, in cocci.ml, in update_rel_pos.
648 *)
649 and include_rel_pos = {
650 first_of : string list list;
651 last_of : string list list;
652 }
653
654
655
656 (* todo? to specialize if someone need more info *)
657 and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *)
658 | IfdefDirective of (ifdefkind * matching_tag) wrap
659 and ifdefkind =
660 | Ifdef (* todo? of string ? of formula_cpp ? *)
661 | IfdefElseif (* same *)
662 | IfdefElse (* same *)
663 | IfdefEndif
664 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
665 * a global so it means if you parse the same file twice you may get
666 * different id. I try now to avoid this pb by resetting it each
667 * time I parse a file.
668 *)
669 and matching_tag =
670 IfdefTag of (int (* tag *) * int (* total with this tag *))
671
672
673
674
675
676 (* ------------------------------------------------------------------------- *)
677 (* The toplevels elements *)
678 (* ------------------------------------------------------------------------- *)
679 and toplevel =
680 | Declaration of declaration
681 | Definition of definition
682
683 (* cppext: *)
684 | CppTop of cpp_directive
685 | IfdefTop of ifdef_directive (* * toplevel list *)
686
687 (* cppext: *)
688 | MacroTop of string * argument wrap2 list * il
689
690 | EmptyDef of il (* gccext: allow redundant ';' *)
691 | NotParsedCorrectly of il
692
693 | FinalDef of info (* EOF *)
694
695 (* c++ *)
696 | Namespace of toplevel list * il
697
698 (* ------------------------------------------------------------------------- *)
699 and program = toplevel list
700
701 (*****************************************************************************)
702 (* Cocci Bindings *)
703 (*****************************************************************************)
704 (* Was previously in pattern.ml, but because of the transformer,
705 * we need to decorate each token with some cocci code AND the environment
706 * for this cocci code.
707 *)
708 and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc
709 and metavar_binding_kind =
710 | MetaIdVal of string *
711 Ast_cocci.meta_name list (* negative constraints *)
712 | MetaFuncVal of string
713 | MetaLocalFuncVal of string
714
715 | MetaExprVal of expression (* a "clean expr" *) *
716 (*subterm constraints, currently exprs*)
717 Ast_cocci.meta_name list
718 | MetaExprListVal of argument wrap2 list
719 | MetaParamVal of parameterType
720 | MetaParamListVal of parameterType wrap2 list
721
722 | MetaTypeVal of fullType
723 | MetaInitVal of initialiser
724 | MetaInitListVal of initialiser wrap2 list
725 | MetaDeclVal of declaration
726 | MetaFieldVal of field
727 | MetaFieldListVal of field list
728 | MetaStmtVal of statement
729
730 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
731 * because don't need to have the value for a position in the env of
732 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
733 * variables accessible via SmPL whereas the position can be one day
734 * so I think it's better to put MetaPosVal here *)
735 | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *)
736 | MetaPosValList of
737 (Common.filename * string (*element*) * posl * posl) list (* min, max *)
738 | MetaListlenVal of int
739
740
741 (*****************************************************************************)
742 (* C comments *)
743 (*****************************************************************************)
744
745 (* convention: I often use "m" for comments as I can not use "c"
746 * (already use for c stuff) and "com" is too long.
747 *)
748
749 (* this type will be associated to each token.
750 *)
751 and comments_around = {
752 mbefore: Token_c.comment_like_token list;
753 mafter: Token_c.comment_like_token list;
754
755 (* less: could remove ? do something simpler than CComment for
756 * coccinelle, cf above. *)
757 mbefore2: comment_and_relative_pos list;
758 mafter2: comment_and_relative_pos list;
759 }
760 and comment_and_relative_pos = {
761
762 minfo: Common.parse_info;
763 (* the int represent the number of lines of difference between the
764 * current token and the comment. When on same line, this number is 0.
765 * When previous line, -1. In some way the after/before in previous
766 * record is useless because the sign of the integer can helps
767 * do the difference too, but I keep it that way.
768 *)
769 mpos: int;
770 (* todo?
771 * cppbetween: bool; touse? if false positive
772 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
773 *)
774 }
775
776 and comment = Common.parse_info
777 and com = comment list ref
778
779 (* with sexp *)
780
781
782 (*****************************************************************************)
783 (* Some constructors *)
784 (*****************************************************************************)
785 let nullQualif = ({const=false; volatile= false}, [])
786 let nQ = nullQualif
787
788 let defaultInt = (BaseType (IntType (Si (Signed, CInt))))
789
790 let noType () = ref (None,NotTest)
791 let noInstr = (ExprStatement (None), [])
792 let noTypedefDef () = None
793
794 let emptyMetavarsBinding =
795 ([]: metavars_binding)
796
797 let emptyAnnotCocci =
798 (Ast_cocci.CONTEXT (Ast_cocci.NoPos,Ast_cocci.NOTHING),
799 ([] : metavars_binding list))
800
801 let emptyAnnot =
802 (None: (Ast_cocci.mcodekind * metavars_binding list) option)
803
804 (* compatibility mode *)
805 let mcode_and_env_of_cocciref aref =
806 match !aref with
807 | Some x -> x
808 | None -> emptyAnnotCocci
809
810
811 let emptyComments= {
812 mbefore = [];
813 mafter = [];
814 mbefore2 = [];
815 mafter2 = [];
816 }
817
818
819 (* for include, some meta information needed by cocci *)
820 let noRelPos () =
821 ref (None: include_rel_pos option)
822 let noInIfdef () =
823 ref false
824
825
826 (* When want add some info in ast that does not correspond to
827 * an existing C element.
828 * old: or when don't want 'synchronize' on it in unparse_c.ml
829 * (now have other mark for tha matter).
830 *)
831 let no_virt_pos = ({str="";charpos=0;line=0;column=0;file=""},-1)
832
833 let fakeInfo pi =
834 { pinfo = FakeTok ("",no_virt_pos);
835 cocci_tag = ref emptyAnnot;
836 annots_tag = Token_annot.empty;
837 comments_tag = ref emptyComments;
838 }
839
840 let noii = []
841 let noattr = []
842 let noi_content = (None: ((Common.filename * program) option))
843
844 (*****************************************************************************)
845 (* Wrappers *)
846 (*****************************************************************************)
847 let unwrap = fst
848
849 let unwrap2 = fst
850
851 let unwrap_expr ((unwrap_e, typ), iie) = unwrap_e
852 let rewrap_expr ((_old_unwrap_e, typ), iie) newe = ((newe, typ), iie)
853
854 let unwrap_typeC (qu, (typeC, ii)) = typeC
855 let rewrap_typeC (qu, (typeC, ii)) newtypeC = (qu, (newtypeC, ii))
856
857 let unwrap_typeCbis (typeC, ii) = typeC
858
859 let unwrap_st (unwrap_st, ii) = unwrap_st
860
861 (* ------------------------------------------------------------------------- *)
862 let mk_e unwrap_e ii = (unwrap_e, noType()), ii
863 let mk_e_bis unwrap_e ty ii = (unwrap_e, ty), ii
864
865 let mk_ty typeC ii = nQ, (typeC, ii)
866 let mk_tybis typeC ii = (typeC, ii)
867
868 let mk_st unwrap_st ii = (unwrap_st, ii)
869
870 (* ------------------------------------------------------------------------- *)
871 let get_ii_typeC_take_care (typeC, ii) = ii
872 let get_ii_st_take_care (st, ii) = ii
873 let get_ii_expr_take_care (e, ii) = ii
874
875 let get_st_and_ii (st, ii) = st, ii
876 let get_ty_and_ii (qu, (typeC, ii)) = qu, (typeC, ii)
877 let get_e_and_ii (e, ii) = e, ii
878
879
880 (* ------------------------------------------------------------------------- *)
881 let get_type_expr ((unwrap_e, typ), iie) = !typ
882 let set_type_expr ((unwrap_e, oldtyp), iie) newtyp =
883 oldtyp := newtyp
884 (* old: (unwrap_e, newtyp), iie *)
885
886 let get_onlytype_expr ((unwrap_e, typ), iie) =
887 match !typ with
888 | Some (ft,_local), _test -> Some ft
889 | None, _ -> None
890
891 let get_onlylocal_expr ((unwrap_e, typ), iie) =
892 match !typ with
893 | Some (ft,local), _test -> Some local
894 | None, _ -> None
895
896 (* ------------------------------------------------------------------------- *)
897 let rewrap_str s ii =
898 {ii with pinfo =
899 (match ii.pinfo with
900 OriginTok pi -> OriginTok { pi with Common.str = s;}
901 | ExpandedTok (pi,vpi) -> ExpandedTok ({ pi with Common.str = s;},vpi)
902 | FakeTok (_,vpi) -> FakeTok (s,vpi)
903 | AbstractLineTok pi -> OriginTok { pi with Common.str = s;})}
904
905 let rewrap_pinfo pi ii =
906 {ii with pinfo = pi}
907
908
909
910 (* info about the current location *)
911 let get_pi = function
912 OriginTok pi -> pi
913 | ExpandedTok (_,(pi,_)) -> pi
914 | FakeTok (_,(pi,_)) -> pi
915 | AbstractLineTok pi -> pi
916
917 (* original info *)
918 let get_opi = function
919 OriginTok pi -> pi
920 | ExpandedTok (pi,_) -> pi (* diff with get_pi *)
921 | FakeTok (_,_) -> failwith "no position information"
922 | AbstractLineTok pi -> pi
923
924 let str_of_info ii =
925 match ii.pinfo with
926 OriginTok pi -> pi.Common.str
927 | ExpandedTok (pi,_) -> pi.Common.str
928 | FakeTok (s,_) -> s
929 | AbstractLineTok pi -> pi.Common.str
930
931 let get_info f ii =
932 match ii.pinfo with
933 OriginTok pi -> f pi
934 | ExpandedTok (_,(pi,_)) -> f pi
935 | FakeTok (_,(pi,_)) -> f pi
936 | AbstractLineTok pi -> f pi
937
938 let get_orig_info f ii =
939 match ii.pinfo with
940 OriginTok pi -> f pi
941 | ExpandedTok (pi,_) -> f pi (* diff with get_info *)
942 | FakeTok (_,(pi,_)) -> f pi
943 | AbstractLineTok pi -> f pi
944
945 let make_expanded ii =
946 {ii with pinfo = ExpandedTok (get_opi ii.pinfo,no_virt_pos)}
947
948 let pos_of_info ii = get_info (function x -> x.Common.charpos) ii
949 let opos_of_info ii = get_orig_info (function x -> x.Common.charpos) ii
950 let line_of_info ii = get_orig_info (function x -> x.Common.line) ii
951 let col_of_info ii = get_orig_info (function x -> x.Common.column) ii
952 let file_of_info ii = get_orig_info (function x -> x.Common.file) ii
953 let mcode_of_info ii = fst (mcode_and_env_of_cocciref ii.cocci_tag)
954 let pinfo_of_info ii = ii.pinfo
955 let parse_info_of_info ii = get_pi ii.pinfo
956
957 let strloc_of_info ii =
958 spf "%s:%d" (file_of_info ii) (line_of_info ii)
959
960 let is_fake ii =
961 match ii.pinfo with
962 FakeTok (_,_) -> true
963 | _ -> false
964
965 let is_origintok ii =
966 match ii.pinfo with
967 | OriginTok pi -> true
968 | _ -> false
969
970 (* ------------------------------------------------------------------------- *)
971 type posrv = Real of Common.parse_info | Virt of virtual_position
972
973 let compare_pos ii1 ii2 =
974 let get_pos = function
975 OriginTok pi -> Real pi
976 | FakeTok (s,vpi) -> Virt vpi
977 | ExpandedTok (pi,vpi) -> Virt vpi
978 | AbstractLineTok pi -> Real pi in (* used for printing *)
979 let pos1 = get_pos (pinfo_of_info ii1) in
980 let pos2 = get_pos (pinfo_of_info ii2) in
981 match (pos1,pos2) with
982 (Real p1, Real p2) ->
983 compare p1.Common.charpos p2.Common.charpos
984 | (Virt (p1,_), Real p2) ->
985 if (compare p1.Common.charpos p2.Common.charpos) =|= (-1) then (-1) else 1
986 | (Real p1, Virt (p2,_)) ->
987 if (compare p1.Common.charpos p2.Common.charpos) =|= 1 then 1 else (-1)
988 | (Virt (p1,o1), Virt (p2,o2)) ->
989 let poi1 = p1.Common.charpos in
990 let poi2 = p2.Common.charpos in
991 match compare poi1 poi2 with
992 -1 -> -1
993 | 0 -> compare o1 o2
994 | x -> x
995
996 let equal_posl (l1,c1) (l2,c2) =
997 (l1 =|= l2) && (c1 =|= c2)
998
999 let compare_posl (l1,c1) (l2,c2) =
1000 match l2 - l1 with
1001 0 -> c2 - c1
1002 | r -> r
1003
1004 let info_to_fixpos ii =
1005 match pinfo_of_info ii with
1006 OriginTok pi -> Ast_cocci.Real pi.Common.charpos
1007 | ExpandedTok (_,(pi,offset)) ->
1008 Ast_cocci.Virt (pi.Common.charpos,offset)
1009 | FakeTok (_,(pi,offset)) ->
1010 Ast_cocci.Virt (pi.Common.charpos,offset)
1011 | AbstractLineTok pi -> failwith "unexpected abstract"
1012
1013 (* cocci: *)
1014 let is_test (e : expression) =
1015 let (_,info), _ = e in
1016 let (_,test) = !info in
1017 test =*= Test
1018
1019 (*****************************************************************************)
1020 (* Abstract line *)
1021 (*****************************************************************************)
1022
1023 (* When we have extended the C Ast to add some info to the tokens,
1024 * such as its line number in the file, we can not use anymore the
1025 * ocaml '=' to compare Ast elements. To overcome this problem, to be
1026 * able to use again '=', we just have to get rid of all those extra
1027 * information, to "abstract those line" (al) information.
1028 *
1029 * Julia then modifies it a little to have a tokenindex, so the original
1030 * true al_info is in fact real_al_info.
1031 *)
1032
1033 let al_info tokenindex x =
1034 { pinfo =
1035 (AbstractLineTok
1036 {charpos = tokenindex;
1037 line = tokenindex;
1038 column = tokenindex;
1039 file = "";
1040 str = str_of_info x});
1041 cocci_tag = ref emptyAnnot;
1042 annots_tag = Token_annot.empty;
1043 comments_tag = ref emptyComments;
1044 }
1045
1046 let semi_al_info x =
1047 { x with
1048 cocci_tag = ref emptyAnnot;
1049 comments_tag = ref emptyComments;
1050 }
1051
1052 let magic_real_number = -10
1053
1054 let real_al_info x =
1055 { pinfo =
1056 (AbstractLineTok
1057 {charpos = magic_real_number;
1058 line = magic_real_number;
1059 column = magic_real_number;
1060 file = "";
1061 str = str_of_info x});
1062 cocci_tag = ref emptyAnnot;
1063 annots_tag = Token_annot.empty;
1064 comments_tag = ref emptyComments;
1065 }
1066
1067 let al_comments x =
1068 let keep_cpp l =
1069 List.filter (function (Token_c.TCommentCpp _,_) -> true | _ -> false) l in
1070 let al_com (x,i) =
1071 (x,{i with Common.charpos = magic_real_number;
1072 Common.line = magic_real_number;
1073 Common.column = magic_real_number}) in
1074 {mbefore = []; (* duplicates mafter of the previous token *)
1075 mafter = List.map al_com (keep_cpp x.mafter);
1076 mbefore2=[];
1077 mafter2=[];
1078 }
1079
1080 let al_info_cpp tokenindex x =
1081 { pinfo =
1082 (AbstractLineTok
1083 {charpos = tokenindex;
1084 line = tokenindex;
1085 column = tokenindex;
1086 file = "";
1087 str = str_of_info x});
1088 cocci_tag = ref emptyAnnot;
1089 annots_tag = Token_annot.empty;
1090 comments_tag = ref (al_comments !(x.comments_tag));
1091 }
1092
1093 let semi_al_info_cpp x =
1094 { x with
1095 cocci_tag = ref emptyAnnot;
1096 annots_tag = Token_annot.empty;
1097 comments_tag = ref (al_comments !(x.comments_tag));
1098 }
1099
1100 let real_al_info_cpp x =
1101 { pinfo =
1102 (AbstractLineTok
1103 {charpos = magic_real_number;
1104 line = magic_real_number;
1105 column = magic_real_number;
1106 file = "";
1107 str = str_of_info x});
1108 cocci_tag = ref emptyAnnot;
1109 annots_tag = Token_annot.empty;
1110 comments_tag = ref (al_comments !(x.comments_tag));
1111 }
1112
1113
1114 (*****************************************************************************)
1115 (* Views *)
1116 (*****************************************************************************)
1117
1118 (* Transform a list of arguments (or parameters) where the commas are
1119 * represented via the wrap2 and associated with an element, with
1120 * a list where the comma are on their own. f(1,2,2) was
1121 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
1122 *
1123 * Used in cocci_vs_c.ml, to have a more direct correspondance between
1124 * the ast_cocci of julia and ast_c.
1125 *)
1126 let rec (split_comma: 'a wrap2 list -> ('a, il) either list) =
1127 function
1128 | [] -> []
1129 | (e, ii)::xs ->
1130 if null ii
1131 then (Left e)::split_comma xs
1132 else Right ii::Left e::split_comma xs
1133
1134 let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) =
1135 function
1136 | [] -> []
1137 | Right ii::Left e::xs ->
1138 (e, ii)::unsplit_comma xs
1139 | Left e::xs ->
1140 let empty_ii = [] in
1141 (e, empty_ii)::unsplit_comma xs
1142 | Right ii::_ ->
1143 raise (Impossible 59)
1144
1145
1146
1147
1148 (*****************************************************************************)
1149 (* Helpers, could also be put in lib_parsing_c.ml instead *)
1150 (*****************************************************************************)
1151
1152 (* should maybe be in pretty_print_c ? *)
1153
1154 let s_of_inc_file inc_file =
1155 match inc_file with
1156 | Local xs -> xs +> Common.join "/"
1157 | NonLocal xs -> xs +> Common.join "/"
1158 | Weird s -> s
1159
1160 let s_of_inc_file_bis inc_file =
1161 match inc_file with
1162 | Local xs -> "\"" ^ xs +> Common.join "/" ^ "\""
1163 | NonLocal xs -> "<" ^ xs +> Common.join "/" ^ ">"
1164 | Weird s -> s
1165
1166 let fieldname_of_fieldkind fieldkind =
1167 match fieldkind with
1168 | Simple (sopt, ft) -> sopt
1169 | BitField (sopt, ft, info, expr) -> sopt
1170
1171
1172 let s_of_attr attr =
1173 attr
1174 +> List.map (fun (Attribute s, ii) -> s)
1175 +> Common.join ","
1176
1177
1178 (* ------------------------------------------------------------------------- *)
1179 let str_of_name ident =
1180 match ident with
1181 | RegularName (s,ii) -> s
1182 | CppConcatenatedName xs ->
1183 xs +> List.map (fun (x,iiop) -> unwrap x) +> Common.join "##"
1184 | CppVariadicName (s, ii) -> "##" ^ s
1185 | CppIdentBuilder ((s,iis), xs) ->
1186 s ^ "(" ^
1187 (xs +> List.map (fun ((x,iix), iicomma) -> x) +> Common.join ",") ^
1188 ")"
1189
1190 let get_s_and_ii_of_name name =
1191 match name with
1192 | RegularName (s, iis) -> s, iis
1193 | CppIdentBuilder ((s, iis), xs) -> s, iis
1194 | CppVariadicName (s,iis) ->
1195 let (iop, iis) = Common.tuple_of_list2 iis in
1196 s, [iis]
1197 | CppConcatenatedName xs ->
1198 (match xs with
1199 | [] -> raise (Impossible 60)
1200 | ((s,iis),noiiop)::xs ->
1201 s, iis
1202 )
1203
1204 let get_s_and_info_of_name name =
1205 let (s,ii) = get_s_and_ii_of_name name in
1206 s, List.hd ii
1207
1208 let info_of_name name =
1209 let (s,ii) = get_s_and_ii_of_name name in
1210 List.hd ii
1211
1212 let ii_of_name name =
1213 let (s,ii) = get_s_and_ii_of_name name in
1214 ii
1215
1216 let get_local_ii_of_expr_inlining_ii_of_name e =
1217 let (ebis,_),ii = e in
1218 match ebis, ii with
1219 | Ident name, noii ->
1220 assert(null noii);
1221 ii_of_name name
1222 | RecordAccess (e, name), ii ->
1223 ii @ ii_of_name name
1224 | RecordPtAccess (e, name), ii ->
1225 ii @ ii_of_name name
1226 | _, ii -> ii
1227
1228
1229 let get_local_ii_of_tybis_inlining_ii_of_name ty =
1230 match ty with
1231 | TypeName (name, _typ), [] -> ii_of_name name
1232 | _, ii -> ii
1233
1234 (* the following is used to obtain the argument to LocalVar *)
1235 let info_of_type ft =
1236 let (qu, ty) = ft in
1237 (* bugfix: because of string->name, the ii can be deeper *)
1238 let ii = get_local_ii_of_tybis_inlining_ii_of_name ty in
1239 match ii with
1240 | ii::_ -> Some ii.pinfo
1241 | [] -> None
1242
1243 (* only Label and Goto have name *)
1244 let get_local_ii_of_st_inlining_ii_of_name st =
1245 match st with
1246 | Labeled (Label (name, st)), ii -> ii_of_name name @ ii
1247 | Jump (Goto name), ii ->
1248 let (i1, i3) = Common.tuple_of_list2 ii in
1249 [i1] @ ii_of_name name @ [i3]
1250 | _, ii -> ii
1251
1252
1253
1254 (* ------------------------------------------------------------------------- *)
1255 let name_of_parameter param =
1256 param.p_namei +> Common.map_option (str_of_name)
1257
1258
1259 (* ------------------------------------------------------------------------- *)
1260 (* Annotations on tokens *)
1261 (* ------------------------------------------------------------------------- *)
1262
1263 (* to put a given annotation on a token *)
1264 let put_annot_info info key value =
1265 info.annots_tag <- Token_annot.put_annot key value info.annots_tag
1266
1267 (* to check if an annotation has such a token *)
1268 let get_annot_info info key =
1269 Token_annot.get_annot info.annots_tag key
1270