Release coccinelle-0.1.8
[bpt/coccinelle.git] / parsing_c / ast_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14 open Common
15
16 (*****************************************************************************)
17 (* The AST C related types *)
18 (*****************************************************************************)
19 (*
20 * Some stuff are tagged semantic: which means that they are computed
21 * after parsing.
22 *
23 * This means that some elements in this AST are present only if
24 * some annotation/transformation has been done on the original AST returned
25 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
26 *)
27
28
29 (* ------------------------------------------------------------------------- *)
30 (* Token/info *)
31 (* ------------------------------------------------------------------------- *)
32
33 (* To allow some transformations over the AST, we keep as much information
34 * as possible in the AST such as the tokens content and their locations.
35 * Those info are called 'info' (how original) and can be tagged.
36 * For instance one tag may say that the unparser should remove this token.
37 *
38 * Update: Now I use a ref! in those 'info' so take care.
39 * That means that modifications of the info of tokens can have
40 * an effect on the info stored in the ast (which is sometimes
41 * convenient, cf unparse_c.ml or comment_annotater_c.ml)
42 *
43 * convention: I often use 'ii' for the name of a list of info.
44 *
45 * Sometimes we want to add someting at the beginning or at the end
46 * of a construct. For 'function' and 'decl' we want to add something
47 * to their left and for 'if' 'while' et 'for' and so on at their right.
48 * We want some kinds of "virtual placeholders" that represent the start or
49 * end of a construct. We use fakeInfo for that purpose.
50 * To identify those cases I have added a fakestart/fakeend comment.
51 *
52 * cocci: Each token will be decorated in the future by the mcodekind
53 * of cocci. It is the job of the pretty printer to look at this
54 * information and decide to print or not the token (and also the
55 * pending '+' associated sometimes with the token).
56 *
57 * The first time that we parse the original C file, the mcodekind is
58 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
59 * associated. This is what I call a "clean" expr/statement/....
60 *
61 * Each token will also be decorated in the future with an environment,
62 * because the pending '+' may contain metavariables that refer to some
63 * C code.
64 *
65 *)
66
67 (* forunparser: *)
68
69 type posl = int * int (* line-col, for MetaPosValList, for position variables *)
70 (* with sexp *)
71
72 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
73 type virtual_position = Common.parse_info * int (* character offset *)
74 (* with sexp *)
75
76 type parse_info =
77 (* Present both in ast and list of tokens *)
78 | OriginTok of Common.parse_info
79 (* Present only in ast and generated after parsing. Used mainly
80 * by Julia, to add stuff at virtual places, beginning of func or decl *)
81 | FakeTok of string * virtual_position
82 (* Present both in ast and list of tokens. *)
83 | ExpandedTok of Common.parse_info * virtual_position
84
85 (* Present neither in ast nor in list of tokens
86 * but only in the '+' of the mcode of some tokens. Those kind of tokens
87 * are used to be able to use '=' to compare big ast portions.
88 *)
89 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
90 (* with sexp *)
91
92 type info = {
93 pinfo : parse_info;
94
95 (* this cocci_tag can be changed, which is how we can express some program
96 * transformations by tagging the tokens involved in this transformation.
97 *)
98 cocci_tag: (Ast_cocci.mcodekind * metavars_binding) option ref;
99 (* set in comment_annotater_c.ml *)
100 comments_tag: comments_around ref;
101
102 (* todo? token_info : sometimes useful to know what token it was *)
103 }
104 and il = info list
105
106 (* wrap2 is like wrap, except that I use it often for separator such
107 * as ','. In that case the info is associated to the argument that
108 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])].
109 *
110 * wrap3 is like wrap, except that I use it in case sometimes it
111 * will be empty because the info will be included in a nested
112 * entity (e.g. for Ident in expr because it's inlined in the name)
113 * so user should never assume List.length wrap3 > 0.
114 *)
115 and 'a wrap = 'a * il
116 and 'a wrap2 = 'a * il
117 and 'a wrap3 = 'a * il (* * evotype*)
118
119 (* ------------------------------------------------------------------------- *)
120 (* Name *)
121 (* ------------------------------------------------------------------------- *)
122
123 (* was called 'ident' before, but 'name' is I think better
124 * as concatenated strings can be used not only for identifiers and for
125 * declarators, but also for fields, for labels, etc.
126 *
127 * Note: because now the info is embeded in the name, the info for
128 * expression like Ident, or types like Typename, are not anymore
129 * stored in the expression or type. Hence if you assume this,
130 * which was true before, you are now wrong. So never write code like
131 * let (unwrape,_), ii = e and use 'ii' believing it contains
132 * the local ii to e. If you want to do that, use the appropiate
133 * wrapper get_local_ii_of_expr_inlining_ii_of_name.
134 *)
135 and name =
136 | RegularName of string wrap
137 | CppConcatenatedName of (string wrap) wrap2 (* the ## separators *) list
138 (* normally only used inside list of things, as in parameters or arguments
139 * in which case, cf cpp-manual, it has a special meaning *)
140 | CppVariadicName of string wrap (* ## s *)
141 | CppIdentBuilder of string wrap (* s ( ) *) *
142 ((string wrap) wrap2 list) (* arguments *)
143
144
145 (* ------------------------------------------------------------------------- *)
146 (* C Type *)
147 (* ------------------------------------------------------------------------- *)
148 (* Could have more precise type in fullType, in expression, etc, but
149 * it requires to do too much things in parsing such as checking no
150 * conflicting structname, computing value, etc. Better to separate
151 * concern. So I put '=>' to mean what we would really like. In fact
152 * what we really like is defining another fullType, expression, etc
153 * from scratch, because many stuff are just sugar.
154 *
155 * invariant: Array and FunctionType have also typeQualifier but they
156 * dont have sense. I put this to factorise some code. If you look in
157 * the grammar, you see that we can never specify const for the array
158 * himself (but we can do it for pointer) or function, we always
159 * have in the action rule of the grammar a { (nQ, FunctionType ...) }.
160 *
161 *
162 * Because of ExprStatement, we can have more 'new scope' events, but
163 * rare I think. For instance with 'array of constExpression' there can
164 * have an exprStatement and a new (local) struct defined. Same for
165 * Constructor.
166 *
167 *)
168
169
170 and fullType = typeQualifier * typeC
171 and typeC = typeCbis wrap (* todo reput wrap3 *)
172
173 and typeCbis =
174 | BaseType of baseType
175
176 | Pointer of fullType
177 | Array of constExpression option * fullType
178 | FunctionType of functionType
179
180 | Enum of string option * enumType
181 | StructUnion of structUnion * string option * structType (* new scope *)
182
183 | EnumName of string
184 | StructUnionName of structUnion * string
185
186 | TypeName of name * fullType option (* semantic: filled later *)
187
188 | ParenType of fullType (* forunparser: *)
189
190 (* gccext: TypeOfType below may seems useless; Why declare a
191 * __typeof__(int) x; ?
192 * When used with macros, it allows to fix a problem of C which
193 * is that type declaration can be spread around the ident. Indeed it
194 * may be difficult to have a macro such as
195 * '#define macro(type, ident) type ident;'
196 * because when you want to do a
197 * macro(char[256], x),
198 * then it will generate invalid code, but with a
199 * '#define macro(type, ident) __typeof(type) ident;'
200 * it will work.
201 *)
202 | TypeOfExpr of expression
203 | TypeOfType of fullType
204
205 (* cppext: IfdefType TODO *)
206
207 (* -------------------------------------- *)
208 and baseType = Void
209 | IntType of intType
210 | FloatType of floatType
211
212 (* stdC: type section
213 * add a | SizeT ?
214 * note: char and signed char are semantically different!!
215 *)
216 and intType = CChar (* obsolete? | CWchar *)
217 | Si of signed
218
219 and signed = sign * base
220 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
221 and sign = Signed | UnSigned
222
223 and floatType = CFloat | CDouble | CLongDouble
224
225
226 (* -------------------------------------- *)
227 and structUnion = Struct | Union
228 and structType = field list
229 and field =
230 | DeclarationField of field_declaration
231 (* gccext: *)
232 | EmptyField of info
233
234 (* cppext: *)
235 | MacroDeclField of (string * argument wrap2 list)
236 wrap (* optional ';'*)
237
238 (* cppext: *)
239 | CppDirectiveStruct of cpp_directive
240 | IfdefStruct of ifdef_directive (* * field list list *)
241
242
243 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
244 and field_declaration =
245 | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *)
246
247 (* At first I thought that a bitfield could be only Signed/Unsigned.
248 * But it seems that gcc allow char i:4. C rule must say that you
249 * can cast into int so enum too, ...
250 *)
251 and fieldkind =
252 | Simple of name option * fullType
253 | BitField of name option * fullType *
254 info (* : *) * constExpression
255 (* fullType => BitFieldInt | BitFieldUnsigned *)
256
257
258 (* -------------------------------------- *)
259 and enumType = (name * (info (* = *) * constExpression) option)
260 wrap2 (* , *) list
261 (* => string * int list *)
262
263
264 (* -------------------------------------- *)
265 (* return * (params * has "...") *)
266 and functionType = fullType * (parameterType wrap2 list * bool wrap)
267 and parameterType =
268 { p_namei: name option;
269 p_register: bool wrap;
270 p_type: fullType;
271 }
272 (* => (bool (register) * fullType) list * bool *)
273
274
275 and typeQualifier = typeQualifierbis wrap
276 and typeQualifierbis = {const: bool; volatile: bool}
277
278 (* gccext: cppext: *)
279 and attribute = attributebis wrap
280 and attributebis =
281 | Attribute of string
282
283 (* ------------------------------------------------------------------------- *)
284 (* C expression *)
285 (* ------------------------------------------------------------------------- *)
286 and expression = (expressionbis * exp_info ref (* semantic: *)) wrap3
287 and exp_info = exp_type option * test
288 and exp_type = fullType (* Type_c.completed_and_simplified *) * local
289 and local = LocalVar of parse_info | NotLocalVar (* cocci: *)
290 and test = Test | NotTest (* cocci: *)
291
292 and expressionbis =
293
294 (* Ident can be a enumeration constant, a simple variable, a name of a func.
295 * With cppext, Ident can also be the name of a macro. Sparse says
296 * "an identifier with a meaning is a symbol" *)
297 | Ident of name (* todo? more semantic info such as LocalFunc *)
298
299 | Constant of constant
300 | FunCall of expression * argument wrap2 (* , *) list
301 (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *)
302 | CondExpr of expression * expression option * expression
303
304 (* should be considered as statements, bad C langage *)
305 | Sequence of expression * expression
306 | Assignment of expression * assignOp * expression
307
308
309 | Postfix of expression * fixOp
310 | Infix of expression * fixOp
311
312 | Unary of expression * unaryOp
313 | Binary of expression * binaryOp * expression
314
315 | ArrayAccess of expression * expression
316
317 (* field ident access *)
318 | RecordAccess of expression * name
319 | RecordPtAccess of expression * name
320 (* redundant normally, could replace it by DeRef RecordAcces *)
321
322 | SizeOfExpr of expression
323 | SizeOfType of fullType
324 | Cast of fullType * expression
325
326 (* gccext: *)
327 | StatementExpr of compound wrap (* ( ) new scope *)
328 | Constructor of fullType * initialiser wrap2 (* , *) list
329
330 (* forunparser: *)
331 | ParenExpr of expression
332
333 (* cppext: IfdefExpr TODO *)
334
335 (* cppext: normmally just expression *)
336 and argument = (expression, weird_argument) Common.either
337 and weird_argument =
338 | ArgType of parameterType
339 | ArgAction of action_macro
340 and action_macro =
341 (* todo: ArgStatement of statement, possibly have ghost token *)
342 | ActMisc of il
343
344
345 (* I put string for Int and Float because int would not be enough because
346 * OCaml int are 31 bits. So simpler to do string. Same reason to have
347 * string instead of int list for the String case.
348 *
349 * note: -2 is not a constant, it is the unary operator '-'
350 * applied to constant 2. So the string must represent a positive
351 * integer only. *)
352
353 and constant =
354 | String of (string * isWchar)
355 | MultiString of string list (* can contain MacroString, todo: more info *)
356 | Char of (string * isWchar) (* normally it is equivalent to Int *)
357 | Int of (string * intType)
358 | Float of (string * floatType)
359
360 and isWchar = IsWchar | IsChar
361
362
363 and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not
364 | GetRefLabel (* gccext: GetRefLabel, via &&label notation *)
365 and assignOp = SimpleAssign | OpAssign of arithOp
366 and fixOp = Dec | Inc
367
368 and binaryOp = Arith of arithOp | Logical of logicalOp
369
370 and arithOp =
371 | Plus | Minus | Mul | Div | Mod
372 | DecLeft | DecRight
373 | And | Or | Xor
374
375 and logicalOp =
376 | Inf | Sup | InfEq | SupEq
377 | Eq | NotEq
378 | AndLog | OrLog
379
380 and constExpression = expression (* => int *)
381
382
383 (* ------------------------------------------------------------------------- *)
384 (* C statement *)
385 (* ------------------------------------------------------------------------- *)
386 (* note: that assignement is not a statement but an expression;
387 * wonderful C langage.
388 *
389 * note: I use 'and' for type definition cos gccext allow statement as
390 * expression, so need mutual recursive type definition.
391 *
392 *)
393
394 and statement = statementbis wrap3
395 and statementbis =
396 | Labeled of labeled
397 | Compound of compound (* new scope *)
398 | ExprStatement of exprStatement
399 | Selection of selection (* have fakeend *)
400 | Iteration of iteration (* have fakeend *)
401 | Jump of jump
402
403 (* simplify cocci: only at the beginning of a compound normally *)
404 | Decl of declaration
405
406 (* gccext: *)
407 | Asm of asmbody
408 | NestedFunc of definition
409
410 (* cppext: *)
411 | MacroStmt
412
413
414
415 and labeled = Label of name * statement
416 | Case of expression * statement
417 | CaseRange of expression * expression * statement (* gccext: *)
418 | Default of statement
419
420 (* cppext:
421 * old: compound = (declaration list * statement list)
422 * old: (declaration, statement) either list
423 * Simplify cocci to just have statement list, by integrating Decl in stmt.
424 *
425 * update: now introduce also the _sequencable to allow ifdef in the middle.
426 * Indeed, I now allow ifdefs in the ast but they must be only between
427 * "sequencable" elements. They can be put in a type only if this type
428 * is used in a list, like at the toplevel, used in a 'toplevel list',
429 * or inside a compound, used in a 'statement list'. I must not allow
430 * ifdef anywhere. For instance I can not make ifdef a statement
431 * cos some instruction like If accept only one statement and the
432 * ifdef directive must not take the place of a legitimate instruction.
433 * We had a similar phenomena in SmPL where we have the notion
434 * of statement and sequencable statement too. Once you have
435 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
436 * and introduce the ifdef.
437 *
438 * update: those ifdefs are either passed, or present in the AST but in
439 * a flat form. To structure those flat ifdefs you have to run
440 * a transformation that will put in a tree the statements inside
441 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
442 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
443 *
444 *)
445 and compound = statement_sequencable list
446
447 (* cppext: easier to put at statement_list level than statement level *)
448 and statement_sequencable =
449 | StmtElem of statement
450
451 (* cppext: *)
452 | CppDirectiveStmt of cpp_directive
453 | IfdefStmt of ifdef_directive
454
455 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
456 | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list
457
458 and exprStatement = expression option
459
460 (* for Switch, need check that all elements in the compound start
461 * with a case:, otherwise unreachable code.
462 *)
463 and selection =
464 | If of expression * statement * statement
465 | Switch of expression * statement
466
467
468 and iteration =
469 | While of expression * statement
470 | DoWhile of statement * expression
471 | For of exprStatement wrap * exprStatement wrap * exprStatement wrap *
472 statement
473 (* cppext: *)
474 | MacroIteration of string * argument wrap2 list * statement
475
476 and jump = Goto of name
477 | Continue | Break
478 | Return | ReturnExpr of expression
479 | GotoComputed of expression (* gccext: goto *exp ';' *)
480
481
482 (* gccext: *)
483 and asmbody = il (* string list *) * colon wrap (* : *) list
484 and colon = Colon of colon_option wrap2 list
485 and colon_option = colon_option_bis wrap
486 and colon_option_bis = ColonMisc | ColonExpr of expression
487
488
489 (* ------------------------------------------------------------------------- *)
490 (* Declaration *)
491 (* ------------------------------------------------------------------------- *)
492 (* (string * ...) option cos can have empty declaration or struct tag
493 * declaration.
494 *
495 * Before I had a Typedef constructor, but why make this special case and not
496 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
497 * declarations ? So I try to generalise and not have Typedef either. This
498 * requires more work in parsing. Better to separate concern.
499 *
500 * Before the need for unparser, I didn't have a DeclList but just a Decl.
501 *
502 * I am not sure what it means to declare a prototype inline, but gcc
503 * accepts it.
504 *)
505
506 and declaration =
507 | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *)
508 (* cppext: *)
509 | MacroDecl of (string * argument wrap2 list) wrap (* fakestart *)
510
511 and onedecl =
512 { v_namei: (name * (info (* = *) * initialiser) option) option;
513 v_type: fullType;
514 v_storage: storage;
515 v_local: local_decl; (* cocci: *)
516 v_attr: attribute list; (* gccext: *)
517 }
518 and storage = storagebis * bool (* gccext: inline or not *)
519 and storagebis = NoSto | StoTypedef | Sto of storageClass
520 and storageClass = Auto | Static | Register | Extern
521
522 and local_decl = LocalDecl | NotLocalDecl
523
524 and initialiser = initialiserbis wrap
525 and initialiserbis =
526 | InitExpr of expression
527 | InitList of initialiser wrap2 (* , *) list
528 (* gccext: *)
529 | InitDesignators of designator list * initialiser
530 | InitFieldOld of string * initialiser
531 | InitIndexOld of expression * initialiser
532
533 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
534 and designator = designatorbis wrap
535 and designatorbis =
536 | DesignatorField of string
537 | DesignatorIndex of expression
538 | DesignatorRange of expression * expression
539
540 (* ------------------------------------------------------------------------- *)
541 (* Function definition *)
542 (* ------------------------------------------------------------------------- *)
543 (* Normally we should define another type functionType2 because there
544 * are more restrictions on what can define a function than a pointer
545 * function. For instance a function declaration can omit the name of the
546 * parameter whereas a function definition can not. But, in some cases such
547 * as 'f(void) {', there is no name too, so I simplified and reused the
548 * same functionType type for both declaration and function definition.
549 *
550 * Also old style C does not have type in the parameter, so again simpler
551 * to abuse the functionType and allow missing type.
552 *)
553 and definition = definitionbis wrap (* ( ) { } fakestart sto *)
554 and definitionbis =
555 { f_name: name;
556 f_type: functionType; (* less? a functionType2 ? *)
557 f_storage: storage;
558 f_body: compound;
559 f_attr: attribute list; (* gccext: *)
560 f_old_c_style: declaration list option;
561 }
562 (* cppext: IfdefFunHeader TODO *)
563
564 (* ------------------------------------------------------------------------- *)
565 (* cppext: cpp directives, #ifdef, #define and #include body *)
566 (* ------------------------------------------------------------------------- *)
567 and cpp_directive =
568 | Define of define
569 | Include of includ
570 | Undef of string wrap
571 | PragmaAndCo of il
572 (*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *)
573
574 and define = string wrap (* #define s eol *) * (define_kind * define_val)
575 and define_kind =
576 | DefineVar
577 | DefineFunc of ((string wrap) wrap2 list) wrap (* () *)
578 and define_val =
579 (* most common case; e.g. to define int constant *)
580 | DefineExpr of expression
581
582 | DefineStmt of statement
583 | DefineType of fullType
584 | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *)
585
586 | DefineFunction of definition
587 | DefineInit of initialiser (* in practice only { } with possible ',' *)
588
589 (* TODO DefineMulti of define_val list *)
590
591 | DefineText of string wrap
592 | DefineEmpty
593
594 | DefineTodo
595
596
597
598 and includ =
599 { i_include: inc_file wrap; (* #include s *)
600 (* cocci: computed in ? *)
601 i_rel_pos: include_rel_pos option ref;
602 (* cocci: cf -test incl *)
603 i_is_in_ifdef: bool;
604 (* cf cpp_ast_c.ml. set to None at parsing time. *)
605 i_content: (Common.filename (* full path *) * program) option;
606 }
607 and inc_file =
608 | Local of inc_elem list
609 | NonLocal of inc_elem list
610 | Weird of string (* ex: #include SYSTEM_H *)
611 and inc_elem = string
612
613 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
614 *
615 * The first_of and last_of store the list of prefixes that was
616 * introduced by the include. On #include <a/b/x>, if the include was
617 * the first in the file, it would give in first_of the following
618 * prefixes a/b/c; a/b/; a/ ; <empty>
619 *
620 * This is set after parsing, in cocci.ml, in update_rel_pos.
621 *)
622 and include_rel_pos = {
623 first_of : string list list;
624 last_of : string list list;
625 }
626
627
628
629 (* todo? to specialize if someone need more info *)
630 and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *)
631 | IfdefDirective of (ifdefkind * matching_tag) wrap
632 and ifdefkind =
633 | Ifdef (* todo? of string ? of formula_cpp ? *)
634 | IfdefElseif (* same *)
635 | IfdefElse (* same *)
636 | IfdefEndif
637 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
638 * a global so it means if you parse the same file twice you may get
639 * different id. I try now to avoid this pb by resetting it each
640 * time I parse a file.
641 *)
642 and matching_tag =
643 IfdefTag of (int (* tag *) * int (* total with this tag *))
644
645
646
647
648
649 (* ------------------------------------------------------------------------- *)
650 (* The toplevels elements *)
651 (* ------------------------------------------------------------------------- *)
652 and toplevel =
653 | Declaration of declaration
654 | Definition of definition
655
656 (* cppext: *)
657 | CppTop of cpp_directive
658 | IfdefTop of ifdef_directive (* * toplevel list *)
659
660 (* cppext: *)
661 | MacroTop of string * argument wrap2 list * il
662
663 | EmptyDef of il (* gccext: allow redundant ';' *)
664 | NotParsedCorrectly of il
665
666 | FinalDef of info (* EOF *)
667
668 (* ------------------------------------------------------------------------- *)
669 and program = toplevel list
670
671 (*****************************************************************************)
672 (* Cocci Bindings *)
673 (*****************************************************************************)
674 (* Was previously in pattern.ml, but because of the transformer,
675 * we need to decorate each token with some cocci code AND the environment
676 * for this cocci code.
677 *)
678 and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc
679 and metavar_binding_kind =
680 | MetaIdVal of string
681 | MetaFuncVal of string
682 | MetaLocalFuncVal of string
683
684 | MetaExprVal of expression (* a "clean expr" *)
685 | MetaExprListVal of argument wrap2 list
686 | MetaParamVal of parameterType
687 | MetaParamListVal of parameterType wrap2 list
688
689 | MetaTypeVal of fullType
690 | MetaInitVal of initialiser
691 | MetaStmtVal of statement
692
693 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
694 * because don't need to have the value for a position in the env of
695 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
696 * variables accessible via SmPL whereas the position can be one day
697 * so I think it's better to put MetaPosVal here *)
698 | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *)
699 | MetaPosValList of
700 (Common.filename * string (*element*) * posl * posl) list (* min, max *)
701 | MetaListlenVal of int
702
703
704 (*****************************************************************************)
705 (* C comments *)
706 (*****************************************************************************)
707
708 (* convention: I often use "m" for comments as I can not use "c"
709 * (already use for c stuff) and "com" is too long.
710 *)
711
712 (* this type will be associated to each token.
713 *)
714 and comments_around = {
715 mbefore: Token_c.comment_like_token list;
716 mafter: Token_c.comment_like_token list;
717
718 (* less: could remove ? do something simpler than CComment for
719 * coccinelle, cf above. *)
720 mbefore2: comment_and_relative_pos list;
721 mafter2: comment_and_relative_pos list;
722 }
723 and comment_and_relative_pos = {
724
725 minfo: Common.parse_info;
726 (* the int represent the number of lines of difference between the
727 * current token and the comment. When on same line, this number is 0.
728 * When previous line, -1. In some way the after/before in previous
729 * record is useless because the sign of the integer can helps
730 * do the difference too, but I keep it that way.
731 *)
732 mpos: int;
733 (* todo?
734 * cppbetween: bool; touse? if false positive
735 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
736 *)
737 }
738
739 and comment = Common.parse_info
740 and com = comment list ref
741
742 (* with sexp *)
743
744
745 (*****************************************************************************)
746 (* Some constructors *)
747 (*****************************************************************************)
748 let nullQualif = ({const=false; volatile= false}, [])
749 let nQ = nullQualif
750
751 let defaultInt = (BaseType (IntType (Si (Signed, CInt))))
752
753 let noType () = ref (None,NotTest)
754 let noInstr = (ExprStatement (None), [])
755 let noTypedefDef () = None
756
757 let emptyMetavarsBinding =
758 ([]: metavars_binding)
759
760 let emptyAnnotCocci =
761 (Ast_cocci.CONTEXT (Ast_cocci.NoPos,Ast_cocci.NOTHING),
762 emptyMetavarsBinding)
763
764 let emptyAnnot =
765 (None: (Ast_cocci.mcodekind * metavars_binding) option)
766
767 (* compatibility mode *)
768 let mcode_and_env_of_cocciref aref =
769 match !aref with
770 | Some x -> x
771 | None -> emptyAnnotCocci
772
773
774 let emptyComments= {
775 mbefore = [];
776 mafter = [];
777 mbefore2 = [];
778 mafter2 = [];
779 }
780
781
782 (* for include, some meta information needed by cocci *)
783 let noRelPos () =
784 ref (None: include_rel_pos option)
785 let noInIfdef () =
786 ref false
787
788
789 (* When want add some info in ast that does not correspond to
790 * an existing C element.
791 * old: or when don't want 'synchronize' on it in unparse_c.ml
792 * (now have other mark for tha matter).
793 *)
794 let no_virt_pos = ({str="";charpos=0;line=0;column=0;file=""},-1)
795
796 let fakeInfo pi =
797 { pinfo = FakeTok ("",no_virt_pos);
798 cocci_tag = ref emptyAnnot;
799 comments_tag = ref emptyComments;
800 }
801
802 let noii = []
803 let noattr = []
804 let noi_content = (None: ((Common.filename * program) option))
805
806 (*****************************************************************************)
807 (* Wrappers *)
808 (*****************************************************************************)
809 let unwrap = fst
810
811 let unwrap2 = fst
812
813 let unwrap_expr ((unwrap_e, typ), iie) = unwrap_e
814 let rewrap_expr ((_old_unwrap_e, typ), iie) newe = ((newe, typ), iie)
815
816 let unwrap_typeC (qu, (typeC, ii)) = typeC
817 let rewrap_typeC (qu, (typeC, ii)) newtypeC = (qu, (newtypeC, ii))
818
819 let unwrap_typeCbis (typeC, ii) = typeC
820
821 let unwrap_st (unwrap_st, ii) = unwrap_st
822
823 (* ------------------------------------------------------------------------- *)
824 let mk_e unwrap_e ii = (unwrap_e, noType()), ii
825 let mk_e_bis unwrap_e ty ii = (unwrap_e, ty), ii
826
827 let mk_ty typeC ii = nQ, (typeC, ii)
828 let mk_tybis typeC ii = (typeC, ii)
829
830 let mk_st unwrap_st ii = (unwrap_st, ii)
831
832 (* ------------------------------------------------------------------------- *)
833 let get_ii_typeC_take_care (typeC, ii) = ii
834 let get_ii_st_take_care (st, ii) = ii
835 let get_ii_expr_take_care (e, ii) = ii
836
837 let get_st_and_ii (st, ii) = st, ii
838 let get_ty_and_ii (qu, (typeC, ii)) = qu, (typeC, ii)
839 let get_e_and_ii (e, ii) = e, ii
840
841
842 (* ------------------------------------------------------------------------- *)
843 let get_type_expr ((unwrap_e, typ), iie) = !typ
844 let set_type_expr ((unwrap_e, oldtyp), iie) newtyp =
845 oldtyp := newtyp
846 (* old: (unwrap_e, newtyp), iie *)
847
848 let get_onlytype_expr ((unwrap_e, typ), iie) =
849 match !typ with
850 | Some (ft,_local), _test -> Some ft
851 | None, _ -> None
852
853 let get_onlylocal_expr ((unwrap_e, typ), iie) =
854 match !typ with
855 | Some (ft,local), _test -> Some local
856 | None, _ -> None
857
858 (* ------------------------------------------------------------------------- *)
859 let rewrap_str s ii =
860 {ii with pinfo =
861 (match ii.pinfo with
862 OriginTok pi -> OriginTok { pi with Common.str = s;}
863 | ExpandedTok (pi,vpi) -> ExpandedTok ({ pi with Common.str = s;},vpi)
864 | FakeTok (_,vpi) -> FakeTok (s,vpi)
865 | AbstractLineTok pi -> OriginTok { pi with Common.str = s;})}
866
867 let rewrap_pinfo pi ii =
868 {ii with pinfo = pi}
869
870
871
872 (* info about the current location *)
873 let get_pi = function
874 OriginTok pi -> pi
875 | ExpandedTok (_,(pi,_)) -> pi
876 | FakeTok (_,(pi,_)) -> pi
877 | AbstractLineTok pi -> pi
878
879 (* original info *)
880 let get_opi = function
881 OriginTok pi -> pi
882 | ExpandedTok (pi,_) -> pi (* diff with get_pi *)
883 | FakeTok (_,_) -> failwith "no position information"
884 | AbstractLineTok pi -> pi
885
886 let str_of_info ii =
887 match ii.pinfo with
888 OriginTok pi -> pi.Common.str
889 | ExpandedTok (pi,_) -> pi.Common.str
890 | FakeTok (s,_) -> s
891 | AbstractLineTok pi -> pi.Common.str
892
893 let get_info f ii =
894 match ii.pinfo with
895 OriginTok pi -> f pi
896 | ExpandedTok (_,(pi,_)) -> f pi
897 | FakeTok (_,(pi,_)) -> f pi
898 | AbstractLineTok pi -> f pi
899
900 let get_orig_info f ii =
901 match ii.pinfo with
902 OriginTok pi -> f pi
903 | ExpandedTok (pi,_) -> f pi (* diff with get_info *)
904 | FakeTok (_,(pi,_)) -> f pi
905 | AbstractLineTok pi -> f pi
906
907 let make_expanded ii =
908 {ii with pinfo = ExpandedTok (get_opi ii.pinfo,no_virt_pos)}
909
910 let pos_of_info ii = get_info (function x -> x.Common.charpos) ii
911 let opos_of_info ii = get_orig_info (function x -> x.Common.charpos) ii
912 let line_of_info ii = get_orig_info (function x -> x.Common.line) ii
913 let col_of_info ii = get_orig_info (function x -> x.Common.column) ii
914 let file_of_info ii = get_orig_info (function x -> x.Common.file) ii
915 let mcode_of_info ii = fst (mcode_and_env_of_cocciref ii.cocci_tag)
916 let pinfo_of_info ii = ii.pinfo
917 let parse_info_of_info ii = get_pi ii.pinfo
918
919 let is_fake ii =
920 match ii.pinfo with
921 FakeTok (_,_) -> true
922 | _ -> false
923
924 let is_origintok ii =
925 match ii.pinfo with
926 | OriginTok pi -> true
927 | _ -> false
928
929 (* ------------------------------------------------------------------------- *)
930 type posrv = Real of Common.parse_info | Virt of virtual_position
931
932 let compare_pos ii1 ii2 =
933 let get_pos = function
934 OriginTok pi -> Real pi
935 | FakeTok (s,vpi) -> Virt vpi
936 | ExpandedTok (pi,vpi) -> Virt vpi
937 | AbstractLineTok pi -> Real pi in (* used for printing *)
938 let pos1 = get_pos (pinfo_of_info ii1) in
939 let pos2 = get_pos (pinfo_of_info ii2) in
940 match (pos1,pos2) with
941 (Real p1, Real p2) ->
942 compare p1.Common.charpos p2.Common.charpos
943 | (Virt (p1,_), Real p2) ->
944 if (compare p1.Common.charpos p2.Common.charpos) =|= (-1) then (-1) else 1
945 | (Real p1, Virt (p2,_)) ->
946 if (compare p1.Common.charpos p2.Common.charpos) =|= 1 then 1 else (-1)
947 | (Virt (p1,o1), Virt (p2,o2)) ->
948 let poi1 = p1.Common.charpos in
949 let poi2 = p2.Common.charpos in
950 match compare poi1 poi2 with
951 -1 -> -1
952 | 0 -> compare o1 o2
953 | x -> x
954
955 let equal_posl (l1,c1) (l2,c2) =
956 (l1 =|= l2) && (c1 =|= c2)
957
958 let info_to_fixpos ii =
959 match pinfo_of_info ii with
960 OriginTok pi -> Ast_cocci.Real pi.Common.charpos
961 | ExpandedTok (_,(pi,offset)) ->
962 Ast_cocci.Virt (pi.Common.charpos,offset)
963 | FakeTok (_,(pi,offset)) ->
964 Ast_cocci.Virt (pi.Common.charpos,offset)
965 | AbstractLineTok pi -> failwith "unexpected abstract"
966
967 (* cocci: *)
968 let is_test (e : expression) =
969 let (_,info), _ = e in
970 let (_,test) = !info in
971 test =*= Test
972
973 (*****************************************************************************)
974 (* Abstract line *)
975 (*****************************************************************************)
976
977 (* When we have extended the C Ast to add some info to the tokens,
978 * such as its line number in the file, we can not use anymore the
979 * ocaml '=' to compare Ast elements. To overcome this problem, to be
980 * able to use again '=', we just have to get rid of all those extra
981 * information, to "abstract those line" (al) information.
982 *
983 * Julia then modifies it a little to have a tokenindex, so the original
984 * true al_info is in fact real_al_info.
985 *)
986
987 let al_info tokenindex x =
988 { pinfo =
989 (AbstractLineTok
990 {charpos = tokenindex;
991 line = tokenindex;
992 column = tokenindex;
993 file = "";
994 str = str_of_info x});
995 cocci_tag = ref emptyAnnot;
996 comments_tag = ref emptyComments;
997 }
998
999 let semi_al_info x =
1000 { x with
1001 cocci_tag = ref emptyAnnot;
1002 comments_tag = ref emptyComments;
1003 }
1004
1005 let magic_real_number = -10
1006
1007 let real_al_info x =
1008 { pinfo =
1009 (AbstractLineTok
1010 {charpos = magic_real_number;
1011 line = magic_real_number;
1012 column = magic_real_number;
1013 file = "";
1014 str = str_of_info x});
1015 cocci_tag = ref emptyAnnot;
1016 comments_tag = ref emptyComments;
1017 }
1018
1019 let al_comments x =
1020 let keep_cpp l =
1021 List.filter (function (Token_c.TCommentCpp _,_) -> true | _ -> false) l in
1022 let al_com (x,i) =
1023 (x,{i with Common.charpos = magic_real_number;
1024 Common.line = magic_real_number;
1025 Common.column = magic_real_number}) in
1026 {mbefore = []; (* duplicates mafter of the previous token *)
1027 mafter = List.map al_com (keep_cpp x.mafter);
1028 mbefore2=[];
1029 mafter2=[];
1030 }
1031
1032 let al_info_cpp tokenindex x =
1033 { pinfo =
1034 (AbstractLineTok
1035 {charpos = tokenindex;
1036 line = tokenindex;
1037 column = tokenindex;
1038 file = "";
1039 str = str_of_info x});
1040 cocci_tag = ref emptyAnnot;
1041 comments_tag = ref (al_comments !(x.comments_tag));
1042 }
1043
1044 let semi_al_info_cpp x =
1045 { x with
1046 cocci_tag = ref emptyAnnot;
1047 comments_tag = ref (al_comments !(x.comments_tag));
1048 }
1049
1050 let real_al_info_cpp x =
1051 { pinfo =
1052 (AbstractLineTok
1053 {charpos = magic_real_number;
1054 line = magic_real_number;
1055 column = magic_real_number;
1056 file = "";
1057 str = str_of_info x});
1058 cocci_tag = ref emptyAnnot;
1059 comments_tag = ref (al_comments !(x.comments_tag));
1060 }
1061
1062
1063 (*****************************************************************************)
1064 (* Views *)
1065 (*****************************************************************************)
1066
1067 (* Transform a list of arguments (or parameters) where the commas are
1068 * represented via the wrap2 and associated with an element, with
1069 * a list where the comma are on their own. f(1,2,2) was
1070 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
1071 *
1072 * Used in cocci_vs_c.ml, to have a more direct correspondance between
1073 * the ast_cocci of julia and ast_c.
1074 *)
1075 let rec (split_comma: 'a wrap2 list -> ('a, il) either list) =
1076 function
1077 | [] -> []
1078 | (e, ii)::xs ->
1079 if null ii
1080 then (Left e)::split_comma xs
1081 else Right ii::Left e::split_comma xs
1082
1083 let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) =
1084 function
1085 | [] -> []
1086 | Right ii::Left e::xs ->
1087 (e, ii)::unsplit_comma xs
1088 | Left e::xs ->
1089 let empty_ii = [] in
1090 (e, empty_ii)::unsplit_comma xs
1091 | Right ii::_ ->
1092 raise Impossible
1093
1094
1095
1096
1097 (*****************************************************************************)
1098 (* Helpers, could also be put in lib_parsing_c.ml instead *)
1099 (*****************************************************************************)
1100
1101 (* should maybe be in pretty_print_c ? *)
1102
1103 let s_of_inc_file inc_file =
1104 match inc_file with
1105 | Local xs -> xs +> Common.join "/"
1106 | NonLocal xs -> xs +> Common.join "/"
1107 | Weird s -> s
1108
1109 let s_of_inc_file_bis inc_file =
1110 match inc_file with
1111 | Local xs -> "\"" ^ xs +> Common.join "/" ^ "\""
1112 | NonLocal xs -> "<" ^ xs +> Common.join "/" ^ ">"
1113 | Weird s -> s
1114
1115 let fieldname_of_fieldkind fieldkind =
1116 match fieldkind with
1117 | Simple (sopt, ft) -> sopt
1118 | BitField (sopt, ft, info, expr) -> sopt
1119
1120
1121 let s_of_attr attr =
1122 attr
1123 +> List.map (fun (Attribute s, ii) -> s)
1124 +> Common.join ","
1125
1126
1127 (* ------------------------------------------------------------------------- *)
1128 let str_of_name ident =
1129 match ident with
1130 | RegularName (s,ii) -> s
1131 | CppConcatenatedName xs ->
1132 xs +> List.map (fun (x,iiop) -> unwrap x) +> Common.join "##"
1133 | CppVariadicName (s, ii) -> "##" ^ s
1134 | CppIdentBuilder ((s,iis), xs) ->
1135 s ^ "(" ^
1136 (xs +> List.map (fun ((x,iix), iicomma) -> x) +> Common.join ",") ^
1137 ")"
1138
1139 let get_s_and_ii_of_name name =
1140 match name with
1141 | RegularName (s, iis) -> s, iis
1142 | CppIdentBuilder ((s, iis), xs) -> s, iis
1143 | CppVariadicName (s,iis) ->
1144 let (iop, iis) = Common.tuple_of_list2 iis in
1145 s, [iis]
1146 | CppConcatenatedName xs ->
1147 (match xs with
1148 | [] -> raise Impossible
1149 | ((s,iis),noiiop)::xs ->
1150 s, iis
1151 )
1152
1153 let get_s_and_info_of_name name =
1154 let (s,ii) = get_s_and_ii_of_name name in
1155 s, List.hd ii
1156
1157 let info_of_name name =
1158 let (s,ii) = get_s_and_ii_of_name name in
1159 List.hd ii
1160
1161 let ii_of_name name =
1162 let (s,ii) = get_s_and_ii_of_name name in
1163 ii
1164
1165
1166
1167 let get_local_ii_of_expr_inlining_ii_of_name e =
1168 let (ebis,_),ii = e in
1169 match ebis, ii with
1170 | Ident name, noii ->
1171 assert(null noii);
1172 ii_of_name name
1173 | RecordAccess (e, name), ii ->
1174 ii @ ii_of_name name
1175 | RecordPtAccess (e, name), ii ->
1176 ii @ ii_of_name name
1177 | _, ii -> ii
1178
1179
1180 let get_local_ii_of_tybis_inlining_ii_of_name ty =
1181 match ty with
1182 | TypeName (name, _typ), [] -> ii_of_name name
1183 | _, ii -> ii
1184
1185 (* only Label and Goto have name *)
1186 let get_local_ii_of_st_inlining_ii_of_name st =
1187 match st with
1188 | Labeled (Label (name, st)), ii -> ii_of_name name @ ii
1189 | Jump (Goto name), ii ->
1190 let (i1, i3) = Common.tuple_of_list2 ii in
1191 [i1] @ ii_of_name name @ [i3]
1192 | _, ii -> ii
1193
1194
1195
1196 (* ------------------------------------------------------------------------- *)
1197 let name_of_parameter param =
1198 param.p_namei +> Common.map_option (str_of_name)
1199