d8824f6f571844a535548a643b9929146d832202
[bpt/coccinelle.git] / parsing_c / ast_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2002, 2006, 2007, 2008, 2009 Yoann Padioleau
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14 open Common
15
16 (*****************************************************************************)
17 (* The AST C related types *)
18 (*****************************************************************************)
19 (*
20 * Some stuff are tagged semantic: which means that they are computed
21 * after parsing.
22 *
23 * This means that some elements in this AST are present only if
24 * some annotation/transformation has been done on the original AST returned
25 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
26 *)
27
28
29 (* ------------------------------------------------------------------------- *)
30 (* Token/info *)
31 (* ------------------------------------------------------------------------- *)
32
33 (* To allow some transformations over the AST, we keep as much information
34 * as possible in the AST such as the tokens content and their locations.
35 * Those info are called 'info' (how original) and can be tagged.
36 * For instance one tag may say that the unparser should remove this token.
37 *
38 * Update: Now I use a ref! in those 'info' so take care.
39 * That means that modifications of the info of tokens can have
40 * an effect on the info stored in the ast (which is sometimes
41 * convenient, cf unparse_c.ml or comment_annotater_c.ml)
42 *
43 * convention: I often use 'ii' for the name of a list of info.
44 *
45 * Sometimes we want to add someting at the beginning or at the end
46 * of a construct. For 'function' and 'decl' we want to add something
47 * to their left and for 'if' 'while' et 'for' and so on at their right.
48 * We want some kinds of "virtual placeholders" that represent the start or
49 * end of a construct. We use fakeInfo for that purpose.
50 * To identify those cases I have added a fakestart/fakeend comment.
51 *
52 * cocci: Each token will be decorated in the future by the mcodekind
53 * of cocci. It is the job of the pretty printer to look at this
54 * information and decide to print or not the token (and also the
55 * pending '+' associated sometimes with the token).
56 *
57 * The first time that we parse the original C file, the mcodekind is
58 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
59 * associated. This is what I call a "clean" expr/statement/....
60 *
61 * Each token will also be decorated in the future with an environment,
62 * because the pending '+' may contain metavariables that refer to some
63 * C code.
64 *
65 *)
66
67 (* forunparser: *)
68
69 type posl = int * int (* line-col, for MetaPosValList, for position variables *)
70
71 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
72 type virtual_position = Common.parse_info * int (* character offset *)
73
74 type parse_info =
75 (* Present both in ast and list of tokens *)
76 | OriginTok of Common.parse_info
77 (* Present only in ast and generated after parsing. Used mainly
78 * by Julia, to add stuff at virtual places, beginning of func or decl *)
79 | FakeTok of string * virtual_position
80 (* Present both in ast and list of tokens. *)
81 | ExpandedTok of Common.parse_info * virtual_position
82
83 (* Present neither in ast nor in list of tokens
84 * but only in the '+' of the mcode of some tokens. Those kind of tokens
85 * are used to be able to use '=' to compare big ast portions.
86 *)
87 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
88
89 type info = {
90 pinfo : parse_info;
91
92 (* this cocci_tag can be changed, which is how we can express some program
93 * transformations by tagging the tokens involved in this transformation.
94 *)
95 cocci_tag: (Ast_cocci.mcodekind * metavars_binding) ref;
96 (* set in comment_annotater_c.ml *)
97 comments_tag: comments_around ref;
98
99 (* todo? token_info : sometimes useful to know what token it was *)
100 }
101 and il = info list
102
103 (* wrap2 is like wrap, except that I use it often for separator such
104 * as ','. In that case the info is associated to the argument that
105 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *)
106 and 'a wrap = 'a * il
107 and 'a wrap2 = 'a * il
108
109 (* ------------------------------------------------------------------------- *)
110 (* Name *)
111 (* ------------------------------------------------------------------------- *)
112
113 (* was called 'ident' before, but 'name' is I think better
114 * as concatenated strings can be used not only for identifiers and for
115 * declarators, but also for fields, for labels, etc.
116 *)
117 and name =
118 | RegularName of string wrap
119 | CppConcatenatedName of (string wrap) wrap2 (* the ## separators *) list
120 (* normally only used inside list of things, as in parameters or arguments
121 * in which case, cf cpp-manual, it has a special meaning *)
122 | CppVariadicName of string wrap (* ## s *)
123 | CppIdentBuilder of string wrap (* s ( ) *) *
124 ((string wrap) wrap2 list) (* arguments *)
125
126
127 (* ------------------------------------------------------------------------- *)
128 (* C Type *)
129 (* ------------------------------------------------------------------------- *)
130 (* Could have more precise type in fullType, in expression, etc, but
131 * it requires to do too much things in parsing such as checking no
132 * conflicting structname, computing value, etc. Better to separate
133 * concern. So I put '=>' to mean what we would really like. In fact
134 * what we really like is defining another fullType, expression, etc
135 * from scratch, because many stuff are just sugar.
136 *
137 * invariant: Array and FunctionType have also typeQualifier but they
138 * dont have sense. I put this to factorise some code. If you look in
139 * the grammar, you see that we can never specify const for the array
140 * himself (but we can do it for pointer) or function, we always
141 * have in the action rule of the grammar a { (nQ, FunctionType ...) }.
142 *
143 *
144 * Because of ExprStatement, we can have more 'new scope' events, but
145 * rare I think. For instance with 'array of constExpression' there can
146 * have an exprStatement and a new (local) struct defined. Same for
147 * Constructor.
148 *
149 *)
150
151
152 and fullType = typeQualifier * typeC
153 and typeC = typeCbis wrap
154
155 and typeCbis =
156 | BaseType of baseType
157
158 | Pointer of fullType
159 | Array of constExpression option * fullType
160 | FunctionType of functionType
161
162 | Enum of string option * enumType
163 | StructUnion of structUnion * string option * structType (* new scope *)
164
165 | EnumName of string
166 | StructUnionName of structUnion * string
167
168 | TypeName of name * fullType option (* semantic: filled later *)
169
170 | ParenType of fullType (* forunparser: *)
171
172 (* gccext: TypeOfType below may seems useless; Why declare a
173 * __typeof__(int) x; ?
174 * When used with macros, it allows to fix a problem of C which
175 * is that type declaration can be spread around the ident. Indeed it
176 * may be difficult to have a macro such as
177 * '#define macro(type, ident) type ident;'
178 * because when you want to do a
179 * macro(char[256], x),
180 * then it will generate invalid code, but with a
181 * '#define macro(type, ident) __typeof(type) ident;'
182 * it will work.
183 *)
184 | TypeOfExpr of expression
185 | TypeOfType of fullType
186
187 (* cppext: IfdefType TODO *)
188
189 (* -------------------------------------- *)
190 and baseType = Void
191 | IntType of intType
192 | FloatType of floatType
193
194 (* stdC: type section
195 * add a | SizeT ?
196 * note: char and signed char are semantically different!!
197 *)
198 and intType = CChar (* obsolete? | CWchar *)
199 | Si of signed
200
201 and signed = sign * base
202 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
203 and sign = Signed | UnSigned
204
205 and floatType = CFloat | CDouble | CLongDouble
206
207
208 (* -------------------------------------- *)
209 and structUnion = Struct | Union
210 and structType = field list
211 and field = fieldbis wrap
212 and fieldbis =
213 | DeclarationField of field_declaration
214 (* gccext: *)
215 | EmptyField
216
217 (* cppext: *)
218 | MacroStructDeclTodo
219
220 (* cppext: *)
221 | CppDirectiveStruct of cpp_directive
222 | IfdefStruct of ifdef_directive (* * field list list *)
223
224
225 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
226 and field_declaration =
227 | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *)
228
229 (* At first I thought that a bitfield could be only Signed/Unsigned.
230 * But it seems that gcc allow char i:4. C rule must say that you
231 * can cast into int so enum too, ...
232 *)
233 and fieldkind =
234 | Simple of name option * fullType
235 | BitField of name option * fullType *
236 info (* : *) * constExpression
237 (* fullType => BitFieldInt | BitFieldUnsigned *)
238
239
240 (* -------------------------------------- *)
241 and enumType = (name * (info (* = *) * constExpression) option)
242 wrap2 (* , *) list
243 (* => string * int list *)
244
245
246 (* -------------------------------------- *)
247 (* return * (params * has "...") *)
248 and functionType = fullType * (parameterType wrap2 list * bool wrap)
249 and parameterType =
250 { p_namei: name option;
251 p_register: bool wrap;
252 p_type: fullType;
253 }
254 (* => (bool (register) * fullType) list * bool *)
255
256
257 and typeQualifier = typeQualifierbis wrap
258 and typeQualifierbis = {const: bool; volatile: bool}
259
260 (* gccext: cppext: *)
261 and attribute = attributebis wrap
262 and attributebis =
263 | Attribute of string
264
265 (* ------------------------------------------------------------------------- *)
266 (* C expression *)
267 (* ------------------------------------------------------------------------- *)
268 and expression = (expressionbis * exp_info ref (* semantic: *)) wrap
269 and exp_info = exp_type option * test
270 and exp_type = fullType (* Type_c.completed_and_simplified *) * local
271 and local = LocalVar of parse_info | NotLocalVar (* cocci: *)
272 and test = Test | NotTest (* cocci: *)
273
274 and expressionbis =
275
276 (* Ident can be a enumeration constant, a simple variable, a name of a func.
277 * With cppext, Ident can also be the name of a macro. Sparse says
278 * "an identifier with a meaning is a symbol" *)
279 | Ident of name (* todo? more semantic info such as LocalFunc *)
280
281 | Constant of constant
282 | FunCall of expression * argument wrap2 (* , *) list
283 (* gccext: x ? /* empty */ : y <=> x ? x : y; hence the 'option' below *)
284 | CondExpr of expression * expression option * expression
285
286 (* should be considered as statements, bad C langage *)
287 | Sequence of expression * expression
288 | Assignment of expression * assignOp * expression
289
290
291 | Postfix of expression * fixOp
292 | Infix of expression * fixOp
293
294 | Unary of expression * unaryOp
295 | Binary of expression * binaryOp * expression
296
297 | ArrayAccess of expression * expression
298
299 (* field ident access *)
300 | RecordAccess of expression * name
301 | RecordPtAccess of expression * name
302 (* redundant normally, could replace it by DeRef RecordAcces *)
303
304 | SizeOfExpr of expression
305 | SizeOfType of fullType
306 | Cast of fullType * expression
307
308 (* gccext: *)
309 | StatementExpr of compound wrap (* ( ) new scope *)
310 | Constructor of fullType * initialiser wrap2 (* , *) list
311
312 (* forunparser: *)
313 | ParenExpr of expression
314
315 (* cppext: IfdefExpr TODO *)
316
317 (* cppext: normmally just expression *)
318 and argument = (expression, weird_argument) either
319 and weird_argument =
320 | ArgType of parameterType
321 | ArgAction of action_macro
322 and action_macro =
323 (* todo: ArgStatement of statement, possibly have ghost token *)
324 | ActMisc of il
325
326
327 (* I put string for Int and Float because int would not be enough because
328 * OCaml int are 31 bits. So simpler to do string. Same reason to have
329 * string instead of int list for the String case.
330 *
331 * note: -2 is not a constant, it is the unary operator '-'
332 * applied to constant 2. So the string must represent a positive
333 * integer only. *)
334
335 and constant =
336 | String of (string * isWchar)
337 | MultiString of string list (* can contain MacroString, todo: more info *)
338 | Char of (string * isWchar) (* normally it is equivalent to Int *)
339 | Int of (string (* * intType*))
340 | Float of (string * floatType)
341
342 and isWchar = IsWchar | IsChar
343
344
345 and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not
346 | GetRefLabel (* gccext: GetRefLabel, via &&label notation *)
347 and assignOp = SimpleAssign | OpAssign of arithOp
348 and fixOp = Dec | Inc
349
350 and binaryOp = Arith of arithOp | Logical of logicalOp
351
352 and arithOp =
353 | Plus | Minus | Mul | Div | Mod
354 | DecLeft | DecRight
355 | And | Or | Xor
356
357 and logicalOp =
358 | Inf | Sup | InfEq | SupEq
359 | Eq | NotEq
360 | AndLog | OrLog
361
362 and constExpression = expression (* => int *)
363
364
365 (* ------------------------------------------------------------------------- *)
366 (* C statement *)
367 (* ------------------------------------------------------------------------- *)
368 (* note: that assignement is not a statement but an expression;
369 * wonderful C langage.
370 *
371 * note: I use 'and' for type definition cos gccext allow statement as
372 * expression, so need mutual recursive type definition.
373 *
374 *)
375
376 and statement = statementbis wrap
377 and statementbis =
378 | Labeled of labeled
379 | Compound of compound (* new scope *)
380 | ExprStatement of exprStatement
381 | Selection of selection (* have fakeend *)
382 | Iteration of iteration (* have fakeend *)
383 | Jump of jump
384
385 (* simplify cocci: only at the beginning of a compound normally *)
386 | Decl of declaration
387
388 (* gccext: *)
389 | Asm of asmbody
390 | NestedFunc of definition
391
392 (* cppext: *)
393 | MacroStmt
394
395
396
397 and labeled = Label of name * statement
398 | Case of expression * statement
399 | CaseRange of expression * expression * statement (* gccext: *)
400 | Default of statement
401
402 (* cppext:
403 * old: compound = (declaration list * statement list)
404 * old: (declaration, statement) either list
405 * Simplify cocci to just have statement list, by integrating Decl in stmt.
406 *
407 * update: now introduce also the _sequencable to allow ifdef in the middle.
408 * Indeed, I now allow ifdefs in the ast but they must be only between
409 * "sequencable" elements. They can be put in a type only if this type
410 * is used in a list, like at the toplevel, used in a 'toplevel list',
411 * or inside a compound, used in a 'statement list'. I must not allow
412 * ifdef anywhere. For instance I can not make ifdef a statement
413 * cos some instruction like If accept only one statement and the
414 * ifdef directive must not take the place of a legitimate instruction.
415 * We had a similar phenomena in SmPL where we have the notion
416 * of statement and sequencable statement too. Once you have
417 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
418 * and introduce the ifdef.
419 *
420 * update: those ifdefs are either passed, or present in the AST but in
421 * a flat form. To structure those flat ifdefs you have to run
422 * a transformation that will put in a tree the statements inside
423 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
424 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
425 *
426 *)
427 and compound = statement_sequencable list
428
429 (* cppext: easier to put at statement_list level than statement level *)
430 and statement_sequencable =
431 | StmtElem of statement
432
433 (* cppext: *)
434 | CppDirectiveStmt of cpp_directive
435 | IfdefStmt of ifdef_directive
436
437 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
438 | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list
439
440 and exprStatement = expression option
441
442 (* for Switch, need check that all elements in the compound start
443 * with a case:, otherwise unreachable code.
444 *)
445 and selection =
446 | If of expression * statement * statement
447 | Switch of expression * statement
448
449
450 and iteration =
451 | While of expression * statement
452 | DoWhile of statement * expression
453 | For of exprStatement wrap * exprStatement wrap * exprStatement wrap *
454 statement
455 (* cppext: *)
456 | MacroIteration of string * argument wrap2 list * statement
457
458 and jump = Goto of name
459 | Continue | Break
460 | Return | ReturnExpr of expression
461 | GotoComputed of expression (* gccext: goto *exp ';' *)
462
463
464 (* gccext: *)
465 and asmbody = il (* string list *) * colon wrap (* : *) list
466 and colon = Colon of colon_option wrap2 list
467 and colon_option = colon_option_bis wrap
468 and colon_option_bis = ColonMisc | ColonExpr of expression
469
470
471 (* ------------------------------------------------------------------------- *)
472 (* Declaration *)
473 (* ------------------------------------------------------------------------- *)
474 (* (string * ...) option cos can have empty declaration or struct tag
475 * declaration.
476 *
477 * Before I had a Typedef constructor, but why make this special case and not
478 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
479 * declarations ? So I try to generalise and not have Typedef either. This
480 * requires more work in parsing. Better to separate concern.
481 *
482 * Before the need for unparser, I didn't have a DeclList but just a Decl.
483 *
484 * I am not sure what it means to declare a prototype inline, but gcc
485 * accepts it.
486 *)
487
488 and declaration =
489 | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *)
490 (* cppext: *)
491 | MacroDecl of (string * argument wrap2 list) wrap
492
493 and onedecl =
494 { v_namei: (name * (info (* = *) * initialiser) option) option;
495 v_type: fullType;
496 v_storage: storage;
497 v_local: local_decl; (* cocci: *)
498 v_attr: attribute list; (* gccext: *)
499 }
500 and storage = storagebis * bool (* gccext: inline or not *)
501 and storagebis = NoSto | StoTypedef | Sto of storageClass
502 and storageClass = Auto | Static | Register | Extern
503
504 and local_decl = LocalDecl | NotLocalDecl
505
506 and initialiser = initialiserbis wrap
507 and initialiserbis =
508 | InitExpr of expression
509 | InitList of initialiser wrap2 (* , *) list
510 (* gccext: *)
511 | InitDesignators of designator list * initialiser
512 | InitFieldOld of string * initialiser
513 | InitIndexOld of expression * initialiser
514
515 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
516 and designator = designatorbis wrap
517 and designatorbis =
518 | DesignatorField of string
519 | DesignatorIndex of expression
520 | DesignatorRange of expression * expression
521
522 (* ------------------------------------------------------------------------- *)
523 (* Function definition *)
524 (* ------------------------------------------------------------------------- *)
525 (* Normally we should define another type functionType2 because there
526 * are more restrictions on what can define a function than a pointer
527 * function. For instance a function declaration can omit the name of the
528 * parameter whereas a function definition can not. But, in some cases such
529 * as 'f(void) {', there is no name too, so I simplified and reused the
530 * same functionType type for both declaration and function definition.
531 *
532 * Also old style C does not have type in the parameter, so again simpler
533 * to abuse the functionType and allow missing type.
534 *)
535 and definition = definitionbis wrap (* ( ) { } fakestart sto *)
536 and definitionbis =
537 { f_name: name;
538 f_type: functionType; (* todo? a functionType2 ? *)
539 f_storage: storage;
540 f_body: compound;
541 f_attr: attribute list; (* gccext: *)
542 f_old_c_style: declaration list option;
543 }
544 (* cppext: IfdefFunHeader TODO *)
545
546 (* ------------------------------------------------------------------------- *)
547 (* cppext: cpp directives, #ifdef, #define and #include body *)
548 (* ------------------------------------------------------------------------- *)
549 and cpp_directive =
550 | Define of define
551 | Include of includ
552 | Undef of string wrap
553 | PragmaAndCo of il
554 (*| Ifdef ? no, ifdefs are handled differently, cf ifdef_directive below *)
555
556 and define = string wrap (* #define s *) * (define_kind * define_val)
557 and define_kind =
558 | DefineVar
559 | DefineFunc of ((string wrap) wrap2 list) wrap (* () *)
560 and define_val =
561 (* most common case; e.g. to define int constant *)
562 | DefineExpr of expression
563
564 | DefineStmt of statement
565 | DefineType of fullType
566 | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *)
567
568 | DefineFunction of definition
569 | DefineInit of initialiser (* in practice only { } with possible ',' *)
570
571 (* TODO DefineMulti of define_val list *)
572
573 | DefineText of string wrap
574 | DefineEmpty
575
576 | DefineTodo
577
578
579
580 and includ =
581 { i_include: inc_file wrap; (* #include s *)
582 (* cocci: computed in ? *)
583 i_rel_pos: include_rel_pos option ref;
584 (* cocci: cf -test incl *)
585 i_is_in_ifdef: bool;
586 (* cf cpp_ast_c.ml. set to None at parsing time. *)
587 i_content: (Common.filename (* full path *) * program) option;
588 }
589 and inc_file =
590 | Local of inc_elem list
591 | NonLocal of inc_elem list
592 | Weird of string (* ex: #include SYSTEM_H *)
593 and inc_elem = string
594
595 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
596 *
597 * The first_of and last_of store the list of prefixes that was
598 * introduced by the include. On #include <a/b/x>, if the include was
599 * the first in the file, it would give in first_of the following
600 * prefixes a/b/c; a/b/; a/ ; <empty>
601 *
602 * This is set after parsing, in cocci.ml, in update_rel_pos.
603 *)
604 and include_rel_pos = {
605 first_of : string list list;
606 last_of : string list list;
607 }
608
609
610
611 (* todo? to specialize if someone need more info *)
612 and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *)
613 | IfdefDirective of (ifdefkind * matching_tag) wrap
614 and ifdefkind =
615 | Ifdef (* todo? of string ? of formula_cpp ? *)
616 | IfdefElseif (* same *)
617 | IfdefElse (* same *)
618 | IfdefEndif
619 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
620 * a global so it means if you parse the same file twice you may get
621 * different id. I try now to avoid this pb by resetting it each
622 * time I parse a file.
623 *)
624 and matching_tag =
625 IfdefTag of (int (* tag *) * int (* total with this tag *))
626
627
628
629
630
631 (* ------------------------------------------------------------------------- *)
632 (* The toplevels elements *)
633 (* ------------------------------------------------------------------------- *)
634 and toplevel =
635 | Declaration of declaration
636 | Definition of definition
637
638 (* cppext: *)
639 | CppTop of cpp_directive
640 | IfdefTop of ifdef_directive (* * toplevel list *)
641
642 (* cppext: *)
643 | MacroTop of string * argument wrap2 list * il
644
645 | EmptyDef of il (* gccext: allow redundant ';' *)
646 | NotParsedCorrectly of il
647
648 | FinalDef of info (* EOF *)
649
650 (* ------------------------------------------------------------------------- *)
651 and program = toplevel list
652
653
654 (*****************************************************************************)
655 (* Cocci Bindings *)
656 (*****************************************************************************)
657 (* Was previously in pattern.ml, but because of the transformer,
658 * we need to decorate each token with some cocci code AND the environment
659 * for this cocci code.
660 *)
661 and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc
662 and metavar_binding_kind =
663 | MetaIdVal of string
664 | MetaFuncVal of string
665 | MetaLocalFuncVal of string
666
667 | MetaExprVal of expression (* a "clean expr" *)
668 | MetaExprListVal of argument wrap2 list
669 | MetaParamVal of parameterType
670 | MetaParamListVal of parameterType wrap2 list
671
672 | MetaTypeVal of fullType
673 | MetaInitVal of initialiser
674 | MetaStmtVal of statement
675
676 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
677 * because don't need to have the value for a position in the env of
678 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
679 * variables accessible via SmPL whereas the position can be one day
680 * so I think it's better to put MetaPosVal here *)
681 | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *)
682 | MetaPosValList of
683 (Common.filename * string (*element*) * posl * posl) list (* min, max *)
684 | MetaListlenVal of int
685
686
687 (*****************************************************************************)
688 (* C comments *)
689 (*****************************************************************************)
690
691 (* convention: I often use "m" for comments as I can not use "c"
692 * (already use for c stuff) and "com" is too long.
693 *)
694
695 (* this type will be associated to each token.
696 *)
697 and comments_around = {
698 mbefore: Token_c.comment_like_token list;
699 mafter: Token_c.comment_like_token list;
700 }
701 (* old: can do something simpler than CComment for coccinelle, cf above.
702 mbefore: comment_and_relative_pos list;
703 mafter: comment_and_relative_pos list;
704 and comment_and_relative_pos = {
705
706 minfo: Common.parse_info;
707 (* the int represent the number of lines of difference between the
708 * current token and the comment. When on same line, this number is 0.
709 * When previous line, -1. In some way the after/before in previous
710 * record is useless because the sign of the integer can helps
711 * do the difference too, but I keep it that way.
712 *)
713 mpos: int;
714 (* todo?
715 * cppbetween: bool; touse? if false positive
716 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
717 *)
718 }
719
720 and comment = Common.parse_info
721 and com = comment list ref
722 *)
723
724
725
726
727 (*****************************************************************************)
728 (* Some constructors *)
729 (*****************************************************************************)
730 let nullQualif = ({const=false; volatile= false}, [])
731 let nQ = nullQualif
732
733 let defaultInt = (BaseType (IntType (Si (Signed, CInt))))
734
735 let noType () = ref (None,NotTest)
736 let noInstr = (ExprStatement (None), [])
737 let noTypedefDef () = None
738
739
740 let emptyMetavarsBinding =
741 ([]: metavars_binding)
742
743 let emptyAnnot =
744 (Ast_cocci.CONTEXT (Ast_cocci.NoPos,Ast_cocci.NOTHING),
745 emptyMetavarsBinding)
746
747 let emptyComments= {
748 mbefore = [];
749 mafter = [];
750 }
751
752
753 (* for include, some meta information needed by cocci *)
754 let noRelPos () =
755 ref (None: include_rel_pos option)
756 let noInIfdef () =
757 ref false
758
759
760 (* When want add some info in ast that does not correspond to
761 * an existing C element.
762 * old: or when don't want 'synchronize' on it in unparse_c.ml
763 * (now have other mark for tha matter).
764 *)
765 let no_virt_pos = ({str="";charpos=0;line=0;column=0;file=""},-1)
766
767 let fakeInfo pi =
768 { pinfo = FakeTok ("",no_virt_pos);
769 cocci_tag = ref emptyAnnot;
770 comments_tag = ref emptyComments;
771 }
772
773 let noii = []
774 let noattr = []
775 let noi_content = (None: ((Common.filename * program) option))
776
777 (*****************************************************************************)
778 (* Wrappers *)
779 (*****************************************************************************)
780 let unwrap = fst
781
782 let unwrap2 = fst
783
784
785 let unwrap_expr ((unwrap_e, typ), iie) = unwrap_e
786 let rewrap_expr ((_old_unwrap_e, typ), iie) newe = ((newe, typ), iie)
787
788 let get_type_expr ((unwrap_e, typ), iie) = !typ
789 let set_type_expr ((unwrap_e, oldtyp), iie) newtyp =
790 oldtyp := newtyp
791 (* old: (unwrap_e, newtyp), iie *)
792
793 let get_onlytype_expr ((unwrap_e, typ), iie) =
794 match !typ with
795 | Some (ft,_local), _test -> Some ft
796 | None, _ -> None
797
798 let get_onlylocal_expr ((unwrap_e, typ), iie) =
799 match !typ with
800 | Some (ft,local), _test -> Some local
801 | None, _ -> None
802
803
804
805 let unwrap_typeC (qu, (typeC, ii)) = typeC
806 let rewrap_typeC (qu, (typeC, ii)) newtypeC = (qu, (newtypeC, ii))
807
808
809 (* ------------------------------------------------------------------------- *)
810 let rewrap_str s ii =
811 {ii with pinfo =
812 (match ii.pinfo with
813 OriginTok pi -> OriginTok { pi with Common.str = s;}
814 | ExpandedTok (pi,vpi) -> ExpandedTok ({ pi with Common.str = s;},vpi)
815 | FakeTok (_,vpi) -> FakeTok (s,vpi)
816 | AbstractLineTok pi -> OriginTok { pi with Common.str = s;})}
817
818 let rewrap_pinfo pi ii =
819 {ii with pinfo = pi}
820
821 (* info about the current location *)
822 let get_pi = function
823 OriginTok pi -> pi
824 | ExpandedTok (_,(pi,_)) -> pi
825 | FakeTok (_,(pi,_)) -> pi
826 | AbstractLineTok pi -> pi
827
828 (* original info *)
829 let get_opi = function
830 OriginTok pi -> pi
831 | ExpandedTok (pi,_) -> pi
832 | FakeTok (_,_) -> failwith "no position information"
833 | AbstractLineTok pi -> pi
834
835 let str_of_info ii =
836 match ii.pinfo with
837 OriginTok pi -> pi.Common.str
838 | ExpandedTok (pi,_) -> pi.Common.str
839 | FakeTok (s,_) -> s
840 | AbstractLineTok pi -> pi.Common.str
841
842 let get_info f ii =
843 match ii.pinfo with
844 OriginTok pi -> f pi
845 | ExpandedTok (_,(pi,_)) -> f pi
846 | FakeTok (_,(pi,_)) -> f pi
847 | AbstractLineTok pi -> f pi
848
849 let get_orig_info f ii =
850 match ii.pinfo with
851 OriginTok pi -> f pi
852 | ExpandedTok (pi,_) -> f pi
853 | FakeTok (_,(pi,_)) -> f pi
854 | AbstractLineTok pi -> f pi
855
856 let make_expanded ii =
857 {ii with pinfo = ExpandedTok (get_opi ii.pinfo,no_virt_pos)}
858
859 let pos_of_info ii = get_info (function x -> x.Common.charpos) ii
860 let opos_of_info ii = get_orig_info (function x -> x.Common.charpos) ii
861 let line_of_info ii = get_orig_info (function x -> x.Common.line) ii
862 let col_of_info ii = get_orig_info (function x -> x.Common.column) ii
863 let file_of_info ii = get_orig_info (function x -> x.Common.file) ii
864 let mcode_of_info ii = fst (!(ii.cocci_tag))
865 let pinfo_of_info ii = ii.pinfo
866 let parse_info_of_info ii = get_pi ii.pinfo
867
868 let is_fake ii =
869 match ii.pinfo with
870 FakeTok (_,_) -> true
871 | _ -> false
872
873 let is_origintok ii =
874 match ii.pinfo with
875 | OriginTok pi -> true
876 | _ -> false
877
878 (* ------------------------------------------------------------------------- *)
879 type posrv = Real of Common.parse_info | Virt of virtual_position
880
881 let compare_pos ii1 ii2 =
882 let get_pos = function
883 OriginTok pi -> Real pi
884 | FakeTok (s,vpi) -> Virt vpi
885 | ExpandedTok (pi,vpi) -> Virt vpi
886 | AbstractLineTok pi -> Real pi in (* used for printing *)
887 let pos1 = get_pos (pinfo_of_info ii1) in
888 let pos2 = get_pos (pinfo_of_info ii2) in
889 match (pos1,pos2) with
890 (Real p1, Real p2) ->
891 compare p1.Common.charpos p2.Common.charpos
892 | (Virt (p1,_), Real p2) ->
893 if (compare p1.Common.charpos p2.Common.charpos) =|= (-1) then (-1) else 1
894 | (Real p1, Virt (p2,_)) ->
895 if (compare p1.Common.charpos p2.Common.charpos) =|= 1 then 1 else (-1)
896 | (Virt (p1,o1), Virt (p2,o2)) ->
897 let poi1 = p1.Common.charpos in
898 let poi2 = p2.Common.charpos in
899 match compare poi1 poi2 with
900 -1 -> -1
901 | 0 -> compare o1 o2
902 | x -> x
903
904 let equal_posl (l1,c1) (l2,c2) =
905 (l1 =|= l2) && (c1 =|= c2)
906
907 let info_to_fixpos ii =
908 match pinfo_of_info ii with
909 OriginTok pi -> Ast_cocci.Real pi.Common.charpos
910 | ExpandedTok (_,(pi,offset)) ->
911 Ast_cocci.Virt (pi.Common.charpos,offset)
912 | FakeTok (_,(pi,offset)) ->
913 Ast_cocci.Virt (pi.Common.charpos,offset)
914 | AbstractLineTok pi -> failwith "unexpected abstract"
915
916 (* cocci: *)
917 let is_test (e : expression) =
918 let (_,info) = unwrap e in
919 let (_,test) = !info in
920 test =*= Test
921
922 (*****************************************************************************)
923 (* Abstract line *)
924 (*****************************************************************************)
925
926 (* When we have extended the C Ast to add some info to the tokens,
927 * such as its line number in the file, we can not use anymore the
928 * ocaml '=' to compare Ast elements. To overcome this problem, to be
929 * able to use again '=', we just have to get rid of all those extra
930 * information, to "abstract those line" (al) information.
931 *
932 * Julia then modifies it a little to have a tokenindex, so the original
933 * true al_info is in fact real_al_info.
934 *)
935
936 let al_info tokenindex x =
937 { pinfo =
938 (AbstractLineTok
939 {charpos = tokenindex;
940 line = tokenindex;
941 column = tokenindex;
942 file = "";
943 str = str_of_info x});
944 cocci_tag = ref emptyAnnot;
945 comments_tag = ref emptyComments;
946 }
947
948 let semi_al_info x =
949 { x with
950 cocci_tag = ref emptyAnnot;
951 comments_tag = ref emptyComments;
952 }
953
954 let magic_real_number = -10
955
956 let real_al_info x =
957 { pinfo =
958 (AbstractLineTok
959 {charpos = magic_real_number;
960 line = magic_real_number;
961 column = magic_real_number;
962 file = "";
963 str = str_of_info x});
964 cocci_tag = ref emptyAnnot;
965 comments_tag = ref emptyComments;
966 }
967
968 let al_comments x =
969 let keep_cpp l =
970 List.filter (function (Token_c.TCommentCpp _,_) -> true | _ -> false) l in
971 let al_com (x,i) =
972 (x,{i with Common.charpos = magic_real_number;
973 Common.line = magic_real_number;
974 Common.column = magic_real_number}) in
975 {mbefore = []; (* duplicates mafter of the previous token *)
976 mafter = List.map al_com (keep_cpp x.mafter)}
977
978 let al_info_cpp tokenindex x =
979 { pinfo =
980 (AbstractLineTok
981 {charpos = tokenindex;
982 line = tokenindex;
983 column = tokenindex;
984 file = "";
985 str = str_of_info x});
986 cocci_tag = ref emptyAnnot;
987 comments_tag = ref (al_comments !(x.comments_tag));
988 }
989
990 let semi_al_info_cpp x =
991 { x with
992 cocci_tag = ref emptyAnnot;
993 comments_tag = ref (al_comments !(x.comments_tag));
994 }
995
996 let real_al_info_cpp x =
997 { pinfo =
998 (AbstractLineTok
999 {charpos = magic_real_number;
1000 line = magic_real_number;
1001 column = magic_real_number;
1002 file = "";
1003 str = str_of_info x});
1004 cocci_tag = ref emptyAnnot;
1005 comments_tag = ref (al_comments !(x.comments_tag));
1006 }
1007
1008
1009 (*****************************************************************************)
1010 (* Views *)
1011 (*****************************************************************************)
1012
1013 (* Transform a list of arguments (or parameters) where the commas are
1014 * represented via the wrap2 and associated with an element, with
1015 * a list where the comma are on their own. f(1,2,2) was
1016 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
1017 *
1018 * Used in cocci_vs_c.ml, to have a more direct correspondance between
1019 * the ast_cocci of julia and ast_c.
1020 *)
1021 let rec (split_comma: 'a wrap2 list -> ('a, il) either list) =
1022 function
1023 | [] -> []
1024 | (e, ii)::xs ->
1025 if null ii
1026 then (Left e)::split_comma xs
1027 else Right ii::Left e::split_comma xs
1028
1029 let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) =
1030 function
1031 | [] -> []
1032 | Right ii::Left e::xs ->
1033 (e, ii)::unsplit_comma xs
1034 | Left e::xs ->
1035 let empty_ii = [] in
1036 (e, empty_ii)::unsplit_comma xs
1037 | Right ii::_ ->
1038 raise Impossible
1039
1040
1041
1042
1043 (*****************************************************************************)
1044 (* Helpers, could also be put in lib_parsing_c.ml instead *)
1045 (*****************************************************************************)
1046
1047 let rec stmt_elems_of_sequencable xs =
1048 xs +> Common.map (fun x ->
1049 match x with
1050 | StmtElem e -> [e]
1051 | CppDirectiveStmt _
1052 | IfdefStmt _
1053 ->
1054 pr2_once ("stmt_elems_of_sequencable: filter a directive");
1055 []
1056 | IfdefStmt2 (_ifdef, xxs) ->
1057 pr2 ("stmt_elems_of_sequencable: IfdefStm2 TODO?");
1058 xxs +> List.map (fun xs ->
1059 let xs' = stmt_elems_of_sequencable xs in
1060 xs'
1061 ) +> List.flatten
1062 ) +> List.flatten
1063
1064
1065
1066
1067 (* should maybe be in pretty_print_c ? *)
1068
1069 let s_of_inc_file inc_file =
1070 match inc_file with
1071 | Local xs -> xs +> Common.join "/"
1072 | NonLocal xs -> xs +> Common.join "/"
1073 | Weird s -> s
1074
1075 let s_of_inc_file_bis inc_file =
1076 match inc_file with
1077 | Local xs -> "\"" ^ xs +> Common.join "/" ^ "\""
1078 | NonLocal xs -> "<" ^ xs +> Common.join "/" ^ ">"
1079 | Weird s -> s
1080
1081 let fieldname_of_fieldkind fieldkind =
1082 match fieldkind with
1083 | Simple (sopt, ft) -> sopt
1084 | BitField (sopt, ft, info, expr) -> sopt
1085
1086
1087 let s_of_attr attr =
1088 attr
1089 +> List.map (fun (Attribute s, ii) -> s)
1090 +> Common.join ","
1091
1092 let str_of_name ident =
1093 match ident with
1094 | RegularName (s,ii) -> s
1095 | CppConcatenatedName xs ->
1096 xs +> List.map (fun (x,iiop) -> unwrap x) +> Common.join "##"
1097 | CppVariadicName (s, ii) -> "##" ^ s
1098 | CppIdentBuilder ((s,iis), xs) ->
1099 s ^ "(" ^
1100 (xs +> List.map (fun ((x,iix), iicomma) -> x) +> Common.join ",") ^
1101 ")"
1102
1103 let info_of_name ident =
1104 match ident with
1105 | RegularName (s,ii) -> List.hd ii
1106 | CppConcatenatedName xs ->
1107 (match xs with
1108 | [] -> raise Impossible
1109 | ((x,ii1),ii2)::xs ->
1110 List.hd ii1
1111 )
1112 | CppVariadicName (s, ii) ->
1113 let (iihash, iis) = Common.tuple_of_list2 ii in
1114 iihash
1115 | CppIdentBuilder ((s,iis),xs) ->
1116 List.hd iis
1117
1118 let get_s_and_ii_of_name name =
1119 match name with
1120 | RegularName (s, iis) -> s, List.hd iis
1121 | _ -> raise Todo
1122
1123
1124 let name_of_parameter param =
1125 param.p_namei +> Common.map_option (str_of_name)
1126