Release coccinelle-0.1.5
[bpt/coccinelle.git] / parsing_c / ast_c.ml
1 (* Copyright (C) 2002, 2006, 2007, 2008 Yoann Padioleau
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License (GPL)
5 * version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * file license.txt for more details.
11 *)
12 open Common
13
14 (*****************************************************************************)
15 (* The AST C related types *)
16 (*****************************************************************************)
17
18 (* To allow some transformations over the AST, we keep as much information
19 * as possible in the AST such as the tokens content and their locations.
20 * Those info are called 'info' (how original) and can be tagged.
21 * For instance one tag may say that the unparser should remove this token.
22 *
23 * Update: Now I use a ref! in those 'info' so take care.
24 *
25 * Sometimes we want to add someting at the beginning or at the end
26 * of a construct. For 'function' and 'decl' we want to add something
27 * to their left and for 'if' 'while' et 'for' and so on at their right.
28 * We want some kinds of "virtual placeholders" that represent the start or
29 * end of a construct. We use fakeInfo for that purpose.
30 * To identify those cases I have added a fakestart/fakeend comment.
31 *
32 * convention: I often use 'ii' for the name of a list of info.
33 *
34 * update: I now allow ifdefs in the ast but there must be only between
35 * "sequencable" elements. They can be put in a type only if this type
36 * is used only in a list, like at toplevel, used in 'toplevel list',
37 * or inside compound, used in 'statement list'. I must not allow
38 * ifdef anywhere. For instance I can not make ifdef a statement
39 * cos some instruction like If accept only one statement and the
40 * ifdef directive must not take the place of a legitimate instruction.
41 * We had a similar phenomena in SmPL where we have the notion
42 * of statement and sequencable statement too. Once you have
43 * such a type of sequencable thing, then s/xx list/xx_sequencable list/
44 * and introduce the ifdef.
45 *
46 * update: those ifdefs are either passed, or present in the AST but in
47 * a flat form. To structure those flat ifdefs you have to run
48 * a transformation that will put in a tree the statements inside
49 * ifdefs branches. Cf cpp_ast_c.ml. This is for instance the difference
50 * between a IfdefStmt (flat) and IfdefStmt2 (tree structured).
51 *
52 * Some stuff are tagged semantic: which means that they are computed
53 * after parsing.
54 *
55 * cocci: Each token will be decorated in the future by the mcodekind
56 * of cocci. It is the job of the pretty printer to look at this
57 * information and decide to print or not the token (and also the
58 * pending '+' associated sometimes with the token).
59 *
60 * The first time that we parse the original C file, the mcodekind is
61 * empty, or more precisely all is tagged as a CONTEXT with NOTHING
62 * associated. This is what I call a "clean" expr/statement/....
63 *
64 * Each token will also be decorated in the future with an environment,
65 * because the pending '+' may contain metavariables that refer to some
66 * C code.
67 *
68 *
69 * All of this means that some elements in this AST are present only if
70 * some annotation/transformation has been done on the original AST returned
71 * by the parser. Cf type_annotater, comment_annotater, cpp_ast_c, etc.
72 *)
73
74 (* forunparser: *)
75
76 type posl = int * int (* line-col, for MetaPosValList, for position variables *)
77
78 (* the virtual position is set in Parsing_hacks.insert_virtual_positions *)
79 type virtual_position = Common.parse_info * int (* character offset *)
80
81 type parse_info =
82 (* Present both in ast and list of tokens *)
83 | OriginTok of Common.parse_info
84 (* Present only in ast and generated after parsing. Used mainly
85 * by Julia, to add stuff at virtual places, beginning of func or decl *)
86 | FakeTok of string * virtual_position
87 (* Present both in ast and list of tokens. *)
88 | ExpandedTok of Common.parse_info * virtual_position
89 (* Present neither in ast nor in list of tokens
90 * but only in the '+' of the mcode of some tokens. Those kind of tokens
91 * are used to be able to use '=' to compare big ast portions.
92 *)
93 | AbstractLineTok of Common.parse_info (* local to the abstracted thing *)
94
95 type info = {
96 pinfo : parse_info;
97 (* this tag can be changed, which is how we can express some program
98 * transformations by tagging the tokens involved in this transformation.
99 *)
100 cocci_tag: (Ast_cocci.mcodekind * metavars_binding) ref;
101 (* set in comment_annotater.ml *)
102 comments_tag: comments_around ref;
103 (* todo? token_info : sometimes useful to know what token it was *)
104 }
105 and il = info list
106
107 (* wrap2 is like wrap, except that I use it often for separator such
108 * as ','. In that case the info is associated to the argument that
109 * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *)
110 and 'a wrap = 'a * il
111 and 'a wrap2 = 'a * il
112
113 (* ------------------------------------------------------------------------- *)
114 (* C Type *)
115 (* ------------------------------------------------------------------------- *)
116 (* Could have more precise type in fullType, in expression, etc, but
117 * it requires to do too much things in parsing such as checking no
118 * conflicting structname, computing value, etc. Better to separate
119 * concern. So I put '=>' to mean what we would really like. In fact
120 * what we really like is defining another fullType, expression, etc
121 * from scratch, because many stuff are just sugar.
122 *
123 * invariant: Array and FunctionType have also typeQualifier but they
124 * dont have sense. I put this to factorise some code. If you look in
125 * the grammar, you see that we can never specify const for the array
126 * himself (but we can do it for pointer) or function, we always
127 * have in the action rule of the grammar a { (nQ, FunctionType ...) }.
128 *
129 *
130 * Because of ExprStatement, we can have more 'new scope' events, but
131 * rare I think. For instance with 'array of constExpression' there can
132 * have an exprStatement and a new (local) struct defined. Same for
133 * Constructor.
134 *
135 * Some stuff are tagged semantic: which means that they are computed
136 * after parsing.
137 *)
138
139
140 and fullType = typeQualifier * typeC
141 and typeC = typeCbis wrap
142
143 and typeCbis =
144 | BaseType of baseType
145
146 | Pointer of fullType
147 | Array of constExpression option * fullType
148 | FunctionType of functionType
149
150 | Enum of string option * enumType
151 | StructUnion of structUnion * string option * structType (* new scope *)
152
153 | EnumName of string
154 | StructUnionName of structUnion * string
155
156 | TypeName of string * fullType option (* semantic: filled later *)
157
158 | ParenType of fullType (* forunparser: *)
159
160 (* gccext: TypeOfType may seems useless; why declare a
161 * __typeof__(int) x; ?
162 * But when used with macro, it allows to fix a problem of C which
163 * is that type declaration can be spread around the ident. Indeed it
164 * may be difficult to have a macro such as
165 * '#define macro(type, ident) type ident;'
166 * because when you want to do a
167 * macro(char[256], x),
168 * then it will generate invalid code, but with a
169 * '#define macro(type, ident) __typeof(type) ident;'
170 * it will work. *)
171 | TypeOfExpr of expression
172 | TypeOfType of fullType
173
174 (* cppext: IfdefType TODO *)
175
176 (* -------------------------------------- *)
177 and baseType = Void
178 | IntType of intType
179 | FloatType of floatType
180
181 (* stdC: type section
182 * add a | SizeT ?
183 * note: char and signed char are semantically different!!
184 *)
185 and intType = CChar (* obsolete? | CWchar *)
186 | Si of signed
187
188 and signed = sign * base
189 and base = CChar2 | CShort | CInt | CLong | CLongLong (* gccext: *)
190 and sign = Signed | UnSigned
191
192 and floatType = CFloat | CDouble | CLongDouble
193
194
195 (* -------------------------------------- *)
196 and structUnion = Struct | Union
197 and structType = field list
198 and field = fieldbis wrap
199 and fieldbis =
200 | DeclarationField of field_declaration
201 | EmptyField (* gccext: *)
202 (* cppext: *)
203 | MacroStructDeclTodo
204
205 (* cppext: *)
206 | CppDirectiveStruct of cpp_directive
207 | IfdefStruct of ifdef_directive (* * field list list *)
208
209
210 (* before unparser, I didn't have a FieldDeclList but just a Field. *)
211 and field_declaration =
212 | FieldDeclList of fieldkind wrap2 list (* , *) wrap (* ; *)
213
214 (* At first I thought that a bitfield could be only Signed/Unsigned.
215 * But it seems that gcc allow char i:4. C rule must say that you
216 * can cast into int so enum too, ...
217 *)
218 and fieldkind = fieldkindbis wrap (* s : *)
219 and fieldkindbis =
220 | Simple of string option * fullType
221 | BitField of string option * fullType * constExpression
222 (* fullType => BitFieldInt | BitFieldUnsigned *)
223
224
225 (* -------------------------------------- *)
226 and enumType = (string * constExpression option) wrap (* s = *)
227 wrap2 (* , *) list
228 (* => string * int list *)
229
230
231 (* -------------------------------------- *)
232 (* return * (params * has "...") *)
233 and functionType = fullType * (parameterType wrap2 list * bool wrap)
234 and parameterType = (bool * string option * fullType) wrap (* reg s *)
235 (* => (bool (register) * fullType) list * bool *)
236
237
238 and typeQualifier = typeQualifierbis wrap
239 and typeQualifierbis = {const: bool; volatile: bool}
240
241 (* gccext: cppext: *)
242 and attribute = attributebis wrap
243 and attributebis =
244 | Attribute of string
245
246 (* ------------------------------------------------------------------------- *)
247 (* C expression *)
248 (* ------------------------------------------------------------------------- *)
249 and expression = (expressionbis * exp_info ref (* semantic: *)) wrap
250 and exp_info = exp_type option * test
251 and exp_type = fullType * local
252 and local = LocalVar of parse_info | NotLocalVar (* cocci: *)
253 and test = Test | NotTest (* cocci: *)
254
255 and expressionbis =
256
257 (* Ident can be a enumeration constant, a simple variable, a name of a func.
258 * With cppext, Ident can also be the name of a macro. Sparse says
259 * "an identifier with a meaning is a symbol".
260 *)
261 | Ident of string (* todo? more semantic info such as LocalFunc *)
262 | Constant of constant
263 | FunCall of expression * argument wrap2 (* , *) list
264 (* gccext: x ? /* empty */ : y <=> x ? x : y; *)
265 | CondExpr of expression * expression option * expression
266
267 (* should be considered as statements, bad C langage *)
268 | Sequence of expression * expression
269 | Assignment of expression * assignOp * expression
270
271
272 | Postfix of expression * fixOp
273 | Infix of expression * fixOp
274
275 | Unary of expression * unaryOp
276 | Binary of expression * binaryOp * expression
277
278 | ArrayAccess of expression * expression
279
280 (* field ident access *)
281 | RecordAccess of expression * string
282 | RecordPtAccess of expression * string
283 (* redundant normally, could replace it by DeRef RecordAcces *)
284
285 | SizeOfExpr of expression
286 | SizeOfType of fullType
287 | Cast of fullType * expression
288
289 (* gccext: *)
290 | StatementExpr of compound wrap (* ( ) new scope *)
291 | Constructor of fullType * initialiser wrap2 (* , *) list
292
293 (* forunparser: *)
294 | ParenExpr of expression
295
296 (* cppext: IfdefExpr TODO *)
297
298 (* cppext: normmally just expression *)
299 and argument = (expression, wierd_argument) either
300 and wierd_argument =
301 | ArgType of parameterType
302 | ArgAction of action_macro
303 and action_macro =
304 (* todo: ArgStatement of statement, possibly have ghost token *)
305 | ActMisc of il
306
307
308 (* I put string for Int and Float because int would not be enough because
309 * OCaml int are 31 bits. So simpler to do string. Same reason to have
310 * string instead of int list for the String case.
311 *
312 * note: that -2 is not a constant, it is the unary operator '-'
313 * applied to constant 2. So the string must represent a positive
314 * integer only. *)
315
316 and constant =
317 | String of (string * isWchar)
318 | MultiString (* can contain MacroString, todo: more info *)
319 | Char of (string * isWchar) (* normally it is equivalent to Int *)
320 | Int of (string (* * intType*))
321 | Float of (string * floatType)
322
323 and isWchar = IsWchar | IsChar
324
325
326 and unaryOp = GetRef | DeRef | UnPlus | UnMinus | Tilde | Not
327 | GetRefLabel (* gccext: GetRefLabel, via &&label notation *)
328 and assignOp = SimpleAssign | OpAssign of arithOp
329 and fixOp = Dec | Inc
330
331 and binaryOp = Arith of arithOp | Logical of logicalOp
332
333 and arithOp =
334 | Plus | Minus | Mul | Div | Mod
335 | DecLeft | DecRight
336 | And | Or | Xor
337
338 and logicalOp =
339 | Inf | Sup | InfEq | SupEq
340 | Eq | NotEq
341 | AndLog | OrLog
342
343 and constExpression = expression (* => int *)
344
345
346
347 (* ------------------------------------------------------------------------- *)
348 (* C statement *)
349 (* ------------------------------------------------------------------------- *)
350 (* note: that assignement is not a statement but an expression;
351 * wonderful C langage.
352 *
353 * note: I use 'and' for type definition cos gccext allow statement as
354 * expression, so need mutual recursive type definition. *)
355
356 and statement = statementbis wrap
357 and statementbis =
358 | Labeled of labeled
359 | Compound of compound (* new scope *)
360 | ExprStatement of exprStatement
361 | Selection of selection (* have fakeend *)
362 | Iteration of iteration (* have fakeend *)
363 | Jump of jump
364
365 (* simplify cocci: only at the beginning of a compound normally *)
366 | Decl of declaration
367
368 (* gccext: *)
369 | Asm of asmbody
370 | NestedFunc of definition
371
372 (* cppext: *)
373 | MacroStmt
374
375
376
377 and labeled = Label of string * statement
378 | Case of expression * statement
379 | CaseRange of expression * expression * statement (* gccext: *)
380 | Default of statement
381
382 (* cppext:
383 * old: compound = (declaration list * statement list)
384 * old: (declaration, statement) either list
385 * Simplify cocci to just have statement list, by integrating Decl in stmt.
386 *
387 * update: now introduce also the _sequencable to allow ifdef in the middle.
388 *)
389 and compound = statement_sequencable list
390
391 (* cppext: easier to put at statement_list level than statement level *)
392 and statement_sequencable =
393 | StmtElem of statement
394 (* cppext: *)
395 | CppDirectiveStmt of cpp_directive
396 | IfdefStmt of ifdef_directive
397
398 (* this will be build in cpp_ast_c from the previous flat IfdefStmt *)
399 | IfdefStmt2 of ifdef_directive list * (statement_sequencable list) list
400
401 and exprStatement = expression option
402
403 (* for Switch, need check that all elements in the compound start
404 * with a case:, otherwise unreachable code.
405 *)
406 and selection =
407 | If of expression * statement * statement
408 | Switch of expression * statement
409
410
411 and iteration =
412 | While of expression * statement
413 | DoWhile of statement * expression
414 | For of exprStatement wrap * exprStatement wrap * exprStatement wrap *
415 statement
416 (* cppext: *)
417 | MacroIteration of string * argument wrap2 list * statement
418
419 and jump = Goto of string
420 | Continue | Break
421 | Return | ReturnExpr of expression
422 | GotoComputed of expression (* gccext: goto *exp ';' *)
423
424
425 (* gccext: *)
426 and asmbody = il (* string list *) * colon wrap (* : *) list
427 and colon = Colon of colon_option wrap2 list
428 and colon_option = colon_option_bis wrap
429 and colon_option_bis = ColonMisc | ColonExpr of expression
430
431
432 (* ------------------------------------------------------------------------- *)
433 (* Declaration *)
434 (* ------------------------------------------------------------------------- *)
435 (* (string * ...) option cos can have empty declaration or struct tag
436 * declaration.
437 *
438 * Before I had Typedef constructor, but why make this special case and not
439 * have StructDef, EnumDef, ... so that 'struct t {...} v' will generate 2
440 * declarations ? So I try to generalise and not have Typedef either. This
441 * requires more work in parsing. Better to separate concern.
442 *
443 * Before the need for unparser, I didn't have a DeclList but just a Decl.
444 *
445 * I am not sure what it means to declare a prototype inline, but gcc
446 * accepts it.
447 *)
448
449 and local_decl = LocalDecl | NotLocalDecl
450
451 and declaration =
452 | DeclList of onedecl wrap2 (* , *) list wrap (* ; fakestart sto *)
453 (* cppext: *)
454 | MacroDecl of (string * argument wrap2 list) wrap
455
456 and onedecl =
457 { v_namei: (string * initialiser option) wrap (* s = *) option;
458 v_type: fullType;
459 v_storage: storage;
460 v_local: local_decl; (* cocci: *)
461 v_attr: attribute list; (* gccext: *)
462 }
463 and storage = storagebis * bool (* gccext: inline or not *)
464 and storagebis = NoSto | StoTypedef | Sto of storageClass
465 and storageClass = Auto | Static | Register | Extern
466
467 and initialiser = initialiserbis wrap
468 and initialiserbis =
469 | InitExpr of expression
470 | InitList of initialiser wrap2 (* , *) list
471 (* gccext: *)
472 | InitDesignators of designator list * initialiser
473 | InitFieldOld of string * initialiser
474 | InitIndexOld of expression * initialiser
475
476 (* ex: [2].y = x, or .y[2] or .y.x. They can be nested *)
477 and designator = designatorbis wrap
478 and designatorbis =
479 | DesignatorField of string
480 | DesignatorIndex of expression
481 | DesignatorRange of expression * expression
482
483 (* ------------------------------------------------------------------------- *)
484 (* Function definition *)
485 (* ------------------------------------------------------------------------- *)
486 (* Normally we should define another type functionType2 because there
487 * are more restrictions on what can define a function than a pointer
488 * function. For instance a function declaration can omit the name of the
489 * parameter wheras a function definition can not. But, in some cases such
490 * as 'f(void) {', there is no name too, so I simplified and reused the
491 * same functionType type for both declaration and function definition.
492 * Also old style C does not have type in the parameter.
493 *)
494 and definition = definitionbis wrap (* s ( ) { } fakestart sto *)
495 and definitionbis =
496 { f_name: string;
497 f_type: functionType;
498 f_storage: storage;
499 f_body: compound;
500 f_attr: attribute list; (* gccext: *)
501 f_old_c_style: declaration list option;
502 }
503 (* cppext: IfdefFunHeader TODO *)
504
505 (* ------------------------------------------------------------------------- *)
506 (* cppext: cpp directives, #ifdef, #define and #include body *)
507 (* ------------------------------------------------------------------------- *)
508 and cpp_directive =
509 | Include of includ
510 | Define of define
511 | Undef of string wrap
512 | PragmaAndCo of il
513
514 (* to specialize if someone need more info *)
515 and ifdef_directive = (* or and 'a ifdefed = 'a list wrap *)
516 | IfdefDirective of (ifdefkind * matching_tag) wrap
517 and ifdefkind =
518 | Ifdef (* todo? of string ? of formula_cpp *)
519 | IfdefElseif (* same *)
520 | IfdefElse (* same *)
521 | IfdefEndif
522 (* set in Parsing_hacks.set_ifdef_parenthize_info. It internally use
523 * a global so it means if you parse same file twice you may get
524 * different id. I try now to avoid this pb by resetting it each
525 * time I parse a file.
526 *)
527 and matching_tag =
528 IfdefTag of (int (* tag *) * int (* total with this tag *))
529
530 and define = string wrap * define_body (* #define s *)
531 and define_body = define_kind * define_val
532 and define_kind =
533 | DefineVar
534 | DefineFunc of ((string wrap) wrap2 list) wrap (* () *)
535 and define_val =
536 | DefineExpr of expression (* most common case, to define int constant *)
537
538 | DefineStmt of statement
539 | DefineType of fullType
540 | DefineDoWhileZero of (statement * expression) wrap (* do { } while(0) *)
541
542 | DefineFunction of definition
543 | DefineInit of initialiser (* in practice only { } with possible ',' *)
544 (* TODO DefineMulti of define_val list *)
545
546 | DefineText of string wrap
547 | DefineEmpty
548
549 | DefineTodo
550
551
552
553 and includ =
554 { i_include: inc_file wrap; (* #include s *)
555 (* cocci: computed in ? *)
556 i_rel_pos: include_rel_pos option ref;
557 (* cocci: cf -test incl *)
558 i_is_in_ifdef: bool;
559 (* cf cpp_ast_c.ml. set to None at parsing time. *)
560 i_content: (Common.filename (* full path *) * program) option;
561 }
562 and inc_file =
563 | Local of inc_elem list
564 | NonLocal of inc_elem list
565 | Wierd of string (* ex: #include SYSTEM_H *)
566 and inc_elem = string
567
568 (* cocci: to tag the first of #include <xx/> and last of #include <yy/>
569 *
570 * The first_of and last_of store the list of prefixes that was
571 * introduced by the include. On #include <a/b/x>, if the include was
572 * the first in the file, it would give in first_of the following
573 * prefixes a/b/c; a/b/; a/ ; <empty>
574 *
575 * This is set after parsing, in cocci.ml, in update_rel_pos.
576 *)
577 and include_rel_pos = {
578 first_of : string list list;
579 last_of : string list list;
580 }
581
582
583
584
585
586
587 (* ------------------------------------------------------------------------- *)
588 (* The toplevels elements *)
589 (* ------------------------------------------------------------------------- *)
590 and toplevel =
591 | Declaration of declaration
592 | Definition of definition
593
594 (* cppext: *)
595 | CppTop of cpp_directive
596 | IfdefTop of ifdef_directive (* * toplevel list *)
597
598 (* cppext: *)
599 | MacroTop of string * argument wrap2 list * il
600
601 | EmptyDef of il (* gccext: allow redundant ';' *)
602 | NotParsedCorrectly of il
603
604 | FinalDef of info (* EOF *)
605
606 (* ------------------------------------------------------------------------- *)
607 and program = toplevel list
608
609
610 (*****************************************************************************)
611 (* Cocci Bindings *)
612 (*****************************************************************************)
613 (* Was previously in pattern.ml, but because of the transformer,
614 * we need to decorate each token with some cocci code AND the environment
615 * for this cocci code.
616 *)
617 and metavars_binding = (Ast_cocci.meta_name, metavar_binding_kind) assoc
618 and metavar_binding_kind =
619 | MetaIdVal of string
620 | MetaFuncVal of string
621 | MetaLocalFuncVal of string
622
623 | MetaExprVal of expression (* a "clean expr" *)
624 | MetaExprListVal of argument wrap2 list
625 | MetaParamVal of parameterType
626 | MetaParamListVal of parameterType wrap2 list
627
628 | MetaTypeVal of fullType
629 | MetaInitVal of initialiser
630 | MetaStmtVal of statement
631
632 (* Could also be in Lib_engine.metavars_binding2 with the ParenVal,
633 * because don't need to have the value for a position in the env of
634 * a '+'. But ParenVal or LabelVal are used only by CTL, they are not
635 * variables accessible via SmPL whereas the position can be one day
636 * so I think it's better to put MetaPosVal here *)
637 | MetaPosVal of (Ast_cocci.fixpos * Ast_cocci.fixpos) (* max, min *)
638 | MetaPosValList of
639 (Common.filename * string (*element*) * posl * posl) list (* min, max *)
640 | MetaListlenVal of int
641
642
643 (*****************************************************************************)
644 (* C comments *)
645 (*****************************************************************************)
646
647 (* convention: I often use "m" for comments as I can not use "c"
648 * (already use for c stuff) and "com" is too long.
649 *)
650
651 (* this type will be associated to each token *)
652 and comments_around = {
653 mbefore: comment_and_relative_pos list;
654 mafter: comment_and_relative_pos list;
655 }
656 and comment_and_relative_pos = {
657
658 minfo: Common.parse_info;
659 (* the int represent the number of lines of difference between the
660 * current token and the comment. When on same line, this number is 0.
661 * When previous line, -1. In some way the after/before in previous
662 * record is useless because the sign of the integer can helps
663 * do the difference too, but I keep it that way.
664 *)
665 mpos: int;
666 (* todo?
667 * cppbetween: bool; touse? if false positive
668 * is_alone_in_line: bool; (*for labels, to avoid false positive*)
669 *)
670 }
671
672 and comment = Common.parse_info
673 and com = comment list ref
674
675
676 (*****************************************************************************)
677 (* Cpp constructs put it comments in lexer or parsing_hack *)
678 (*****************************************************************************)
679
680 (* This type is not in the Ast but is associated with the TCommentCpp token.
681 * I put this enum here because parser_c.mly need it. I could have put
682 * it also in lexer_parser.
683 *)
684 type cppcommentkind =
685 | CppDirective
686 | CppAttr
687 | CppMacro
688 | CppPassingNormal (* ifdef 0, cplusplus, etc *)
689 | CppPassingCosWouldGetError (* expr passsing *)
690
691
692
693
694 (*****************************************************************************)
695 (* Some constructors *)
696 (*****************************************************************************)
697 let nullQualif = ({const=false; volatile= false}, [])
698 let nQ = nullQualif
699
700 let defaultInt = (BaseType (IntType (Si (Signed, CInt))))
701
702 let noType () = ref (None,NotTest)
703 let noInstr = (ExprStatement (None), [])
704 let noTypedefDef () = None
705
706 let emptyMetavarsBinding =
707 ([]: metavars_binding)
708
709 let emptyAnnot =
710 (Ast_cocci.CONTEXT (Ast_cocci.NoPos,Ast_cocci.NOTHING),
711 emptyMetavarsBinding)
712
713 let emptyComments= {
714 mbefore = [];
715 mafter = [];
716 }
717
718
719 (* for include, some meta information needed by cocci *)
720 let noRelPos () =
721 ref (None: include_rel_pos option)
722 let noInIfdef () =
723 ref false
724
725
726 (* When want add some info in ast that does not correspond to
727 * an existing C element.
728 * old: or when don't want 'synchronize' on it in unparse_c.ml
729 * (now have other mark for tha matter).
730 *)
731 let no_virt_pos = ({str="";charpos=0;line=0;column=0;file=""},-1)
732
733 let fakeInfo pi =
734 { pinfo = FakeTok ("",no_virt_pos);
735 cocci_tag = ref emptyAnnot;
736 comments_tag = ref emptyComments;
737 }
738
739 let noii = []
740 let noattr = []
741 let noi_content = (None: ((Common.filename * program) option))
742
743 (*****************************************************************************)
744 (* Wrappers *)
745 (*****************************************************************************)
746 let unwrap = fst
747
748 let unwrap2 = fst
749
750 let unwrap_expr ((unwrap_e, typ), iie) = unwrap_e
751 let rewrap_expr ((_old_unwrap_e, typ), iie) newe = ((newe, typ), iie)
752
753 let get_type_expr ((unwrap_e, typ), iie) = !typ
754 let set_type_expr ((unwrap_e, oldtyp), iie) newtyp =
755 oldtyp := newtyp
756 (* old: (unwrap_e, newtyp), iie *)
757
758 let get_onlytype_expr ((unwrap_e, typ), iie) =
759 match !typ with
760 | Some (ft,_local), _test -> Some ft
761 | None, _ -> None
762
763
764 let unwrap_typeC (qu, (typeC, ii)) = typeC
765 let rewrap_typeC (qu, (typeC, ii)) newtypeC = (qu, (newtypeC, ii))
766
767
768 (* ------------------------------------------------------------------------- *)
769 let rewrap_str s ii =
770 {ii with pinfo =
771 (match ii.pinfo with
772 OriginTok pi -> OriginTok { pi with Common.str = s;}
773 | ExpandedTok (pi,vpi) -> ExpandedTok ({ pi with Common.str = s;},vpi)
774 | FakeTok (_,vpi) -> FakeTok (s,vpi)
775 | AbstractLineTok pi -> OriginTok { pi with Common.str = s;})}
776
777 let rewrap_pinfo pi ii =
778 {ii with pinfo = pi}
779
780 (* info about the current location *)
781 let get_pi = function
782 OriginTok pi -> pi
783 | ExpandedTok (_,(pi,_)) -> pi
784 | FakeTok (_,(pi,_)) -> pi
785 | AbstractLineTok pi -> pi
786
787 (* original info *)
788 let get_opi = function
789 OriginTok pi -> pi
790 | ExpandedTok (pi,_) -> pi
791 | FakeTok (_,_) -> failwith "no position information"
792 | AbstractLineTok pi -> pi
793
794 let str_of_info ii =
795 match ii.pinfo with
796 OriginTok pi -> pi.Common.str
797 | ExpandedTok (pi,_) -> pi.Common.str
798 | FakeTok (s,_) -> s
799 | AbstractLineTok pi -> pi.Common.str
800
801 let get_info f ii =
802 match ii.pinfo with
803 OriginTok pi -> f pi
804 | ExpandedTok (_,(pi,_)) -> f pi
805 | FakeTok (_,(pi,_)) -> f pi
806 | AbstractLineTok pi -> f pi
807
808 let get_orig_info f ii =
809 match ii.pinfo with
810 OriginTok pi -> f pi
811 | ExpandedTok (pi,_) -> f pi
812 | FakeTok (_,(pi,_)) -> f pi
813 | AbstractLineTok pi -> f pi
814
815 let make_expanded ii =
816 {ii with pinfo = ExpandedTok (get_opi ii.pinfo,no_virt_pos)}
817
818 let pos_of_info ii = get_info (function x -> x.Common.charpos) ii
819 let opos_of_info ii = get_orig_info (function x -> x.Common.charpos) ii
820 let line_of_info ii = get_orig_info (function x -> x.Common.line) ii
821 let col_of_info ii = get_orig_info (function x -> x.Common.column) ii
822 let file_of_info ii = get_orig_info (function x -> x.Common.file) ii
823 let mcode_of_info ii = fst (!(ii.cocci_tag))
824 let pinfo_of_info ii = ii.pinfo
825 let parse_info_of_info ii = get_pi ii.pinfo
826
827 let is_fake ii =
828 match ii.pinfo with
829 FakeTok (_,_) -> true
830 | _ -> false
831
832 let is_origintok ii =
833 match ii.pinfo with
834 | OriginTok pi -> true
835 | _ -> false
836
837 (* ------------------------------------------------------------------------- *)
838 type posrv = Real of Common.parse_info | Virt of virtual_position
839
840 let compare_pos ii1 ii2 =
841 let get_pos = function
842 OriginTok pi -> Real pi
843 | FakeTok (s,vpi) -> Virt vpi
844 | ExpandedTok (pi,vpi) -> Virt vpi
845 | AbstractLineTok pi -> Real pi in (* used for printing *)
846 let pos1 = get_pos (pinfo_of_info ii1) in
847 let pos2 = get_pos (pinfo_of_info ii2) in
848 match (pos1,pos2) with
849 (Real p1, Real p2) ->
850 compare p1.Common.charpos p2.Common.charpos
851 | (Virt (p1,_), Real p2) ->
852 if (compare p1.Common.charpos p2.Common.charpos) = (-1) then (-1) else 1
853 | (Real p1, Virt (p2,_)) ->
854 if (compare p1.Common.charpos p2.Common.charpos) = 1 then 1 else (-1)
855 | (Virt (p1,o1), Virt (p2,o2)) ->
856 let poi1 = p1.Common.charpos in
857 let poi2 = p2.Common.charpos in
858 match compare poi1 poi2 with
859 -1 -> -1
860 | 0 -> compare o1 o2
861 | x -> x
862
863 let equal_posl (l1,c1) (l2,c2) =
864 (l1 =|= l2) && (c1 =|= c2)
865
866 let info_to_fixpos ii =
867 match pinfo_of_info ii with
868 OriginTok pi -> Ast_cocci.Real pi.Common.charpos
869 | ExpandedTok (_,(pi,offset)) ->
870 Ast_cocci.Virt (pi.Common.charpos,offset)
871 | FakeTok (_,(pi,offset)) ->
872 Ast_cocci.Virt (pi.Common.charpos,offset)
873 | AbstractLineTok pi -> failwith "unexpected abstract"
874
875 (* cocci: *)
876 let is_test (e : expression) =
877 let (_,info) = unwrap e in
878 let (_,test) = !info in
879 test = Test
880
881 (*****************************************************************************)
882 (* Abstract line *)
883 (*****************************************************************************)
884
885 (* When we have extended the C Ast to add some info to the tokens,
886 * such as its line number in the file, we can not use anymore the
887 * ocaml '=' to compare Ast elements. To overcome this problem, to be
888 * able to use again '=', we just have to get rid of all those extra
889 * information, to "abstract those line" (al) information.
890 *
891 * Julia then modifies it a little to have a tokenindex, so the original
892 * true al_info is in fact real_al_info.
893 *)
894
895 let al_info tokenindex x =
896 { pinfo =
897 (AbstractLineTok
898 {charpos = tokenindex;
899 line = tokenindex;
900 column = tokenindex;
901 file = "";
902 str = str_of_info x});
903 cocci_tag = ref emptyAnnot;
904 comments_tag = ref emptyComments;
905 }
906
907 let semi_al_info x =
908 { x with
909 cocci_tag = ref emptyAnnot;
910 comments_tag = ref emptyComments;
911 }
912
913 let magic_real_number = -10
914
915 let real_al_info x =
916 { pinfo =
917 (AbstractLineTok
918 {charpos = magic_real_number;
919 line = magic_real_number;
920 column = magic_real_number;
921 file = "";
922 str = str_of_info x});
923 cocci_tag = ref emptyAnnot;
924 comments_tag = ref emptyComments;
925 }
926
927
928 (*****************************************************************************)
929 (* Views *)
930 (*****************************************************************************)
931
932 (* Transform a list of arguments (or parameters) where the commas are
933 * represented via the wrap2 and associated with an element, with
934 * a list where the comma are on their own. f(1,2,2) was
935 * [(1,[]); (2,[,]); (2,[,])] and become [1;',';2;',';2].
936 *
937 * Used in cocci_vs_c.ml, to have a more direct correspondance between
938 * the ast_cocci of julia and ast_c.
939 *)
940 let rec (split_comma: 'a wrap2 list -> ('a, il) either list) =
941 function
942 | [] -> []
943 | (e, ii)::xs ->
944 if null ii
945 then (Left e)::split_comma xs
946 else Right ii::Left e::split_comma xs
947
948 let rec (unsplit_comma: ('a, il) either list -> 'a wrap2 list) =
949 function
950 | [] -> []
951 | Right ii::Left e::xs ->
952 (e, ii)::unsplit_comma xs
953 | Left e::xs ->
954 let empty_ii = [] in
955 (e, empty_ii)::unsplit_comma xs
956 | Right ii::_ ->
957 raise Impossible
958
959
960
961
962 let split_register_param = fun (hasreg, idb, ii_b_s) ->
963 match hasreg, idb, ii_b_s with
964 | false, Some s, [i1] -> Left (s, [], i1)
965 | true, Some s, [i1;i2] -> Left (s, [i1], i2)
966 | _, None, ii -> Right ii
967 | _ -> raise Impossible
968
969
970
971 (*****************************************************************************)
972 (* Helpers, could also be put in lib_parsing_c.ml instead *)
973 (*****************************************************************************)
974
975 let rec stmt_elems_of_sequencable xs =
976 xs +> Common.map (fun x ->
977 match x with
978 | StmtElem e -> [e]
979 | CppDirectiveStmt _
980 | IfdefStmt _
981 ->
982 pr2_once ("stmt_elems_of_sequencable: filter a directive");
983 []
984 | IfdefStmt2 (_ifdef, xxs) ->
985 pr2 ("stmt_elems_of_sequencable: IfdefStm2 TODO?");
986 xxs +> List.map (fun xs ->
987 let xs' = stmt_elems_of_sequencable xs in
988 xs'
989 ) +> List.flatten
990 ) +> List.flatten
991
992
993
994
995 (* should maybe be in pretty_print_c ? *)
996
997 let s_of_inc_file inc_file =
998 match inc_file with
999 | Local xs -> xs +> Common.join "/"
1000 | NonLocal xs -> xs +> Common.join "/"
1001 | Wierd s -> s
1002
1003 let s_of_inc_file_bis inc_file =
1004 match inc_file with
1005 | Local xs -> "\"" ^ xs +> Common.join "/" ^ "\""
1006 | NonLocal xs -> "<" ^ xs +> Common.join "/" ^ ">"
1007 | Wierd s -> s
1008
1009 let fieldname_of_fieldkind fieldkind =
1010 match unwrap fieldkind with
1011 | Simple (sopt, ft) -> sopt
1012 | BitField (sopt, ft, expr) -> sopt
1013
1014
1015 let s_of_attr attr =
1016 attr
1017 +> List.map (fun (Attribute s, ii) -> s)
1018 +> Common.join ","
1019
1020
1021 let type_of_parameter param =
1022 let ((b, sopt, typ), ii) = param in
1023 typ