permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / control_flow_c.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913
C
4 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
0708f913
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
34e49164
C
15open Common
16
17open Ast_c
18
19(*****************************************************************************)
ae4735db 20(*
34e49164
C
21 * There is more information in the CFG we build that in the CFG usually built
22 * in a compiler. This is because:
23 *
ae4735db 24 * - We need later to go back from flow to original ast, because we are
34e49164
C
25 * doing a refactoring tool, so different context. So we have to add
26 * some nodes for '{' or '}' or goto that normally disapear in a CFG.
27 * We must keep those entities, in the same way that we must keep the parens
28 * (ParenExpr, ParenType) in the Ast_c during parsing.
29 *
30 * Moreover, the coccier can mention in his semantic patch those entities,
31 * so we must keep those entities in the CFG.
ae4735db
C
32 *
33 * We also have to add some extra nodes to make the process that goes from
34 * flow to ast deterministic with for instance the CaseNode, or easier
34e49164
C
35 * with for instance the Fake node.
36 *
37 * - The coccinelle engine later transforms some nodes, and we need to rebuild
ae4735db 38 * the ast from a statement now defined and altered in different nodes.
34e49164 39 * So we can't just put all the parsing info (Ast_c.il) in the top node of
ae4735db 40 * a statement. We have to split those Ast_c.il in different nodes, to
34e49164
C
41 * later reconstruct a full Ast_c.il from different nodes. This is why
42 * we need the Else node, ...
ae4735db
C
43 *
44 * Note that at the same time, we also need to store the fullstatement
34e49164
C
45 * in the top node, because the CTL engine need to get that information
46 * when dealing with MetaStatement (statement S; in a Semantic Patch).
ae4735db
C
47 *
48 *
34e49164
C
49 * - The CTL engine needs more information than just the CFG, and we use
50 * tricks to encode those informations in the nodes:
51 *
52 * - We have some TrueNode, FalseNode to know in what branch we are.
53 * Normally we could achieve this by putting this information in the
54 * edges, but CTL engine know nothing about edges, it must do
55 * everything with only nodes information.
56 *
57 * - We need to mark each braces with an identifier so that the CTL
58 * can know if one specific '}' correspond to a specific '{'.
59 *
ae4735db 60 * - We add some labels to each node to handle the MetaRuleElem and
34e49164
C
61 * MetaStatement. It allows to groups nodes that belong to the same
62 * statement. Normally CFG are there to abstract from this, but in
63 * Coccinelle we need sometimes the CFG view, and sometimes the Ast
64 * view and the labels allow that.
65 *
66 * - We even add nodes. We add '}', not only to be able to go back to AST
ae4735db 67 * but also because of the CTL engine. So one '}' may in fact be
34e49164 68 * represented by multiple nodes, one in each CFG path.
ae4735db
C
69 *
70 * - need After,
34e49164 71 * - need FallThrough.
ae4735db 72 * - Need know if ErrorExit,
34e49164
C
73 *
74 * choice: Julia proposed that the flow is in fact just
75 * a view through the Ast, which means just Ocaml ref, so that when we
ae4735db 76 * modify some nodes, in fact it modifies the ast. But I prefer do it
34e49164 77 * the functionnal way.
ae4735db 78 *
34e49164
C
79 * The node2 type should be as close as possible to Ast_cocci.rule_elem to
80 * facilitate the job of cocci_vs_c.
ae4735db 81 *
34e49164
C
82 *)
83
84(*****************************************************************************)
85
ae4735db 86type fullstatement = statement
34e49164 87
485bce71 88(* ---------------------------------------------------------------------- *)
ae4735db
C
89(* The string is for debugging. Used by Ograph_extended.print_graph.
90 * The int list are Labels. Trick used for CTL engine. Must not
34e49164
C
91 * transform that in a triple or record because print_graph would
92 * not work.
93 *)
ae4735db
C
94type node = node1 * string
95 and node1 = node2 * nodeinfo
34e49164
C
96 and nodeinfo = {
97 labels: int list;
98 bclabels: int list; (* parent of a break or continue node *)
99 is_loop: bool;
100 is_fake: bool;
101 }
102 and node2 =
103
485bce71 104 (* ------------------------ *)
34e49164
C
105 (* For CTL to work, we need that some nodes loop over itself. We
106 * need that every nodes have a successor. Julia also want to go back
107 * indefinitely. So must tag some nodes as the beginning and end of
108 * the graph so that some fix_ctl function can easily find those
109 * nodes.
ae4735db 110 *
34e49164
C
111 * If have a function, then no need for EndNode; Exit and ErrorExit
112 * will play that role.
ae4735db 113 *
34e49164
C
114 * When everything we analyze was a function there was no pb. We used
115 * FunHeader as a Topnode and Exit for EndNode but now that we also
116 * analyse #define body, so we need those nodes.
117 *)
ae4735db
C
118 | TopNode
119 | EndNode
34e49164 120
485bce71
C
121 (* ------------------------ *)
122 | FunHeader of definition (* but empty body *)
34e49164
C
123
124 | Decl of declaration
125
485bce71 126 (* ------------------------ *)
34e49164
C
127 (* flow_to_ast: cocci: Need the { and } in the control flow graph also
128 * because the coccier can express patterns containing such { }.
129 *
130 * ctl: to make possible the forall (AX, A[...]), have to add more than
131 * one node sometimes for the same '}' (one in each CFG path) in the graph.
132 *
ae4735db
C
133 * ctl: Morover, the int in the type is here to indicate to what { }
134 * they correspond. Two pairwise { } share the same number. kind of
34e49164
C
135 * "brace_identifier". Used for debugging or for checks and more importantly,
136 * needed by CTL engine.
137 *
138 * Because of those nodes, there is no equivalent for Compound.
ae4735db
C
139 *
140 * There was a problem with SeqEnd. Some info can be tagged on it
34e49164
C
141 * but there is multiple SeqEnd that correspond to the same '}' even
142 * if they are in different nodes. Solved by using shared ref
143 * and allow the "already-tagged" token.
144 *)
708f4980 145 | SeqStart of fullstatement * int * info
34e49164
C
146 | SeqEnd of int * info
147
148
708f4980 149 | ExprStatement of fullstatement * (expression option) wrap
34e49164
C
150
151
708f4980 152 | IfHeader of fullstatement * expression wrap
34e49164 153 | Else of info
708f4980
C
154 | WhileHeader of fullstatement * expression wrap
155 | DoHeader of fullstatement * info
34e49164 156 | DoWhileTail of expression wrap
ae4735db 157 | ForHeader of fullstatement *
755320b0 158 (declOrExpr * exprStatement wrap * exprStatement wrap)
34e49164 159 wrap
708f4980
C
160 | SwitchHeader of fullstatement * expression wrap
161 | MacroIterHeader of fullstatement * (string * argument wrap2 list) wrap
34e49164
C
162
163 (* Used to mark the end of if, while, dowhile, for, switch. Later we
164 * will be able to "tag" some cocci code on this node.
ae4735db 165 *
34e49164 166 * This is because in
ae4735db 167 *
34e49164 168 * - S + foo();
ae4735db 169 *
34e49164
C
170 * the S can be anything, including an if, and this is internally
171 * translated in a series of MetaRuleElem, and the last element is a
172 * EndStatement, and we must tag foo() to this EndStatement.
173 * Otherwise, without this last common node, we would tag foo() to 2
174 * nodes :( So having a unique node makes it correct, and in
175 * flow_to_ast we must propagate back this + foo() to the last token
ae4735db
C
176 * of an if (maybe a '}', maybe a ';')
177 *
178 * The problem is that this stuff should be in transformation.ml,
34e49164
C
179 * but need information available in flow_to_ast, but we dont want
180 * to polluate both files.
ae4735db
C
181 *
182 * So the choices are
183 *
34e49164 184 * - soluce julia1, extend Ast_c by adding a fake token to the if
ae4735db 185 *
34e49164
C
186 * - extend Ast with a Skip, and add this next to EndStatement node,
187 * and do special case in flow_to_ast to start from this node
188 * (not to get_next EndStatement, but from EndStatement directly)
189 * and so add a case when have directly a EndStatement node an extract
190 * the statement from it.
ae4735db
C
191 *
192 * - remonter dans le graphe pour accrocher le foo() non plus au
193 * EndStatement (qui n'a pas d'equivalent niveau token dans l'ast_c),
34e49164 194 * mais au dernier token de la branche Else (ou Then si y'a pas de else).
ae4735db 195 *
34e49164
C
196 * I first did solution 2 and then when we decided to use ref,
197 * I use julia'as solution. Have virtual-placeholders, the fakeInfo
198 * for the if, while, and put this shared ref in the EndStatement.
199 *)
200 | EndStatement of info option (* fake_info *)
201
708f4980
C
202 | Return of fullstatement * unit wrap
203 | ReturnExpr of fullstatement * expression wrap
34e49164 204
485bce71
C
205 (* ------------------------ *)
206 | IfdefHeader of ifdef_directive
207 | IfdefElse of ifdef_directive
208 | IfdefEndif of ifdef_directive
209
34e49164
C
210
211 (* ------------------------ *)
212 | DefineHeader of string wrap * define_kind
213
ae4735db 214 | DefineExpr of expression
34e49164
C
215 | DefineType of fullType
216 | DefineDoWhileZeroHeader of unit wrap
485bce71 217 | DefineTodo
34e49164 218
485bce71 219 | Include of includ
34e49164
C
220
221 (* obsolete? *)
ae4735db 222 | MacroTop of string * argument wrap2 list * il
34e49164
C
223
224 (* ------------------------ *)
708f4980
C
225 | Case of fullstatement * expression wrap
226 | Default of fullstatement * unit wrap
34e49164 227
708f4980
C
228 | Continue of fullstatement * unit wrap
229 | Break of fullstatement * unit wrap
34e49164
C
230
231 (* no counter part in cocci *)
b1b2de81
C
232 | CaseRange of fullstatement * (expression * expression) wrap
233 | Label of fullstatement * name * unit wrap (* : *)
234 | Goto of fullstatement * name * unit wrap (* goto *)
34e49164
C
235
236
708f4980
C
237 | Asm of fullstatement * asmbody wrap
238 | MacroStmt of fullstatement * unit wrap
34e49164 239
34e49164
C
240 (* ------------------------ *)
241 (* some control nodes *)
ae4735db 242 | Enter
34e49164
C
243 | Exit
244
245
246 (* Redundant nodes, often to mark the end of an if/switch.
ae4735db 247 * That makes it easier to do later the flow_to_ast.
34e49164
C
248 * update: no more used for the end. see Endstatement. Just used
249 * to mark the start of the function, as required by julia.
250 * Maybe would be better to use instead a Enter2.
251 *)
ae4735db 252 | Fake
34e49164
C
253
254 (* flow_to_ast: In this case, I need to know the order between the children
ae4735db 255 * of the switch in the graph.
34e49164 256 *)
ae4735db 257 | CaseNode of int
34e49164
C
258
259 (* ------------------------ *)
260 (* for ctl: *)
261 | TrueNode
262 | FalseNode
263 | InLoopNode (* almost equivalent to TrueNode but just for loops *)
264
265 | AfterNode
266 | FallThroughNode
951c7801 267 | LoopFallThroughNode
34e49164
C
268
269 | ErrorExit
270
271type edge = Direct (* Normal | Shadow *)
272
273type cflow = (node, edge) Ograph_extended.ograph_mutable
274
275
276(* ------------------------------------------------------------------------ *)
277let unwrap ((node, info), nodestr) = node
278let rewrap ((_node, info), nodestr) node = (node, info), nodestr
279let extract_labels ((node, info), nodestr) = info.labels
280let extract_bclabels ((node, info), nodestr) = info.bclabels
ae4735db 281let extract_is_loop ((node, info), nodestr) = info.is_loop
34e49164
C
282let extract_is_fake ((node, info), nodestr) = info.is_fake
283
284let mk_any_node is_fake node labels bclabels nodestr =
ae4735db 285 let nodestr =
34e49164
C
286 if !Flag_parsing_c.show_flow_labels
287 then nodestr ^ ("[" ^ (labels +> List.map i_to_s +> join ",") ^ "]")
288 else nodestr
289 in
290 ((node, {labels = labels;is_loop=false;bclabels=bclabels;is_fake=is_fake}),
291 nodestr)
292
293let mk_node = mk_any_node false
294let mk_fake_node = mk_any_node true (* for duplicated braces *)
295
296(* ------------------------------------------------------------------------ *)
ae4735db
C
297let first_node g =
298 g#nodes#tolist +> List.find (fun (i, node) ->
34e49164
C
299 match unwrap node with TopNode -> true | _ -> false
300 ) +> fst
301
ae4735db
C
302let find_node f g =
303 g#nodes#tolist +> List.find (fun (nodei, node) ->
304 f (unwrap node))
34e49164
C
305 +> fst
306
307
308(* remove an intermediate node and redirect the connexion *)
ae4735db 309let remove_one_node nodei g =
34e49164
C
310 let preds = (g#predecessors nodei)#tolist in
311 let succs = (g#successors nodei)#tolist in
312 assert (not (null preds));
313
ae4735db 314 preds +> List.iter (fun (predi, Direct) ->
34e49164
C
315 g#del_arc ((predi, nodei), Direct);
316 );
ae4735db 317 succs +> List.iter (fun (succi, Direct) ->
34e49164
C
318 g#del_arc ((nodei, succi), Direct);
319 );
ae4735db 320
34e49164 321 g#del_node nodei;
ae4735db 322
34e49164 323 (* connect in-nodes to out-nodes *)
ae4735db
C
324 preds +> List.iter (fun (pred, Direct) ->
325 succs +> List.iter (fun (succ, Direct) ->
34e49164
C
326 g#add_arc ((pred, succ), Direct);
327 );
328 )
329
330
331
332(* ------------------------------------------------------------------------ *)
333
ae4735db 334let extract_fullstatement node =
34e49164 335 match unwrap node with
ae4735db 336 | Decl decl ->
34e49164
C
337 (* new policy. no more considered as a statement *)
338 (* old: Some (Ast_c.Decl decl, []) *)
ae4735db 339 None
34e49164
C
340 | MacroStmt (st, _) -> Some st
341 | MacroIterHeader (st, _) -> Some st
342
ae4735db 343 | Include _
34e49164 344 | DefineHeader _ | DefineType _ | DefineExpr _ | DefineDoWhileZeroHeader _
485bce71 345 | DefineTodo
34e49164
C
346 | MacroTop _
347 -> None
348
ae4735db 349 | IfdefHeader _ | IfdefElse _ | IfdefEndif _
485bce71
C
350 -> None
351
ae4735db 352 | SeqStart (st,_,_)
34e49164 353 | ExprStatement (st, _)
ae4735db 354 | IfHeader (st, _)
34e49164
C
355 | WhileHeader (st, _)
356 | DoHeader (st, _)
357 | ForHeader (st, _)
358 | SwitchHeader (st, _)
359 | Return (st, _)
360 | ReturnExpr (st, _)
361 (* no counter part in cocci *)
b1b2de81 362 | Label (st, _, _)
34e49164
C
363 | Case (st,_)
364 | CaseRange (st, _)
365 | Default (st, _)
b1b2de81 366 | Goto (st, _, _)
34e49164
C
367 | Continue (st, _)
368 | Break (st, _)
369 | Asm (st,_)
370 -> Some st
371
372 | TopNode|EndNode
373 | FunHeader _
ae4735db
C
374 | SeqEnd _
375 | Else _
34e49164
C
376 | EndStatement _
377 | DoWhileTail _
ae4735db 378 | Enter
34e49164
C
379 | Exit
380 | Fake
381 | CaseNode _
382 | TrueNode
383 | FalseNode
384 | InLoopNode
385 | AfterNode
386 | FallThroughNode
951c7801 387 | LoopFallThroughNode
34e49164
C
388 | ErrorExit
389 -> None