Release coccinelle-0.1.8
[bpt/coccinelle.git] / parsing_c / control_flow_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14 open Common
15
16 open Ast_c
17
18 (*****************************************************************************)
19 (*
20 * There is more information in the CFG we build that in the CFG usually built
21 * in a compiler. This is because:
22 *
23 * - We need later to go back from flow to original ast, because we are
24 * doing a refactoring tool, so different context. So we have to add
25 * some nodes for '{' or '}' or goto that normally disapear in a CFG.
26 * We must keep those entities, in the same way that we must keep the parens
27 * (ParenExpr, ParenType) in the Ast_c during parsing.
28 *
29 * Moreover, the coccier can mention in his semantic patch those entities,
30 * so we must keep those entities in the CFG.
31 *
32 * We also have to add some extra nodes to make the process that goes from
33 * flow to ast deterministic with for instance the CaseNode, or easier
34 * with for instance the Fake node.
35 *
36 * - The coccinelle engine later transforms some nodes, and we need to rebuild
37 * the ast from a statement now defined and altered in different nodes.
38 * So we can't just put all the parsing info (Ast_c.il) in the top node of
39 * a statement. We have to split those Ast_c.il in different nodes, to
40 * later reconstruct a full Ast_c.il from different nodes. This is why
41 * we need the Else node, ...
42 *
43 * Note that at the same time, we also need to store the fullstatement
44 * in the top node, because the CTL engine need to get that information
45 * when dealing with MetaStatement (statement S; in a Semantic Patch).
46 *
47 *
48 * - The CTL engine needs more information than just the CFG, and we use
49 * tricks to encode those informations in the nodes:
50 *
51 * - We have some TrueNode, FalseNode to know in what branch we are.
52 * Normally we could achieve this by putting this information in the
53 * edges, but CTL engine know nothing about edges, it must do
54 * everything with only nodes information.
55 *
56 * - We need to mark each braces with an identifier so that the CTL
57 * can know if one specific '}' correspond to a specific '{'.
58 *
59 * - We add some labels to each node to handle the MetaRuleElem and
60 * MetaStatement. It allows to groups nodes that belong to the same
61 * statement. Normally CFG are there to abstract from this, but in
62 * Coccinelle we need sometimes the CFG view, and sometimes the Ast
63 * view and the labels allow that.
64 *
65 * - We even add nodes. We add '}', not only to be able to go back to AST
66 * but also because of the CTL engine. So one '}' may in fact be
67 * represented by multiple nodes, one in each CFG path.
68 *
69 * - need After,
70 * - need FallThrough.
71 * - Need know if ErrorExit,
72 *
73 * choice: Julia proposed that the flow is in fact just
74 * a view through the Ast, which means just Ocaml ref, so that when we
75 * modify some nodes, in fact it modifies the ast. But I prefer do it
76 * the functionnal way.
77 *
78 * The node2 type should be as close as possible to Ast_cocci.rule_elem to
79 * facilitate the job of cocci_vs_c.
80 *
81 *)
82
83 (*****************************************************************************)
84
85 type fullstatement = statement
86
87 (* ---------------------------------------------------------------------- *)
88 (* The string is for debugging. Used by Ograph_extended.print_graph.
89 * The int list are Labels. Trick used for CTL engine. Must not
90 * transform that in a triple or record because print_graph would
91 * not work.
92 *)
93 type node = node1 * string
94 and node1 = node2 * nodeinfo
95 and nodeinfo = {
96 labels: int list;
97 bclabels: int list; (* parent of a break or continue node *)
98 is_loop: bool;
99 is_fake: bool;
100 }
101 and node2 =
102
103 (* ------------------------ *)
104 (* For CTL to work, we need that some nodes loop over itself. We
105 * need that every nodes have a successor. Julia also want to go back
106 * indefinitely. So must tag some nodes as the beginning and end of
107 * the graph so that some fix_ctl function can easily find those
108 * nodes.
109 *
110 * If have a function, then no need for EndNode; Exit and ErrorExit
111 * will play that role.
112 *
113 * When everything we analyze was a function there was no pb. We used
114 * FunHeader as a Topnode and Exit for EndNode but now that we also
115 * analyse #define body, so we need those nodes.
116 *)
117 | TopNode
118 | EndNode
119
120 (* ------------------------ *)
121 | FunHeader of definition (* but empty body *)
122
123 | Decl of declaration
124
125 (* ------------------------ *)
126 (* flow_to_ast: cocci: Need the { and } in the control flow graph also
127 * because the coccier can express patterns containing such { }.
128 *
129 * ctl: to make possible the forall (AX, A[...]), have to add more than
130 * one node sometimes for the same '}' (one in each CFG path) in the graph.
131 *
132 * ctl: Morover, the int in the type is here to indicate to what { }
133 * they correspond. Two pairwise { } share the same number. kind of
134 * "brace_identifier". Used for debugging or for checks and more importantly,
135 * needed by CTL engine.
136 *
137 * Because of those nodes, there is no equivalent for Compound.
138 *
139 * There was a problem with SeqEnd. Some info can be tagged on it
140 * but there is multiple SeqEnd that correspond to the same '}' even
141 * if they are in different nodes. Solved by using shared ref
142 * and allow the "already-tagged" token.
143 *)
144 | SeqStart of fullstatement * int * info
145 | SeqEnd of int * info
146
147
148 | ExprStatement of fullstatement * (expression option) wrap
149
150
151 | IfHeader of fullstatement * expression wrap
152 | Else of info
153 | WhileHeader of fullstatement * expression wrap
154 | DoHeader of fullstatement * info
155 | DoWhileTail of expression wrap
156 | ForHeader of fullstatement *
157 (exprStatement wrap * exprStatement wrap * exprStatement wrap)
158 wrap
159 | SwitchHeader of fullstatement * expression wrap
160 | MacroIterHeader of fullstatement * (string * argument wrap2 list) wrap
161
162 (* Used to mark the end of if, while, dowhile, for, switch. Later we
163 * will be able to "tag" some cocci code on this node.
164 *
165 * This is because in
166 *
167 * - S + foo();
168 *
169 * the S can be anything, including an if, and this is internally
170 * translated in a series of MetaRuleElem, and the last element is a
171 * EndStatement, and we must tag foo() to this EndStatement.
172 * Otherwise, without this last common node, we would tag foo() to 2
173 * nodes :( So having a unique node makes it correct, and in
174 * flow_to_ast we must propagate back this + foo() to the last token
175 * of an if (maybe a '}', maybe a ';')
176 *
177 * The problem is that this stuff should be in transformation.ml,
178 * but need information available in flow_to_ast, but we dont want
179 * to polluate both files.
180 *
181 * So the choices are
182 *
183 * - soluce julia1, extend Ast_c by adding a fake token to the if
184 *
185 * - extend Ast with a Skip, and add this next to EndStatement node,
186 * and do special case in flow_to_ast to start from this node
187 * (not to get_next EndStatement, but from EndStatement directly)
188 * and so add a case when have directly a EndStatement node an extract
189 * the statement from it.
190 *
191 * - remonter dans le graphe pour accrocher le foo() non plus au
192 * EndStatement (qui n'a pas d'equivalent niveau token dans l'ast_c),
193 * mais au dernier token de la branche Else (ou Then si y'a pas de else).
194 *
195 * I first did solution 2 and then when we decided to use ref,
196 * I use julia'as solution. Have virtual-placeholders, the fakeInfo
197 * for the if, while, and put this shared ref in the EndStatement.
198 *)
199 | EndStatement of info option (* fake_info *)
200
201 | Return of fullstatement * unit wrap
202 | ReturnExpr of fullstatement * expression wrap
203
204 (* ------------------------ *)
205 | IfdefHeader of ifdef_directive
206 | IfdefElse of ifdef_directive
207 | IfdefEndif of ifdef_directive
208
209
210 (* ------------------------ *)
211 | DefineHeader of string wrap * define_kind
212
213 | DefineExpr of expression
214 | DefineType of fullType
215 | DefineDoWhileZeroHeader of unit wrap
216 | DefineTodo
217
218 | Include of includ
219
220 (* obsolete? *)
221 | MacroTop of string * argument wrap2 list * il
222
223 (* ------------------------ *)
224 | Case of fullstatement * expression wrap
225 | Default of fullstatement * unit wrap
226
227 | Continue of fullstatement * unit wrap
228 | Break of fullstatement * unit wrap
229
230 (* no counter part in cocci *)
231 | CaseRange of fullstatement * (expression * expression) wrap
232 | Label of fullstatement * name * unit wrap (* : *)
233 | Goto of fullstatement * name * unit wrap (* goto *)
234
235
236 | Asm of fullstatement * asmbody wrap
237 | MacroStmt of fullstatement * unit wrap
238
239 (* ------------------------ *)
240 (* some control nodes *)
241 | Enter
242 | Exit
243
244
245 (* Redundant nodes, often to mark the end of an if/switch.
246 * That makes it easier to do later the flow_to_ast.
247 * update: no more used for the end. see Endstatement. Just used
248 * to mark the start of the function, as required by julia.
249 * Maybe would be better to use instead a Enter2.
250 *)
251 | Fake
252
253 (* flow_to_ast: In this case, I need to know the order between the children
254 * of the switch in the graph.
255 *)
256 | CaseNode of int
257
258 (* ------------------------ *)
259 (* for ctl: *)
260 | TrueNode
261 | FalseNode
262 | InLoopNode (* almost equivalent to TrueNode but just for loops *)
263
264 | AfterNode
265 | FallThroughNode
266
267 | ErrorExit
268
269 type edge = Direct (* Normal | Shadow *)
270
271 type cflow = (node, edge) Ograph_extended.ograph_mutable
272
273
274 (* ------------------------------------------------------------------------ *)
275 let unwrap ((node, info), nodestr) = node
276 let rewrap ((_node, info), nodestr) node = (node, info), nodestr
277 let extract_labels ((node, info), nodestr) = info.labels
278 let extract_bclabels ((node, info), nodestr) = info.bclabels
279 let extract_is_loop ((node, info), nodestr) = info.is_loop
280 let extract_is_fake ((node, info), nodestr) = info.is_fake
281
282 let mk_any_node is_fake node labels bclabels nodestr =
283 let nodestr =
284 if !Flag_parsing_c.show_flow_labels
285 then nodestr ^ ("[" ^ (labels +> List.map i_to_s +> join ",") ^ "]")
286 else nodestr
287 in
288 ((node, {labels = labels;is_loop=false;bclabels=bclabels;is_fake=is_fake}),
289 nodestr)
290
291 let mk_node = mk_any_node false
292 let mk_fake_node = mk_any_node true (* for duplicated braces *)
293
294 (* ------------------------------------------------------------------------ *)
295 let first_node g =
296 g#nodes#tolist +> List.find (fun (i, node) ->
297 match unwrap node with TopNode -> true | _ -> false
298 ) +> fst
299
300 let find_node f g =
301 g#nodes#tolist +> List.find (fun (nodei, node) ->
302 f (unwrap node))
303 +> fst
304
305
306 (* remove an intermediate node and redirect the connexion *)
307 let remove_one_node nodei g =
308 let preds = (g#predecessors nodei)#tolist in
309 let succs = (g#successors nodei)#tolist in
310 assert (not (null preds));
311
312 preds +> List.iter (fun (predi, Direct) ->
313 g#del_arc ((predi, nodei), Direct);
314 );
315 succs +> List.iter (fun (succi, Direct) ->
316 g#del_arc ((nodei, succi), Direct);
317 );
318
319 g#del_node nodei;
320
321 (* connect in-nodes to out-nodes *)
322 preds +> List.iter (fun (pred, Direct) ->
323 succs +> List.iter (fun (succ, Direct) ->
324 g#add_arc ((pred, succ), Direct);
325 );
326 )
327
328
329
330 (* ------------------------------------------------------------------------ *)
331
332 let extract_fullstatement node =
333 match unwrap node with
334 | Decl decl ->
335 (* new policy. no more considered as a statement *)
336 (* old: Some (Ast_c.Decl decl, []) *)
337 None
338 | MacroStmt (st, _) -> Some st
339 | MacroIterHeader (st, _) -> Some st
340
341 | Include _
342 | DefineHeader _ | DefineType _ | DefineExpr _ | DefineDoWhileZeroHeader _
343 | DefineTodo
344 | MacroTop _
345 -> None
346
347 | IfdefHeader _ | IfdefElse _ | IfdefEndif _
348 -> None
349
350 | SeqStart (st,_,_)
351 | ExprStatement (st, _)
352 | IfHeader (st, _)
353 | WhileHeader (st, _)
354 | DoHeader (st, _)
355 | ForHeader (st, _)
356 | SwitchHeader (st, _)
357 | Return (st, _)
358 | ReturnExpr (st, _)
359 (* no counter part in cocci *)
360 | Label (st, _, _)
361 | Case (st,_)
362 | CaseRange (st, _)
363 | Default (st, _)
364 | Goto (st, _, _)
365 | Continue (st, _)
366 | Break (st, _)
367 | Asm (st,_)
368 -> Some st
369
370 | TopNode|EndNode
371 | FunHeader _
372 | SeqEnd _
373 | Else _
374 | EndStatement _
375 | DoWhileTail _
376 | Enter
377 | Exit
378 | Fake
379 | CaseNode _
380 | TrueNode
381 | FalseNode
382 | InLoopNode
383 | AfterNode
384 | FallThroughNode
385 | ErrorExit
386 -> None