Release coccinelle-0.1.2
[bpt/coccinelle.git] / parsing_c / control_flow_c.ml
CommitLineData
34e49164
C
1open Common
2
3open Ast_c
4
5(*****************************************************************************)
6(*
7 * There is more information in the CFG we build that in the CFG usually built
8 * in a compiler. This is because:
9 *
10 * - We need later to go back from flow to original ast, because we are
11 * doing a refactoring tool, so different context. So we have to add
12 * some nodes for '{' or '}' or goto that normally disapear in a CFG.
13 * We must keep those entities, in the same way that we must keep the parens
14 * (ParenExpr, ParenType) in the Ast_c during parsing.
15 *
16 * Moreover, the coccier can mention in his semantic patch those entities,
17 * so we must keep those entities in the CFG.
18 *
19 * We also have to add some extra nodes to make the process that goes from
20 * flow to ast deterministic with for instance the CaseNode, or easier
21 * with for instance the Fake node.
22 *
23 * - The coccinelle engine later transforms some nodes, and we need to rebuild
24 * the ast from a statement now defined and altered in different nodes.
25 * So we can't just put all the parsing info (Ast_c.il) in the top node of
26 * a statement. We have to split those Ast_c.il in different nodes, to
27 * later reconstruct a full Ast_c.il from different nodes. This is why
28 * we need the Else node, ...
29 *
30 * Note that at the same time, we also need to store the fullstatement
31 * in the top node, because the CTL engine need to get that information
32 * when dealing with MetaStatement (statement S; in a Semantic Patch).
33 *
34 *
35 * - The CTL engine needs more information than just the CFG, and we use
36 * tricks to encode those informations in the nodes:
37 *
38 * - We have some TrueNode, FalseNode to know in what branch we are.
39 * Normally we could achieve this by putting this information in the
40 * edges, but CTL engine know nothing about edges, it must do
41 * everything with only nodes information.
42 *
43 * - We need to mark each braces with an identifier so that the CTL
44 * can know if one specific '}' correspond to a specific '{'.
45 *
46 * - We add some labels to each node to handle the MetaRuleElem and
47 * MetaStatement. It allows to groups nodes that belong to the same
48 * statement. Normally CFG are there to abstract from this, but in
49 * Coccinelle we need sometimes the CFG view, and sometimes the Ast
50 * view and the labels allow that.
51 *
52 * - We even add nodes. We add '}', not only to be able to go back to AST
53 * but also because of the CTL engine. So one '}' may in fact be
54 * represented by multiple nodes, one in each CFG path.
55 *
56 * - need After,
57 * - need FallThrough.
58 * - Need know if ErrorExit,
59 *
60 * choice: Julia proposed that the flow is in fact just
61 * a view through the Ast, which means just Ocaml ref, so that when we
62 * modify some nodes, in fact it modifies the ast. But I prefer do it
63 * the functionnal way.
64 *
65 * The node2 type should be as close as possible to Ast_cocci.rule_elem to
66 * facilitate the job of cocci_vs_c.
67 *
68 *)
69
70(*****************************************************************************)
71
72
485bce71 73(* ---------------------------------------------------------------------- *)
34e49164
C
74(* The string is for debugging. Used by Ograph_extended.print_graph.
75 * The int list are Labels. Trick used for CTL engine. Must not
76 * transform that in a triple or record because print_graph would
77 * not work.
78 *)
79type node = node1 * string
80 and node1 = node2 * nodeinfo
81 and nodeinfo = {
82 labels: int list;
83 bclabels: int list; (* parent of a break or continue node *)
84 is_loop: bool;
85 is_fake: bool;
86 }
87 and node2 =
88
485bce71 89 (* ------------------------ *)
34e49164
C
90 (* For CTL to work, we need that some nodes loop over itself. We
91 * need that every nodes have a successor. Julia also want to go back
92 * indefinitely. So must tag some nodes as the beginning and end of
93 * the graph so that some fix_ctl function can easily find those
94 * nodes.
95 *
96 * If have a function, then no need for EndNode; Exit and ErrorExit
97 * will play that role.
98 *
99 * When everything we analyze was a function there was no pb. We used
100 * FunHeader as a Topnode and Exit for EndNode but now that we also
101 * analyse #define body, so we need those nodes.
102 *)
103 | TopNode
104 | EndNode
105
485bce71
C
106 (* ------------------------ *)
107 | FunHeader of definition (* but empty body *)
34e49164
C
108
109 | Decl of declaration
110
485bce71 111 (* ------------------------ *)
34e49164
C
112 (* flow_to_ast: cocci: Need the { and } in the control flow graph also
113 * because the coccier can express patterns containing such { }.
114 *
115 * ctl: to make possible the forall (AX, A[...]), have to add more than
116 * one node sometimes for the same '}' (one in each CFG path) in the graph.
117 *
118 * ctl: Morover, the int in the type is here to indicate to what { }
119 * they correspond. Two pairwise { } share the same number. kind of
120 * "brace_identifier". Used for debugging or for checks and more importantly,
121 * needed by CTL engine.
122 *
123 * Because of those nodes, there is no equivalent for Compound.
124 *
125 * There was a problem with SeqEnd. Some info can be tagged on it
126 * but there is multiple SeqEnd that correspond to the same '}' even
127 * if they are in different nodes. Solved by using shared ref
128 * and allow the "already-tagged" token.
129 *)
130 | SeqStart of statement * int * info
131 | SeqEnd of int * info
132
133
134 | ExprStatement of statement * (expression option) wrap
135
136
137 | IfHeader of statement * expression wrap
138 | Else of info
139 | WhileHeader of statement * expression wrap
140 | DoHeader of statement * info
141 | DoWhileTail of expression wrap
142 | ForHeader of statement *
143 (exprStatement wrap * exprStatement wrap * exprStatement wrap)
144 wrap
145 | SwitchHeader of statement * expression wrap
146 | MacroIterHeader of statement * (string * argument wrap2 list) wrap
147
148 (* Used to mark the end of if, while, dowhile, for, switch. Later we
149 * will be able to "tag" some cocci code on this node.
150 *
151 * This is because in
152 *
153 * - S + foo();
154 *
155 * the S can be anything, including an if, and this is internally
156 * translated in a series of MetaRuleElem, and the last element is a
157 * EndStatement, and we must tag foo() to this EndStatement.
158 * Otherwise, without this last common node, we would tag foo() to 2
159 * nodes :( So having a unique node makes it correct, and in
160 * flow_to_ast we must propagate back this + foo() to the last token
161 * of an if (maybe a '}', maybe a ';')
162 *
163 * The problem is that this stuff should be in transformation.ml,
164 * but need information available in flow_to_ast, but we dont want
165 * to polluate both files.
166 *
167 * So the choices are
168 *
169 * - soluce julia1, extend Ast_c by adding a fake token to the if
170 *
171 * - extend Ast with a Skip, and add this next to EndStatement node,
172 * and do special case in flow_to_ast to start from this node
173 * (not to get_next EndStatement, but from EndStatement directly)
174 * and so add a case when have directly a EndStatement node an extract
175 * the statement from it.
176 *
177 * - remonter dans le graphe pour accrocher le foo() non plus au
178 * EndStatement (qui n'a pas d'equivalent niveau token dans l'ast_c),
179 * mais au dernier token de la branche Else (ou Then si y'a pas de else).
180 *
181 * I first did solution 2 and then when we decided to use ref,
182 * I use julia'as solution. Have virtual-placeholders, the fakeInfo
183 * for the if, while, and put this shared ref in the EndStatement.
184 *)
185 | EndStatement of info option (* fake_info *)
186
187 | Return of statement * unit wrap
188 | ReturnExpr of statement * expression wrap
189
485bce71
C
190 (* ------------------------ *)
191 | IfdefHeader of ifdef_directive
192 | IfdefElse of ifdef_directive
193 | IfdefEndif of ifdef_directive
194
34e49164
C
195
196 (* ------------------------ *)
197 | DefineHeader of string wrap * define_kind
198
199 | DefineExpr of expression
200 | DefineType of fullType
201 | DefineDoWhileZeroHeader of unit wrap
485bce71 202 | DefineTodo
34e49164 203
485bce71 204 | Include of includ
34e49164
C
205
206 (* obsolete? *)
207 | MacroTop of string * argument wrap2 list * il
208
209 (* ------------------------ *)
210 | Case of statement * expression wrap
211 | Default of statement * unit wrap
212
213 | Continue of statement * unit wrap
214 | Break of statement * unit wrap
215
216 (* no counter part in cocci *)
217 | CaseRange of statement * (expression * expression) wrap
218 | Label of statement * string wrap
219 | Goto of statement * string wrap
220
221
222 | Asm of statement * asmbody wrap
223 | MacroStmt of statement * unit wrap
224
34e49164
C
225 (* ------------------------ *)
226 (* some control nodes *)
227 | Enter
228 | Exit
229
230
231 (* Redundant nodes, often to mark the end of an if/switch.
232 * That makes it easier to do later the flow_to_ast.
233 * update: no more used for the end. see Endstatement. Just used
234 * to mark the start of the function, as required by julia.
235 * Maybe would be better to use instead a Enter2.
236 *)
237 | Fake
238
239 (* flow_to_ast: In this case, I need to know the order between the children
240 * of the switch in the graph.
241 *)
242 | CaseNode of int
243
244 (* ------------------------ *)
245 (* for ctl: *)
246 | TrueNode
247 | FalseNode
248 | InLoopNode (* almost equivalent to TrueNode but just for loops *)
249
250 | AfterNode
251 | FallThroughNode
252
253 | ErrorExit
254
255type edge = Direct (* Normal | Shadow *)
256
257type cflow = (node, edge) Ograph_extended.ograph_mutable
258
259
260(* ------------------------------------------------------------------------ *)
261let unwrap ((node, info), nodestr) = node
262let rewrap ((_node, info), nodestr) node = (node, info), nodestr
263let extract_labels ((node, info), nodestr) = info.labels
264let extract_bclabels ((node, info), nodestr) = info.bclabels
265let extract_is_loop ((node, info), nodestr) = info.is_loop
266let extract_is_fake ((node, info), nodestr) = info.is_fake
267
268let mk_any_node is_fake node labels bclabels nodestr =
269 let nodestr =
270 if !Flag_parsing_c.show_flow_labels
271 then nodestr ^ ("[" ^ (labels +> List.map i_to_s +> join ",") ^ "]")
272 else nodestr
273 in
274 ((node, {labels = labels;is_loop=false;bclabels=bclabels;is_fake=is_fake}),
275 nodestr)
276
277let mk_node = mk_any_node false
278let mk_fake_node = mk_any_node true (* for duplicated braces *)
279
280(* ------------------------------------------------------------------------ *)
281let first_node g =
282 g#nodes#tolist +> List.find (fun (i, node) ->
283 match unwrap node with TopNode -> true | _ -> false
284 ) +> fst
285
286let find_node f g =
287 g#nodes#tolist +> List.find (fun (nodei, node) ->
288 f (unwrap node))
289 +> fst
290
291
292(* remove an intermediate node and redirect the connexion *)
293let remove_one_node nodei g =
294 let preds = (g#predecessors nodei)#tolist in
295 let succs = (g#successors nodei)#tolist in
296 assert (not (null preds));
297
298 preds +> List.iter (fun (predi, Direct) ->
299 g#del_arc ((predi, nodei), Direct);
300 );
301 succs +> List.iter (fun (succi, Direct) ->
302 g#del_arc ((nodei, succi), Direct);
303 );
304
305 g#del_node nodei;
306
307 (* connect in-nodes to out-nodes *)
308 preds +> List.iter (fun (pred, Direct) ->
309 succs +> List.iter (fun (succ, Direct) ->
310 g#add_arc ((pred, succ), Direct);
311 );
312 )
313
314
315
316(* ------------------------------------------------------------------------ *)
317
318let extract_fullstatement node =
319 match unwrap node with
320 | Decl decl ->
321 (* new policy. no more considered as a statement *)
322 (* old: Some (Ast_c.Decl decl, []) *)
323 None
324 | MacroStmt (st, _) -> Some st
325 | MacroIterHeader (st, _) -> Some st
326
34e49164
C
327 | Include _
328 | DefineHeader _ | DefineType _ | DefineExpr _ | DefineDoWhileZeroHeader _
485bce71 329 | DefineTodo
34e49164
C
330 | MacroTop _
331 -> None
332
485bce71
C
333 | IfdefHeader _ | IfdefElse _ | IfdefEndif _
334 -> None
335
34e49164
C
336 | SeqStart (st,_,_)
337 | ExprStatement (st, _)
338 | IfHeader (st, _)
339 | WhileHeader (st, _)
340 | DoHeader (st, _)
341 | ForHeader (st, _)
342 | SwitchHeader (st, _)
343 | Return (st, _)
344 | ReturnExpr (st, _)
345 (* no counter part in cocci *)
346 | Label (st, _)
347 | Case (st,_)
348 | CaseRange (st, _)
349 | Default (st, _)
350 | Goto (st, _)
351 | Continue (st, _)
352 | Break (st, _)
353 | Asm (st,_)
354 -> Some st
355
356 | TopNode|EndNode
357 | FunHeader _
358 | SeqEnd _
359 | Else _
360 | EndStatement _
361 | DoWhileTail _
362 | Enter
363 | Exit
364 | Fake
365 | CaseNode _
366 | TrueNode
367 | FalseNode
368 | InLoopNode
369 | AfterNode
370 | FallThroughNode
371 | ErrorExit
372 -> None