5 (*****************************************************************************)
7 * There is more information in the CFG we build that in the CFG usually built
8 * in a compiler. This is because:
10 * - We need later to go back from flow to original ast, because we are
11 * doing a refactoring tool, so different context. So we have to add
12 * some nodes for '{' or '}' or goto that normally disapear in a CFG.
13 * We must keep those entities, in the same way that we must keep the parens
14 * (ParenExpr, ParenType) in the Ast_c during parsing.
16 * Moreover, the coccier can mention in his semantic patch those entities,
17 * so we must keep those entities in the CFG.
19 * We also have to add some extra nodes to make the process that goes from
20 * flow to ast deterministic with for instance the CaseNode, or easier
21 * with for instance the Fake node.
23 * - The coccinelle engine later transforms some nodes, and we need to rebuild
24 * the ast from a statement now defined and altered in different nodes.
25 * So we can't just put all the parsing info (Ast_c.il) in the top node of
26 * a statement. We have to split those Ast_c.il in different nodes, to
27 * later reconstruct a full Ast_c.il from different nodes. This is why
28 * we need the Else node, ...
30 * Note that at the same time, we also need to store the fullstatement
31 * in the top node, because the CTL engine need to get that information
32 * when dealing with MetaStatement (statement S; in a Semantic Patch).
35 * - The CTL engine needs more information than just the CFG, and we use
36 * tricks to encode those informations in the nodes:
38 * - We have some TrueNode, FalseNode to know in what branch we are.
39 * Normally we could achieve this by putting this information in the
40 * edges, but CTL engine know nothing about edges, it must do
41 * everything with only nodes information.
43 * - We need to mark each braces with an identifier so that the CTL
44 * can know if one specific '}' correspond to a specific '{'.
46 * - We add some labels to each node to handle the MetaRuleElem and
47 * MetaStatement. It allows to groups nodes that belong to the same
48 * statement. Normally CFG are there to abstract from this, but in
49 * Coccinelle we need sometimes the CFG view, and sometimes the Ast
50 * view and the labels allow that.
52 * - We even add nodes. We add '}', not only to be able to go back to AST
53 * but also because of the CTL engine. So one '}' may in fact be
54 * represented by multiple nodes, one in each CFG path.
58 * - Need know if ErrorExit,
60 * choice: Julia proposed that the flow is in fact just
61 * a view through the Ast, which means just Ocaml ref, so that when we
62 * modify some nodes, in fact it modifies the ast. But I prefer do it
63 * the functionnal way.
65 * The node2 type should be as close as possible to Ast_cocci.rule_elem to
66 * facilitate the job of cocci_vs_c.
70 (*****************************************************************************)
73 (* ---------------------------------------------------------------------- *)
74 (* The string is for debugging. Used by Ograph_extended.print_graph.
75 * The int list are Labels. Trick used for CTL engine. Must not
76 * transform that in a triple or record because print_graph would
79 type node
= node1
* string
80 and node1
= node2
* nodeinfo
83 bclabels
: int list
; (* parent of a break or continue node *)
89 (* ------------------------ *)
90 (* For CTL to work, we need that some nodes loop over itself. We
91 * need that every nodes have a successor. Julia also want to go back
92 * indefinitely. So must tag some nodes as the beginning and end of
93 * the graph so that some fix_ctl function can easily find those
96 * If have a function, then no need for EndNode; Exit and ErrorExit
97 * will play that role.
99 * When everything we analyze was a function there was no pb. We used
100 * FunHeader as a Topnode and Exit for EndNode but now that we also
101 * analyse #define body, so we need those nodes.
106 (* ------------------------ *)
107 | FunHeader
of definition
(* but empty body *)
109 | Decl
of declaration
111 (* ------------------------ *)
112 (* flow_to_ast: cocci: Need the { and } in the control flow graph also
113 * because the coccier can express patterns containing such { }.
115 * ctl: to make possible the forall (AX, A[...]), have to add more than
116 * one node sometimes for the same '}' (one in each CFG path) in the graph.
118 * ctl: Morover, the int in the type is here to indicate to what { }
119 * they correspond. Two pairwise { } share the same number. kind of
120 * "brace_identifier". Used for debugging or for checks and more importantly,
121 * needed by CTL engine.
123 * Because of those nodes, there is no equivalent for Compound.
125 * There was a problem with SeqEnd. Some info can be tagged on it
126 * but there is multiple SeqEnd that correspond to the same '}' even
127 * if they are in different nodes. Solved by using shared ref
128 * and allow the "already-tagged" token.
130 | SeqStart
of statement
* int * info
131 | SeqEnd
of int * info
134 | ExprStatement
of statement
* (expression
option) wrap
137 | IfHeader
of statement
* expression wrap
139 | WhileHeader
of statement
* expression wrap
140 | DoHeader
of statement
* info
141 | DoWhileTail
of expression wrap
142 | ForHeader
of statement
*
143 (exprStatement wrap
* exprStatement wrap
* exprStatement wrap
)
145 | SwitchHeader
of statement
* expression wrap
146 | MacroIterHeader
of statement
* (string * argument wrap2 list
) wrap
148 (* Used to mark the end of if, while, dowhile, for, switch. Later we
149 * will be able to "tag" some cocci code on this node.
155 * the S can be anything, including an if, and this is internally
156 * translated in a series of MetaRuleElem, and the last element is a
157 * EndStatement, and we must tag foo() to this EndStatement.
158 * Otherwise, without this last common node, we would tag foo() to 2
159 * nodes :( So having a unique node makes it correct, and in
160 * flow_to_ast we must propagate back this + foo() to the last token
161 * of an if (maybe a '}', maybe a ';')
163 * The problem is that this stuff should be in transformation.ml,
164 * but need information available in flow_to_ast, but we dont want
165 * to polluate both files.
169 * - soluce julia1, extend Ast_c by adding a fake token to the if
171 * - extend Ast with a Skip, and add this next to EndStatement node,
172 * and do special case in flow_to_ast to start from this node
173 * (not to get_next EndStatement, but from EndStatement directly)
174 * and so add a case when have directly a EndStatement node an extract
175 * the statement from it.
177 * - remonter dans le graphe pour accrocher le foo() non plus au
178 * EndStatement (qui n'a pas d'equivalent niveau token dans l'ast_c),
179 * mais au dernier token de la branche Else (ou Then si y'a pas de else).
181 * I first did solution 2 and then when we decided to use ref,
182 * I use julia'as solution. Have virtual-placeholders, the fakeInfo
183 * for the if, while, and put this shared ref in the EndStatement.
185 | EndStatement
of info
option (* fake_info *)
187 | Return
of statement
* unit wrap
188 | ReturnExpr
of statement
* expression wrap
190 (* ------------------------ *)
191 | IfdefHeader
of ifdef_directive
192 | IfdefElse
of ifdef_directive
193 | IfdefEndif
of ifdef_directive
196 (* ------------------------ *)
197 | DefineHeader
of string wrap
* define_kind
199 | DefineExpr
of expression
200 | DefineType
of fullType
201 | DefineDoWhileZeroHeader
of unit wrap
207 | MacroTop
of string * argument wrap2 list
* il
209 (* ------------------------ *)
210 | Case
of statement
* expression wrap
211 | Default
of statement
* unit wrap
213 | Continue
of statement
* unit wrap
214 | Break
of statement
* unit wrap
216 (* no counter part in cocci *)
217 | CaseRange
of statement
* (expression
* expression
) wrap
218 | Label
of statement
* string wrap
219 | Goto
of statement
* string wrap
222 | Asm
of statement
* asmbody wrap
223 | MacroStmt
of statement
* unit wrap
225 (* ------------------------ *)
226 (* some control nodes *)
231 (* Redundant nodes, often to mark the end of an if/switch.
232 * That makes it easier to do later the flow_to_ast.
233 * update: no more used for the end. see Endstatement. Just used
234 * to mark the start of the function, as required by julia.
235 * Maybe would be better to use instead a Enter2.
239 (* flow_to_ast: In this case, I need to know the order between the children
240 * of the switch in the graph.
244 (* ------------------------ *)
248 | InLoopNode
(* almost equivalent to TrueNode but just for loops *)
255 type edge
= Direct
(* Normal | Shadow *)
257 type cflow
= (node
, edge
) Ograph_extended.ograph_mutable
260 (* ------------------------------------------------------------------------ *)
261 let unwrap ((node
, info
), nodestr
) = node
262 let rewrap ((_node
, info
), nodestr
) node
= (node
, info
), nodestr
263 let extract_labels ((node
, info
), nodestr
) = info
.labels
264 let extract_bclabels ((node
, info
), nodestr
) = info
.bclabels
265 let extract_is_loop ((node
, info
), nodestr
) = info
.is_loop
266 let extract_is_fake ((node
, info
), nodestr
) = info
.is_fake
268 let mk_any_node is_fake node labels bclabels nodestr
=
270 if !Flag_parsing_c.show_flow_labels
271 then nodestr ^
("[" ^
(labels
+> List.map i_to_s
+> join
",") ^
"]")
274 ((node
, {labels
= labels
;is_loop
=false;bclabels
=bclabels
;is_fake
=is_fake
}),
277 let mk_node = mk_any_node false
278 let mk_fake_node = mk_any_node true (* for duplicated braces *)
280 (* ------------------------------------------------------------------------ *)
282 g#nodes#tolist
+> List.find
(fun (i
, node
) ->
283 match unwrap node
with TopNode
-> true | _
-> false
287 g#nodes#tolist
+> List.find
(fun (nodei
, node
) ->
292 (* remove an intermediate node and redirect the connexion *)
293 let remove_one_node nodei g
=
294 let preds = (g#predecessors nodei
)#tolist
in
295 let succs = (g#successors nodei
)#tolist
in
296 assert (not
(null
preds));
298 preds +> List.iter
(fun (predi
, Direct
) ->
299 g#del_arc
((predi
, nodei
), Direct
);
301 succs +> List.iter
(fun (succi
, Direct
) ->
302 g#del_arc
((nodei
, succi
), Direct
);
307 (* connect in-nodes to out-nodes *)
308 preds +> List.iter
(fun (pred
, Direct
) ->
309 succs +> List.iter
(fun (succ
, Direct
) ->
310 g#add_arc
((pred
, succ
), Direct
);
316 (* ------------------------------------------------------------------------ *)
318 let extract_fullstatement node
=
319 match unwrap node
with
321 (* new policy. no more considered as a statement *)
322 (* old: Some (Ast_c.Decl decl, []) *)
324 | MacroStmt
(st
, _
) -> Some st
325 | MacroIterHeader
(st
, _
) -> Some st
328 | DefineHeader _
| DefineType _
| DefineExpr _
| DefineDoWhileZeroHeader _
333 | IfdefHeader _
| IfdefElse _
| IfdefEndif _
337 | ExprStatement
(st
, _
)
339 | WhileHeader
(st
, _
)
342 | SwitchHeader
(st
, _
)
345 (* no counter part in cocci *)