parsing_c/control_flow_c.ml

   1 (* Yoann Padioleau
   2  *
   3  * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License (GPL)
   7  * version 2 as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * file license.txt for more details.
  13  *)
  14 open Common
  15
  16 open Ast_c
  17
  18 (*****************************************************************************)
  19 (*
  20  * There is more information in the CFG we build that in the CFG usually built
  21  * in a compiler. This is because:
  22  *
  23  *  - We need later to go back from flow to original ast, because we are
  24  *    doing a refactoring tool, so different context. So we have to add
  25  *    some nodes for '{' or '}' or goto that normally disapear in a CFG.
  26  *    We must keep those entities, in the same way that we must keep the parens
  27  *    (ParenExpr, ParenType) in the Ast_c during parsing.
  28  *
  29  *    Moreover, the coccier can mention in his semantic patch those entities,
  30  *    so we must keep those entities in the CFG.
  31  *
  32  *    We also have to add some extra nodes to make the process that goes from
  33  *    flow to ast deterministic with for instance the CaseNode, or easier
  34  *    with for instance the Fake node.
  35  *
  36  *  - The coccinelle engine later transforms some nodes, and we need to rebuild
  37  *    the ast from a statement now defined and altered in different nodes.
  38  *    So we can't just put all the parsing info (Ast_c.il) in the top node of
  39  *    a statement. We have to split those Ast_c.il in different nodes, to
  40  *    later reconstruct a full Ast_c.il from different nodes. This is why
  41  *    we need the Else node, ...
  42  *
  43  *    Note that at the same time, we also need to store the fullstatement
  44  *    in the top node, because the CTL engine need to get that information
  45  *    when dealing with MetaStatement (statement S; in a Semantic Patch).
  46  *
  47  *
  48  *  - The CTL engine needs more information than just the CFG, and we use
  49  *    tricks to encode those informations in the nodes:
  50  *
  51  *       - We have some TrueNode, FalseNode to know in what branch we are.
  52  *         Normally we could achieve this by putting this information in the
  53  *         edges, but CTL engine know nothing about edges, it must do
  54  *         everything with only nodes information.
  55  *
  56  *       - We need to mark each braces with an identifier so that the CTL
  57  *         can know if one specific '}' correspond to a specific '{'.
  58  *
  59  *       - We add some labels to each node to handle the MetaRuleElem and
  60  *         MetaStatement. It allows to groups nodes that belong to the same
  61  *         statement. Normally CFG are there to abstract from this, but in
  62  *         Coccinelle we need sometimes the CFG view, and sometimes the Ast
  63  *         view and the labels allow that.
  64  *
  65  *       - We even add nodes. We add '}', not only to be able to go back to AST
  66  *         but also because of the CTL engine. So one '}' may in fact be
  67  *         represented by multiple nodes, one in each CFG path.
  68  *
  69  *       - need After,
  70  *       - need FallThrough.
  71  *       - Need know if ErrorExit,
  72  *
  73  * choice: Julia proposed that the flow is in fact just
  74  * a view through the Ast, which means just Ocaml ref, so that when we
  75  * modify some nodes, in fact it modifies the ast. But I prefer do it
  76  * the functionnal way.
  77  *
  78  * The node2 type should be as close as possible to Ast_cocci.rule_elem to
  79  * facilitate the job of cocci_vs_c.
  80  *
  81  *)
  82
  83 (*****************************************************************************)
  84
  85
  86 (* ---------------------------------------------------------------------- *)
  87 (* The string is for debugging. Used by Ograph_extended.print_graph.
  88  * The int list are Labels. Trick used for CTL engine. Must not
  89  * transform that in a triple or record because print_graph would
  90  * not work.
  91  *)
  92 type node = node1 * string
  93   and node1 = node2 * nodeinfo
  94     and nodeinfo = {
  95       labels: int list;
  96       bclabels: int list; (* parent of a break or continue node *)
  97       is_loop: bool;
  98       is_fake: bool;
  99     }
 100     and node2 =
 101
 102   (* ------------------------ *)
 103   (* For CTL to work, we need that some nodes loop over itself. We
 104    * need that every nodes have a successor. Julia also want to go back
 105    * indefinitely. So must tag some nodes as the beginning and end of
 106    * the graph so that some fix_ctl function can easily find those
 107    * nodes.
 108    *
 109    * If have a function, then no need for EndNode; Exit and ErrorExit
 110    * will play that role.
 111    *
 112    * When everything we analyze was a function there was no pb. We used
 113    * FunHeader as a Topnode and Exit for EndNode but now that we also
 114    * analyse #define body, so we need those nodes.
 115    *)
 116    | TopNode
 117    | EndNode
 118
 119    (* ------------------------ *)
 120    | FunHeader of definition (* but empty body *)
 121
 122    | Decl   of declaration
 123
 124    (* ------------------------ *)
 125    (* flow_to_ast: cocci: Need the { and } in the control flow graph also
 126     * because the coccier can express patterns containing such { }.
 127     *
 128     * ctl: to make possible the forall (AX, A[...]), have to add more than
 129     * one node sometimes for the same '}' (one in each CFG path) in the graph.
 130     *
 131     * ctl: Morover, the int in the type is here to indicate to what { }
 132     * they correspond. Two pairwise { } share the same number. kind of
 133     * "brace_identifier". Used for debugging or for checks and more importantly,
 134     * needed by CTL engine.
 135     *
 136     * Because of those nodes, there is no equivalent for Compound.
 137     *
 138     * There was a problem with SeqEnd. Some info can be tagged on it
 139     * but there is multiple SeqEnd that correspond to the same '}' even
 140     * if they are in different nodes. Solved by using shared ref
 141     * and allow the "already-tagged" token.
 142     *)
 143   | SeqStart of statement * int * info
 144   | SeqEnd   of int * info
 145
 146
 147   | ExprStatement of statement * (expression option) wrap
 148
 149
 150   | IfHeader  of statement * expression wrap
 151   | Else of info
 152   | WhileHeader of statement * expression wrap
 153   | DoHeader of statement * info
 154   | DoWhileTail of expression wrap
 155   | ForHeader of statement *
 156                  (exprStatement wrap * exprStatement wrap * exprStatement wrap)
 157                  wrap
 158   | SwitchHeader of statement * expression wrap
 159   | MacroIterHeader of statement * (string * argument wrap2 list) wrap
 160
 161   (* Used to mark the end of if, while, dowhile, for, switch. Later we
 162    * will be able to "tag" some cocci code on this node.
 163    *
 164    * This is because in
 165    *
 166    * - S + foo();
 167    *
 168    * the S can be anything, including an if, and this is internally
 169    * translated in a series of MetaRuleElem, and the last element is a
 170    * EndStatement, and we must tag foo() to this EndStatement.
 171    * Otherwise, without this last common node, we would tag foo() to 2
 172    * nodes :( So having a unique node makes it correct, and in
 173    * flow_to_ast we must propagate back this + foo() to the last token
 174    * of an if (maybe a '}', maybe a ';')
 175    *
 176    * The problem is that this stuff should be in transformation.ml,
 177    * but need information available in flow_to_ast, but we dont want
 178    * to polluate both files.
 179    *
 180    * So the choices are
 181    *
 182    * - soluce julia1, extend Ast_c by adding a fake token to the if
 183    *
 184    * - extend Ast with a Skip, and add this next to EndStatement node,
 185    * and do special case in flow_to_ast to start from this node
 186    * (not to get_next EndStatement, but from EndStatement directly)
 187    * and so add a case when have directly a EndStatement node an extract
 188    * the statement from it.
 189    *
 190    * - remonter dans le graphe pour accrocher le foo() non plus au
 191    * EndStatement (qui n'a pas d'equivalent niveau token dans l'ast_c),
 192    * mais au dernier token de la branche Else (ou Then si y'a pas de else).
 193    *
 194    * I first did solution 2 and then when we decided to use ref,
 195    * I use julia'as solution. Have virtual-placeholders, the fakeInfo
 196    * for the if, while, and put this shared ref in the EndStatement.
 197    *)
 198   | EndStatement of info option (* fake_info *)
 199
 200   | Return     of statement * unit wrap
 201   | ReturnExpr of statement * expression wrap
 202
 203   (* ------------------------ *)
 204   | IfdefHeader of ifdef_directive
 205   | IfdefElse of ifdef_directive
 206   | IfdefEndif of ifdef_directive
 207
 208
 209   (* ------------------------ *)
 210   | DefineHeader of string wrap * define_kind
 211
 212   | DefineExpr of expression
 213   | DefineType of fullType
 214   | DefineDoWhileZeroHeader of unit wrap
 215   | DefineTodo
 216
 217   | Include of includ
 218
 219   (* obsolete? *)
 220   | MacroTop of string * argument wrap2 list * il
 221
 222   (* ------------------------ *)
 223   | Case  of statement * expression wrap
 224   | Default of statement * unit wrap
 225
 226   | Continue of statement * unit wrap
 227   | Break    of statement * unit wrap
 228
 229   (* no counter part in cocci *)
 230   | CaseRange of statement * (expression * expression) wrap
 231   | Label of statement * string wrap
 232   | Goto of statement * string wrap
 233
 234
 235   | Asm of statement * asmbody wrap
 236   | MacroStmt of statement * unit wrap
 237
 238   (* ------------------------ *)
 239   (* some control nodes *)
 240   | Enter
 241   | Exit
 242
 243
 244   (* Redundant nodes, often to mark the end of an if/switch.
 245    * That makes it easier to do later the flow_to_ast.
 246    * update: no more used for the end. see Endstatement. Just used
 247    * to mark the start of the function, as required by julia.
 248    * Maybe would be better to use instead a Enter2.
 249    *)
 250   | Fake
 251
 252   (* flow_to_ast: In this case, I need to know the  order between the children
 253    * of the switch in the graph.
 254    *)
 255   | CaseNode of int
 256
 257   (* ------------------------ *)
 258   (* for ctl:  *)
 259   | TrueNode
 260   | FalseNode
 261   | InLoopNode (* almost equivalent to TrueNode but just for loops *)
 262
 263   | AfterNode
 264   | FallThroughNode
 265
 266   | ErrorExit
 267
 268 type edge = Direct (* Normal | Shadow *)
 269
 270 type cflow = (node, edge) Ograph_extended.ograph_mutable
 271
 272
 273 (* ------------------------------------------------------------------------ *)
 274 let unwrap ((node, info), nodestr) = node
 275 let rewrap ((_node, info), nodestr) node = (node, info), nodestr
 276 let extract_labels ((node, info), nodestr) = info.labels
 277 let extract_bclabels ((node, info), nodestr) = info.bclabels
 278 let extract_is_loop ((node, info), nodestr) = info.is_loop
 279 let extract_is_fake ((node, info), nodestr) = info.is_fake
 280
 281 let mk_any_node is_fake node labels bclabels nodestr =
 282   let nodestr =
 283     if !Flag_parsing_c.show_flow_labels
 284     then nodestr ^ ("[" ^ (labels +> List.map i_to_s +> join ",") ^ "]")
 285     else nodestr
 286   in
 287   ((node, {labels = labels;is_loop=false;bclabels=bclabels;is_fake=is_fake}),
 288    nodestr)
 289
 290 let mk_node = mk_any_node false
 291 let mk_fake_node = mk_any_node true (* for duplicated braces *)
 292
 293 (* ------------------------------------------------------------------------ *)
 294 let first_node g =
 295   g#nodes#tolist +> List.find (fun (i, node) ->
 296     match unwrap node with TopNode -> true | _ -> false
 297     ) +> fst
 298
 299 let find_node f g =
 300   g#nodes#tolist +> List.find (fun (nodei, node) ->
 301     f (unwrap node))
 302      +> fst
 303
 304
 305 (* remove an intermediate node and redirect the connexion  *)
 306 let remove_one_node nodei g =
 307   let preds = (g#predecessors nodei)#tolist in
 308   let succs = (g#successors nodei)#tolist in
 309   assert (not (null preds));
 310
 311   preds +> List.iter (fun (predi, Direct) ->
 312     g#del_arc ((predi, nodei), Direct);
 313   );
 314   succs +> List.iter (fun (succi, Direct) ->
 315     g#del_arc ((nodei, succi), Direct);
 316   );
 317
 318   g#del_node nodei;
 319
 320   (* connect in-nodes to out-nodes *)
 321   preds +> List.iter (fun (pred, Direct) ->
 322     succs +> List.iter (fun (succ, Direct) ->
 323       g#add_arc ((pred, succ), Direct);
 324     );
 325   )
 326
 327
 328
 329 (* ------------------------------------------------------------------------ *)
 330
 331 let extract_fullstatement node =
 332   match unwrap node with
 333   | Decl decl ->
 334       (* new policy. no more considered as a statement *)
 335       (* old: Some (Ast_c.Decl decl, []) *)
 336       None
 337   | MacroStmt (st, _) -> Some st
 338   | MacroIterHeader (st, _) -> Some st
 339
 340   | Include _
 341   | DefineHeader _ | DefineType _ | DefineExpr  _ | DefineDoWhileZeroHeader _
 342   | DefineTodo
 343   | MacroTop _
 344       -> None
 345
 346   | IfdefHeader _ | IfdefElse _ | IfdefEndif _
 347       -> None
 348
 349   | SeqStart (st,_,_)
 350   | ExprStatement (st, _)
 351   | IfHeader  (st, _)
 352   | WhileHeader (st, _)
 353   | DoHeader (st, _)
 354   | ForHeader (st, _)
 355   | SwitchHeader (st, _)
 356   | Return     (st, _)
 357   | ReturnExpr (st, _)
 358   (* no counter part in cocci *)
 359   | Label (st, _)
 360   | Case  (st,_)
 361   | CaseRange (st, _)
 362   | Default   (st, _)
 363   | Goto (st, _)
 364   | Continue (st, _)
 365   | Break    (st, _)
 366   | Asm (st,_)
 367       -> Some st
 368
 369   | TopNode|EndNode
 370   | FunHeader _
 371   | SeqEnd  _
 372   | Else _
 373   | EndStatement _
 374   | DoWhileTail _
 375   | Enter
 376   | Exit
 377   | Fake
 378   | CaseNode _
 379   | TrueNode
 380   | FalseNode
 381   | InLoopNode
 382   | AfterNode
 383   | FallThroughNode
 384   | ErrorExit
 385     -> None