(* Yoann Padioleau
 *
 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
 * Copyright (C) 2006, 2007, 2008 Ecole des Mines de Nantes
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License (GPL)
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * file license.txt for more details.
 *)
open Common

open Ast_c

(*****************************************************************************)
(*
 * There is more information in the CFG we build that in the CFG usually built
 * in a compiler. This is because:
 *
 *  - We need later to go back from flow to original ast, because we are
 *    doing a refactoring tool, so different context. So we have to add
 *    some nodes for '{' or '}' or goto that normally disapear in a CFG.
 *    We must keep those entities, in the same way that we must keep the parens
 *    (ParenExpr, ParenType) in the Ast_c during parsing.
 *
 *    Moreover, the coccier can mention in his semantic patch those entities,
 *    so we must keep those entities in the CFG.
 *
 *    We also have to add some extra nodes to make the process that goes from
 *    flow to ast deterministic with for instance the CaseNode, or easier
 *    with for instance the Fake node.
 *
 *  - The coccinelle engine later transforms some nodes, and we need to rebuild
 *    the ast from a statement now defined and altered in different nodes.
 *    So we can't just put all the parsing info (Ast_c.il) in the top node of
 *    a statement. We have to split those Ast_c.il in different nodes, to
 *    later reconstruct a full Ast_c.il from different nodes. This is why
 *    we need the Else node, ...
 *
 *    Note that at the same time, we also need to store the fullstatement
 *    in the top node, because the CTL engine need to get that information
 *    when dealing with MetaStatement (statement S; in a Semantic Patch).
 *
 *
 *  - The CTL engine needs more information than just the CFG, and we use
 *    tricks to encode those informations in the nodes:
 *
 *       - We have some TrueNode, FalseNode to know in what branch we are.
 *         Normally we could achieve this by putting this information in the
 *         edges, but CTL engine know nothing about edges, it must do
 *         everything with only nodes information.
 *
 *       - We need to mark each braces with an identifier so that the CTL
 *         can know if one specific '}' correspond to a specific '{'.
 *
 *       - We add some labels to each node to handle the MetaRuleElem and
 *         MetaStatement. It allows to groups nodes that belong to the same
 *         statement. Normally CFG are there to abstract from this, but in
 *         Coccinelle we need sometimes the CFG view, and sometimes the Ast
 *         view and the labels allow that.
 *
 *       - We even add nodes. We add '}', not only to be able to go back to AST
 *         but also because of the CTL engine. So one '}' may in fact be
 *         represented by multiple nodes, one in each CFG path.
 *
 *       - need After,
 *       - need FallThrough.
 *       - Need know if ErrorExit,
 *
 * choice: Julia proposed that the flow is in fact just
 * a view through the Ast, which means just Ocaml ref, so that when we
 * modify some nodes, in fact it modifies the ast. But I prefer do it
 * the functionnal way.
 *
 * The node2 type should be as close as possible to Ast_cocci.rule_elem to
 * facilitate the job of cocci_vs_c.
 *
 *)

(*****************************************************************************)

type fullstatement = statement

(* ---------------------------------------------------------------------- *)
(* The string is for debugging. Used by Ograph_extended.print_graph.
 * The int list are Labels. Trick used for CTL engine. Must not
 * transform that in a triple or record because print_graph would
 * not work.
 *)
type node = node1 * string
  and node1 = node2 * nodeinfo
    and nodeinfo = {
      labels: int list;
      bclabels: int list; (* parent of a break or continue node *)
      is_loop: bool;
      is_fake: bool;
    }
    and node2 =

  (* ------------------------ *)
  (* For CTL to work, we need that some nodes loop over itself. We
   * need that every nodes have a successor. Julia also want to go back
   * indefinitely. So must tag some nodes as the beginning and end of
   * the graph so that some fix_ctl function can easily find those
   * nodes.
   *
   * If have a function, then no need for EndNode; Exit and ErrorExit
   * will play that role.
   *
   * When everything we analyze was a function there was no pb. We used
   * FunHeader as a Topnode and Exit for EndNode but now that we also
   * analyse #define body, so we need those nodes.
   *)
   | TopNode
   | EndNode

   (* ------------------------ *)
   | FunHeader of definition (* but empty body *)

   | Decl   of declaration

   (* ------------------------ *)
   (* flow_to_ast: cocci: Need the { and } in the control flow graph also
    * because the coccier can express patterns containing such { }.
    *
    * ctl: to make possible the forall (AX, A[...]), have to add more than
    * one node sometimes for the same '}' (one in each CFG path) in the graph.
    *
    * ctl: Morover, the int in the type is here to indicate to what { }
    * they correspond. Two pairwise { } share the same number. kind of
    * "brace_identifier". Used for debugging or for checks and more importantly,
    * needed by CTL engine.
    *
    * Because of those nodes, there is no equivalent for Compound.
    *
    * There was a problem with SeqEnd. Some info can be tagged on it
    * but there is multiple SeqEnd that correspond to the same '}' even
    * if they are in different nodes. Solved by using shared ref
    * and allow the "already-tagged" token.
    *)
  | SeqStart of fullstatement * int * info
  | SeqEnd   of int * info


  | ExprStatement of fullstatement * (expression option) wrap


  | IfHeader  of fullstatement * expression wrap
  | Else of info
  | WhileHeader of fullstatement * expression wrap
  | DoHeader of fullstatement * info
  | DoWhileTail of expression wrap
  | ForHeader of fullstatement *
                 (declOrExpr * exprStatement wrap * exprStatement wrap)
                 wrap
  | SwitchHeader of fullstatement * expression wrap
  | MacroIterHeader of fullstatement * (string * argument wrap2 list) wrap

  (* Used to mark the end of if, while, dowhile, for, switch. Later we
   * will be able to "tag" some cocci code on this node.
   *
   * This is because in
   *
   * - S + foo();
   *
   * the S can be anything, including an if, and this is internally
   * translated in a series of MetaRuleElem, and the last element is a
   * EndStatement, and we must tag foo() to this EndStatement.
   * Otherwise, without this last common node, we would tag foo() to 2
   * nodes :( So having a unique node makes it correct, and in
   * flow_to_ast we must propagate back this + foo() to the last token
   * of an if (maybe a '}', maybe a ';')
   *
   * The problem is that this stuff should be in transformation.ml,
   * but need information available in flow_to_ast, but we dont want
   * to polluate both files.
   *
   * So the choices are
   *
   * - soluce julia1, extend Ast_c by adding a fake token to the if
   *
   * - extend Ast with a Skip, and add this next to EndStatement node,
   * and do special case in flow_to_ast to start from this node
   * (not to get_next EndStatement, but from EndStatement directly)
   * and so add a case when have directly a EndStatement node an extract
   * the statement from it.
   *
   * - remonter dans le graphe pour accrocher le foo() non plus au
   * EndStatement (qui n'a pas d'equivalent niveau token dans l'ast_c),
   * mais au dernier token de la branche Else (ou Then si y'a pas de else).
   *
   * I first did solution 2 and then when we decided to use ref,
   * I use julia'as solution. Have virtual-placeholders, the fakeInfo
   * for the if, while, and put this shared ref in the EndStatement.
   *)
  | EndStatement of info option (* fake_info *)

  | Return     of fullstatement * unit wrap
  | ReturnExpr of fullstatement * expression wrap

  (* ------------------------ *)
  | IfdefHeader of ifdef_directive
  | IfdefElse of ifdef_directive
  | IfdefEndif of ifdef_directive


  (* ------------------------ *)
  | DefineHeader of string wrap * define_kind

  | DefineExpr of expression
  | DefineType of fullType
  | DefineDoWhileZeroHeader of unit wrap
  | DefineTodo

  | Include of includ

  (* obsolete? *)
  | MacroTop of string * argument wrap2 list * il

  (* ------------------------ *)
  | Case  of fullstatement * expression wrap
  | Default of fullstatement * unit wrap

  | Continue of fullstatement * unit wrap
  | Break    of fullstatement * unit wrap

  (* no counter part in cocci *)
  | CaseRange of fullstatement * (expression * expression) wrap
  | Label of fullstatement * name * unit wrap (* : *)
  | Goto of fullstatement * name * unit wrap (* goto *)


  | Asm of fullstatement * asmbody wrap
  | MacroStmt of fullstatement * unit wrap

  (* ------------------------ *)
  (* some control nodes *)
  | Enter
  | Exit


  (* Redundant nodes, often to mark the end of an if/switch.
   * That makes it easier to do later the flow_to_ast.
   * update: no more used for the end. see Endstatement. Just used
   * to mark the start of the function, as required by julia.
   * Maybe would be better to use instead a Enter2.
   *)
  | Fake

  (* flow_to_ast: In this case, I need to know the  order between the children
   * of the switch in the graph.
   *)
  | CaseNode of int

  (* ------------------------ *)
  (* for ctl:  *)
  | TrueNode
  | FalseNode
  | InLoopNode (* almost equivalent to TrueNode but just for loops *)

  | AfterNode
  | FallThroughNode
  | LoopFallThroughNode

  | ErrorExit

type edge = Direct (* Normal | Shadow *)

type cflow = (node, edge) Ograph_extended.ograph_mutable


(* ------------------------------------------------------------------------ *)
let unwrap ((node, info), nodestr) = node
let rewrap ((_node, info), nodestr) node = (node, info), nodestr
let extract_labels ((node, info), nodestr) = info.labels
let extract_bclabels ((node, info), nodestr) = info.bclabels
let extract_is_loop ((node, info), nodestr) = info.is_loop
let extract_is_fake ((node, info), nodestr) = info.is_fake

let mk_any_node is_fake node labels bclabels nodestr =
  let nodestr =
    if !Flag_parsing_c.show_flow_labels
    then nodestr ^ ("[" ^ (labels +> List.map i_to_s +> join ",") ^ "]")
    else nodestr
  in
  ((node, {labels = labels;is_loop=false;bclabels=bclabels;is_fake=is_fake}),
   nodestr)

let mk_node = mk_any_node false
let mk_fake_node = mk_any_node true (* for duplicated braces *)

(* ------------------------------------------------------------------------ *)
let first_node g =
  g#nodes#tolist +> List.find (fun (i, node) ->
    match unwrap node with TopNode -> true | _ -> false
    ) +> fst

let find_node f g =
  g#nodes#tolist +> List.find (fun (nodei, node) ->
    f (unwrap node))
     +> fst


(* remove an intermediate node and redirect the connexion  *)
let remove_one_node nodei g =
  let preds = (g#predecessors nodei)#tolist in
  let succs = (g#successors nodei)#tolist in
  assert (not (null preds));

  preds +> List.iter (fun (predi, Direct) ->
    g#del_arc ((predi, nodei), Direct);
  );
  succs +> List.iter (fun (succi, Direct) ->
    g#del_arc ((nodei, succi), Direct);
  );

  g#del_node nodei;

  (* connect in-nodes to out-nodes *)
  preds +> List.iter (fun (pred, Direct) ->
    succs +> List.iter (fun (succ, Direct) ->
      g#add_arc ((pred, succ), Direct);
    );
  )


(* ------------------------------------------------------------------------ *)

let extract_fullstatement node =
  match unwrap node with
  | Decl decl ->
      (* new policy. no more considered as a statement *)
      (* old: Some (Ast_c.Decl decl, []) *)
      None
  | MacroStmt (st, _) -> Some st
  | MacroIterHeader (st, _) -> Some st

  | Include _
  | DefineHeader _ | DefineType _ | DefineExpr  _ | DefineDoWhileZeroHeader _
  | DefineTodo
  | MacroTop _
      -> None

  | IfdefHeader _ | IfdefElse _ | IfdefEndif _
      -> None

  | SeqStart (st,_,_)
  | ExprStatement (st, _)
  | IfHeader  (st, _)
  | WhileHeader (st, _)
  | DoHeader (st, _)
  | ForHeader (st, _)
  | SwitchHeader (st, _)
  | Return     (st, _)
  | ReturnExpr (st, _)
  (* no counter part in cocci *)
  | Label (st, _, _)
  | Case  (st,_)
  | CaseRange (st, _)
  | Default   (st, _)
  | Goto (st, _, _)
  | Continue (st, _)
  | Break    (st, _)
  | Asm (st,_)
      -> Some st

  | TopNode|EndNode
  | FunHeader _
  | SeqEnd  _
  | Else _
  | EndStatement _
  | DoWhileTail _
  | Enter
  | Exit
  | Fake
  | CaseNode _
  | TrueNode
  | FalseNode
  | InLoopNode
  | AfterNode
  | FallThroughNode
  | LoopFallThroughNode
  | ErrorExit
    -> None