(**************************************************************************)
(*                                                                        *)
(*  Menhir                                                                *)
(*                                                                        *)
(*  François Pottier, INRIA Rocquencourt                                  *)
(*  Yann Régis-Gianas, PPS, Université Paris Diderot                      *)
(*                                                                        *)
(*  Copyright 2005-2008 Institut National de Recherche en Informatique    *)
(*  et en Automatique. All rights reserved. This file is distributed      *)
(*  under the terms of the GNU Library General Public License, with the   *)
(*  special exception on linking described in file LICENSE.               *)
(*                                                                        *)
(**************************************************************************)

(* This module compresses a two-dimensional table, where some values
   are considered insignificant, via row displacement. *)

(* This idea reportedly appears in Aho and Ullman's ``Principles
   of Compiler Design'' (1977). It is evaluated in Tarjan and Yao's
   ``Storing a Sparse Table'' (1979) and in Dencker, Dürre, and Heuft's
   ``Optimization of Parser Tables for Portable Compilers'' (1984). *)

(* A compressed table is represented as a pair of arrays. The
   displacement array is an array of offsets into the data array. *)

type 'a table =
    int array * (* displacement *)
     'a array   (* data *)

(* In a natural version of this algorithm, displacements would be greater
   than (or equal to) [-n]. However, in the particular setting of Menhir,
   both arrays are intended to be compressed with [PackedIntArray], which
   does not efficiently support negative numbers. For this reason, we are
   careful not to produce negative displacements. *)

(* In order to avoid producing negative displacements, we simply use the
   least significant bit as the sign bit. This is implemented by [encode]
   and [decode] below. *)

(* One could also think, say, of adding [n] to every displacement, so as
   to ensure that all displacements are nonnegative. This would work, but
   would require [n] to be published, for use by the decoder. *)

let encode (displacement : int) : int =
  if displacement >= 0 then
    displacement lsl 1
  else
    (-displacement) lsl 1 + 1

let decode (displacement : int) : int =
  if displacement land 1 = 0 then
    displacement lsr 1
  else
    -(displacement lsr 1)

(* It is reasonable to assume that, as matrices grow large, their
   density becomes low, i.e., they have many insignificant entries.
   As a result, it is important to work with a sparse data structure
   for rows. We internally represent a row as a list of its
   significant entries, where each entry is a pair of a [j] index and
   an element. *)

type 'a row =
    (int * 'a) list

(* [compress equal insignificant dummy m n t] turns the two-dimensional table
   [t] into a compressed table. The parameter [equal] is equality of data
   values. The parameter [wildcard] tells which data values are insignificant,
   and can thus be overwritten with other values. The parameter [dummy] is
   used to fill holes in the data array. [m] and [n] are the integer
   dimensions of the table [t]. *)

let compress
    (equal : 'a -> 'a -> bool)
    (insignificant : 'a -> bool)
    (dummy : 'a)
    (m : int) (n : int)
    (t : 'a array array)
    : 'a table =

  (* Be defensive. *)

  assert (Array.length t = m);
  assert begin
    for i = 0 to m - 1 do
      assert (Array.length t.(i) = n)
    done;
    true
  end;

  (* This turns a row-as-array into a row-as-sparse-list. *)

  let sparse (line : 'a array) : 'a row =

    let rec loop (j : int) (row : 'a row) =
      if j < 0 then
	row
      else
	let x = line.(j) in
	loop
	  (j - 1)
	  (if insignificant x then row else (j, x) :: row)
    in

    loop (n - 1) []

  in

  (* Define the rank of a row as its number of significant entries. *)

  let rank (row : 'a row) : int =
    List.length row
  in

  (* Construct a list of all rows, together with their index and rank. *)

  let rows : (int * int * 'a row) list = (* index, rank, row *)
    Array.to_list (
      Array.mapi (fun i line ->
	let row = sparse line in
	i, rank row, row
      ) t
    )
  in

  (* Sort this list by decreasing rank. This does not have any impact
     on correctness, but reportedly improves compression. The
     intuitive idea is that rows with few significant elements are
     easy to fit, so they should be inserted last, after the problem
     has become quite constrained by fitting the heavier rows. This
     heuristic is attributed to Ziegler. *)

  let rows =
    List.sort (fun (_, rank1, _) (_, rank2, _) ->
      compare rank2 rank1
    ) rows
  in

  (* Allocate a one-dimensional array of displacements. *)

  let displacement : int array =
    Array.make m 0
  in

  (* Allocate a one-dimensional, infinite array of values. Indices
     into this array are written [k]. *)

  let data : 'a InfiniteArray.t =
    InfiniteArray.make dummy
  in

  (* Determine whether [row] fits at offset [k] within the current [data]
     array, up to extension of this array. *)

  (* Note that this check always succeeds when [k] equals the length of
     the [data] array. Indeed, the loop is then skipped. This property
     guarantees the termination of the recursive function [fit] below. *)

  let fits k (row : 'a row) : bool =

    let d = InfiniteArray.extent data in

    let rec loop = function
      | [] ->
	  true
      | (j, x) :: row ->

	  (* [x] is a significant element. *)

	  (* By hypothesis, [k + j] is nonnegative. If it is greater than or
	     equal to the current length of the data array, stop -- the row
	     fits. *)

	  assert (k + j >= 0);

	  if k + j >= d then
	    true

	  (* We now know that [k + j] is within bounds of the data
	     array. Check whether it is compatible with the element [y] found
	     there. If it is, continue. If it isn't, stop -- the row does not
	     fit. *)

	  else
	    let y = InfiniteArray.get data (k + j) in
	    if insignificant y || equal x y then
	      loop row
	    else
	      false

    in
    loop row

  in

  (* Find the leftmost position where a row fits. *)

  (* If the leftmost significant element in this row is at offset [j],
     then we can hope to fit as far left as [-j] -- so this element
     lands at offset [0] in the data array. *)

  (* Note that displacements may be negative. This means that, for
     insignificant elements, accesses to the data array could fail: they could
     be out of bounds, either towards the left or towards the right. This is
     not a problem, as long as [get] is invoked only at significant
     elements. *)

  let rec fit k row : int =
    if fits k row then
      k
    else
      fit (k + 1) row
  in

  let fit row =
    match row with
    | [] ->
	0 (* irrelevant *)
    | (j, _) :: _ ->
	fit (-j) row
  in

  (* Write [row] at (compatible) offset [k]. *)

  let rec write k = function
    | [] ->
	()
    | (j, x) :: row ->
	InfiniteArray.set data (k + j) x;
	write k row
  in

  (* Iterate over the sorted list of rows. Fit and write each row at
     the leftmost compatible offset. Update the displacement table. *)

  let () =
    List.iter (fun (i, _, row) ->
      let k = fit row in (* if [row] has leading insignificant elements, then [k] can be negative *)
      write k row;
      displacement.(i) <- encode k
    ) rows
  in

  (* Return the compressed tables. *)

  displacement, InfiniteArray.domain data

(* [get ct i j] returns the value found at indices [i] and [j] in the
   compressed table [ct]. This function call is permitted only if the
   value found at indices [i] and [j] in the original table is
   significant -- otherwise, it could fail abruptly. *)

(* Together, [compress] and [get] have the property that, if the value
   found at indices [i] and [j] in an uncompressed table [t] is
   significant, then [get (compress t) i j] is equal to that value. *)

let get (displacement, data) i j =
  assert (0 <= i && i < Array.length displacement);
  let k = decode displacement.(i) in
  assert (0 <= k + j && k + j < Array.length data);
    (* failure of this assertion indicates an attempt to access an
       insignificant element that happens to be mapped out of the bounds
       of the [data] array. *)
  data.(k + j)

(* [getget] is a variant of [get] which only requires read access,
   via accessors, to the two components of the table. *)

let getget get_displacement get_data (displacement, data) i j =
  let k = decode (get_displacement displacement i) in
  get_data data (k + j)