menhirlib/rowDisplacement.ml

   1 (**************************************************************************)
   2 (*                                                                        *)
   3 (*  Menhir                                                                *)
   4 (*                                                                        *)
   5 (*  François Pottier, INRIA Rocquencourt                                  *)
   6 (*  Yann Régis-Gianas, PPS, Université Paris Diderot                      *)
   7 (*                                                                        *)
   8 (*  Copyright 2005-2008 Institut National de Recherche en Informatique    *)
   9 (*  et en Automatique. All rights reserved. This file is distributed      *)
  10 (*  under the terms of the GNU Library General Public License, with the   *)
  11 (*  special exception on linking described in file LICENSE.               *)
  12 (*                                                                        *)
  13 (**************************************************************************)
  14
  15 (* This module compresses a two-dimensional table, where some values
  16    are considered insignificant, via row displacement. *)
  17
  18 (* This idea reportedly appears in Aho and Ullman's ``Principles
  19    of Compiler Design'' (1977). It is evaluated in Tarjan and Yao's
  20    ``Storing a Sparse Table'' (1979) and in Dencker, Dürre, and Heuft's
  21    ``Optimization of Parser Tables for Portable Compilers'' (1984). *)
  22
  23 (* A compressed table is represented as a pair of arrays. The
  24    displacement array is an array of offsets into the data array. *)
  25
  26 type 'a table =
  27     int array * (* displacement *)
  28      'a array   (* data *)
  29
  30 (* In a natural version of this algorithm, displacements would be greater
  31    than (or equal to) [-n]. However, in the particular setting of Menhir,
  32    both arrays are intended to be compressed with [PackedIntArray], which
  33    does not efficiently support negative numbers. For this reason, we are
  34    careful not to produce negative displacements. *)
  35
  36 (* In order to avoid producing negative displacements, we simply use the
  37    least significant bit as the sign bit. This is implemented by [encode]
  38    and [decode] below. *)
  39
  40 (* One could also think, say, of adding [n] to every displacement, so as
  41    to ensure that all displacements are nonnegative. This would work, but
  42    would require [n] to be published, for use by the decoder. *)
  43
  44 let encode (displacement : int) : int =
  45   if displacement >= 0 then
  46     displacement lsl 1
  47   else
  48     (-displacement) lsl 1 + 1
  49
  50 let decode (displacement : int) : int =
  51   if displacement land 1 = 0 then
  52     displacement lsr 1
  53   else
  54     -(displacement lsr 1)
  55
  56 (* It is reasonable to assume that, as matrices grow large, their
  57    density becomes low, i.e., they have many insignificant entries.
  58    As a result, it is important to work with a sparse data structure
  59    for rows. We internally represent a row as a list of its
  60    significant entries, where each entry is a pair of a [j] index and
  61    an element. *)
  62
  63 type 'a row =
  64     (int * 'a) list
  65
  66 (* [compress equal insignificant dummy m n t] turns the two-dimensional table
  67    [t] into a compressed table. The parameter [equal] is equality of data
  68    values. The parameter [wildcard] tells which data values are insignificant,
  69    and can thus be overwritten with other values. The parameter [dummy] is
  70    used to fill holes in the data array. [m] and [n] are the integer
  71    dimensions of the table [t]. *)
  72
  73 let compress
  74     (equal : 'a -> 'a -> bool)
  75     (insignificant : 'a -> bool)
  76     (dummy : 'a)
  77     (m : int) (n : int)
  78     (t : 'a array array)
  79     : 'a table =
  80
  81   (* Be defensive. *)
  82
  83   assert (Array.length t = m);
  84   assert begin
  85     for i = 0 to m - 1 do
  86       assert (Array.length t.(i) = n)
  87     done;
  88     true
  89   end;
  90
  91   (* This turns a row-as-array into a row-as-sparse-list. *)
  92
  93   let sparse (line : 'a array) : 'a row =
  94
  95     let rec loop (j : int) (row : 'a row) =
  96       if j < 0 then
  97         row
  98       else
  99         let x = line.(j) in
 100         loop
 101           (j - 1)
 102           (if insignificant x then row else (j, x) :: row)
 103     in
 104
 105     loop (n - 1) []
 106
 107   in
 108
 109   (* Define the rank of a row as its number of significant entries. *)
 110
 111   let rank (row : 'a row) : int =
 112     List.length row
 113   in
 114
 115   (* Construct a list of all rows, together with their index and rank. *)
 116
 117   let rows : (int * int * 'a row) list = (* index, rank, row *)
 118     Array.to_list (
 119       Array.mapi (fun i line ->
 120         let row = sparse line in
 121         i, rank row, row
 122       ) t
 123     )
 124   in
 125
 126   (* Sort this list by decreasing rank. This does not have any impact
 127      on correctness, but reportedly improves compression. The
 128      intuitive idea is that rows with few significant elements are
 129      easy to fit, so they should be inserted last, after the problem
 130      has become quite constrained by fitting the heavier rows. This
 131      heuristic is attributed to Ziegler. *)
 132
 133   let rows =
 134     List.sort (fun (_, rank1, _) (_, rank2, _) ->
 135       compare rank2 rank1
 136     ) rows
 137   in
 138
 139   (* Allocate a one-dimensional array of displacements. *)
 140
 141   let displacement : int array =
 142     Array.make m 0
 143   in
 144
 145   (* Allocate a one-dimensional, infinite array of values. Indices
 146      into this array are written [k]. *)
 147
 148   let data : 'a InfiniteArray.t =
 149     InfiniteArray.make dummy
 150   in
 151
 152   (* Determine whether [row] fits at offset [k] within the current [data]
 153      array, up to extension of this array. *)
 154
 155   (* Note that this check always succeeds when [k] equals the length of
 156      the [data] array. Indeed, the loop is then skipped. This property
 157      guarantees the termination of the recursive function [fit] below. *)
 158
 159   let fits k (row : 'a row) : bool =
 160
 161     let d = InfiniteArray.extent data in
 162
 163     let rec loop = function
 164       | [] ->
 165           true
 166       | (j, x) :: row ->
 167
 168           (* [x] is a significant element. *)
 169
 170           (* By hypothesis, [k + j] is nonnegative. If it is greater than or
 171              equal to the current length of the data array, stop -- the row
 172              fits. *)
 173
 174           assert (k + j >= 0);
 175
 176           if k + j >= d then
 177             true
 178
 179           (* We now know that [k + j] is within bounds of the data
 180              array. Check whether it is compatible with the element [y] found
 181              there. If it is, continue. If it isn't, stop -- the row does not
 182              fit. *)
 183
 184           else
 185             let y = InfiniteArray.get data (k + j) in
 186             if insignificant y || equal x y then
 187               loop row
 188             else
 189               false
 190
 191     in
 192     loop row
 193
 194   in
 195
 196   (* Find the leftmost position where a row fits. *)
 197
 198   (* If the leftmost significant element in this row is at offset [j],
 199      then we can hope to fit as far left as [-j] -- so this element
 200      lands at offset [0] in the data array. *)
 201
 202   (* Note that displacements may be negative. This means that, for
 203      insignificant elements, accesses to the data array could fail: they could
 204      be out of bounds, either towards the left or towards the right. This is
 205      not a problem, as long as [get] is invoked only at significant
 206      elements. *)
 207
 208   let rec fit k row : int =
 209     if fits k row then
 210       k
 211     else
 212       fit (k + 1) row
 213   in
 214
 215   let fit row =
 216     match row with
 217     | [] ->
 218         0 (* irrelevant *)
 219     | (j, _) :: _ ->
 220         fit (-j) row
 221   in
 222
 223   (* Write [row] at (compatible) offset [k]. *)
 224
 225   let rec write k = function
 226     | [] ->
 227         ()
 228     | (j, x) :: row ->
 229         InfiniteArray.set data (k + j) x;
 230         write k row
 231   in
 232
 233   (* Iterate over the sorted list of rows. Fit and write each row at
 234      the leftmost compatible offset. Update the displacement table. *)
 235
 236   let () =
 237     List.iter (fun (i, _, row) ->
 238       let k = fit row in (* if [row] has leading insignificant elements, then [k] can be negative *)
 239       write k row;
 240       displacement.(i) <- encode k
 241     ) rows
 242   in
 243
 244   (* Return the compressed tables. *)
 245
 246   displacement, InfiniteArray.domain data
 247
 248 (* [get ct i j] returns the value found at indices [i] and [j] in the
 249    compressed table [ct]. This function call is permitted only if the
 250    value found at indices [i] and [j] in the original table is
 251    significant -- otherwise, it could fail abruptly. *)
 252
 253 (* Together, [compress] and [get] have the property that, if the value
 254    found at indices [i] and [j] in an uncompressed table [t] is
 255    significant, then [get (compress t) i j] is equal to that value. *)
 256
 257 let get (displacement, data) i j =
 258   assert (0 <= i && i < Array.length displacement);
 259   let k = decode displacement.(i) in
 260   assert (0 <= k + j && k + j < Array.length data);
 261     (* failure of this assertion indicates an attempt to access an
 262        insignificant element that happens to be mapped out of the bounds
 263        of the [data] array. *)
 264   data.(k + j)
 265
 266 (* [getget] is a variant of [get] which only requires read access,
 267    via accessors, to the two components of the table. *)
 268
 269 let getget get_displacement get_data (displacement, data) i j =
 270   let k = decode (get_displacement displacement i) in
 271   get_data data (k + j)
 272