1 (**************************************************************************)
5 (* François Pottier, INRIA Rocquencourt *)
6 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
8 (* Copyright 2005-2008 Institut National de Recherche en Informatique *)
9 (* et en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the GNU Library General Public License, with the *)
11 (* special exception on linking described in file LICENSE. *)
13 (**************************************************************************)
15 (* This module compresses a two-dimensional table, where some values
16 are considered insignificant, via row displacement. *)
18 (* This idea reportedly appears in Aho and Ullman's ``Principles
19 of Compiler Design'' (1977). It is evaluated in Tarjan and Yao's
20 ``Storing a Sparse Table'' (1979) and in Dencker, Dürre, and Heuft's
21 ``Optimization of Parser Tables for Portable Compilers'' (1984). *)
23 (* A compressed table is represented as a pair of arrays. The
24 displacement array is an array of offsets into the data array. *)
27 int array
* (* displacement *)
30 (* In a natural version of this algorithm, displacements would be greater
31 than (or equal to) [-n]. However, in the particular setting of Menhir,
32 both arrays are intended to be compressed with [PackedIntArray], which
33 does not efficiently support negative numbers. For this reason, we are
34 careful not to produce negative displacements. *)
36 (* In order to avoid producing negative displacements, we simply use the
37 least significant bit as the sign bit. This is implemented by [encode]
38 and [decode] below. *)
40 (* One could also think, say, of adding [n] to every displacement, so as
41 to ensure that all displacements are nonnegative. This would work, but
42 would require [n] to be published, for use by the decoder. *)
44 let encode (displacement
: int) : int =
45 if displacement
>= 0 then
48 (-displacement
) lsl 1 + 1
50 let decode (displacement
: int) : int =
51 if displacement
land 1 = 0 then
56 (* It is reasonable to assume that, as matrices grow large, their
57 density becomes low, i.e., they have many insignificant entries.
58 As a result, it is important to work with a sparse data structure
59 for rows. We internally represent a row as a list of its
60 significant entries, where each entry is a pair of a [j] index and
66 (* [compress equal insignificant dummy m n t] turns the two-dimensional table
67 [t] into a compressed table. The parameter [equal] is equality of data
68 values. The parameter [wildcard] tells which data values are insignificant,
69 and can thus be overwritten with other values. The parameter [dummy] is
70 used to fill holes in the data array. [m] and [n] are the integer
71 dimensions of the table [t]. *)
74 (equal
: 'a
-> 'a
-> bool)
75 (insignificant
: 'a
-> bool)
83 assert (Array.length t
= m
);
86 assert (Array.length t
.(i
) = n
)
91 (* This turns a row-as-array into a row-as-sparse-list. *)
93 let sparse (line
: 'a array
) : 'a row
=
95 let rec loop (j
: int) (row
: 'a row
) =
102 (if insignificant
x then row
else (j
, x) :: row
)
109 (* Define the rank of a row as its number of significant entries. *)
111 let rank (row
: 'a row
) : int =
115 (* Construct a list of all rows, together with their index and rank. *)
117 let rows : (int * int * 'a row
) list
= (* index, rank, row *)
119 Array.mapi
(fun i line
->
120 let row = sparse line
in
126 (* Sort this list by decreasing rank. This does not have any impact
127 on correctness, but reportedly improves compression. The
128 intuitive idea is that rows with few significant elements are
129 easy to fit, so they should be inserted last, after the problem
130 has become quite constrained by fitting the heavier rows. This
131 heuristic is attributed to Ziegler. *)
134 List.sort
(fun (_
, rank1
, _
) (_
, rank2
, _
) ->
139 (* Allocate a one-dimensional array of displacements. *)
141 let displacement : int array
=
145 (* Allocate a one-dimensional, infinite array of values. Indices
146 into this array are written [k]. *)
148 let data : 'a
InfiniteArray.t
=
149 InfiniteArray.make dummy
152 (* Determine whether [row] fits at offset [k] within the current [data]
153 array, up to extension of this array. *)
155 (* Note that this check always succeeds when [k] equals the length of
156 the [data] array. Indeed, the loop is then skipped. This property
157 guarantees the termination of the recursive function [fit] below. *)
159 let fits k
(row : 'a
row) : bool =
161 let d = InfiniteArray.extent
data in
163 let rec loop = function
168 (* [x] is a significant element. *)
170 (* By hypothesis, [k + j] is nonnegative. If it is greater than or
171 equal to the current length of the data array, stop -- the row
179 (* We now know that [k + j] is within bounds of the data
180 array. Check whether it is compatible with the element [y] found
181 there. If it is, continue. If it isn't, stop -- the row does not
185 let y = InfiniteArray.get
data (k
+ j
) in
186 if insignificant
y || equal
x y then
196 (* Find the leftmost position where a row fits. *)
198 (* If the leftmost significant element in this row is at offset [j],
199 then we can hope to fit as far left as [-j] -- so this element
200 lands at offset [0] in the data array. *)
202 (* Note that displacements may be negative. This means that, for
203 insignificant elements, accesses to the data array could fail: they could
204 be out of bounds, either towards the left or towards the right. This is
205 not a problem, as long as [get] is invoked only at significant
208 let rec fit k
row : int =
223 (* Write [row] at (compatible) offset [k]. *)
225 let rec write k
= function
229 InfiniteArray.set
data (k
+ j
) x;
233 (* Iterate over the sorted list of rows. Fit and write each row at
234 the leftmost compatible offset. Update the displacement table. *)
237 List.iter
(fun (i
, _
, row) ->
238 let k = fit row in (* if [row] has leading insignificant elements, then [k] can be negative *)
240 displacement.(i
) <- encode k
244 (* Return the compressed tables. *)
246 displacement, InfiniteArray.domain
data
248 (* [get ct i j] returns the value found at indices [i] and [j] in the
249 compressed table [ct]. This function call is permitted only if the
250 value found at indices [i] and [j] in the original table is
251 significant -- otherwise, it could fail abruptly. *)
253 (* Together, [compress] and [get] have the property that, if the value
254 found at indices [i] and [j] in an uncompressed table [t] is
255 significant, then [get (compress t) i j] is equal to that value. *)
257 let get (displacement, data) i j
=
258 assert (0 <= i
&& i
< Array.length
displacement);
259 let k = decode displacement.(i
) in
260 assert (0 <= k + j
&& k + j
< Array.length
data);
261 (* failure of this assertion indicates an attempt to access an
262 insignificant element that happens to be mapped out of the bounds
263 of the [data] array. *)
266 (* [getget] is a variant of [get] which only requires read access,
267 via accessors, to the two components of the table. *)
269 let getget get_displacement get_data
(displacement, data) i j
=
270 let k = decode (get_displacement
displacement i
) in
271 get_data
data (k + j
)