Coccinelle release 1.0.0-rc3
[bpt/coccinelle.git] / menhirlib / rowDisplacement.ml
1 (**************************************************************************)
2 (* *)
3 (* Menhir *)
4 (* *)
5 (* François Pottier, INRIA Rocquencourt *)
6 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
7 (* *)
8 (* Copyright 2005-2008 Institut National de Recherche en Informatique *)
9 (* et en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the GNU Library General Public License, with the *)
11 (* special exception on linking described in file LICENSE. *)
12 (* *)
13 (**************************************************************************)
14
15 (* This module compresses a two-dimensional table, where some values
16 are considered insignificant, via row displacement. *)
17
18 (* This idea reportedly appears in Aho and Ullman's ``Principles
19 of Compiler Design'' (1977). It is evaluated in Tarjan and Yao's
20 ``Storing a Sparse Table'' (1979) and in Dencker, Dürre, and Heuft's
21 ``Optimization of Parser Tables for Portable Compilers'' (1984). *)
22
23 (* A compressed table is represented as a pair of arrays. The
24 displacement array is an array of offsets into the data array. *)
25
26 type 'a table =
27 int array * (* displacement *)
28 'a array (* data *)
29
30 (* In a natural version of this algorithm, displacements would be greater
31 than (or equal to) [-n]. However, in the particular setting of Menhir,
32 both arrays are intended to be compressed with [PackedIntArray], which
33 does not efficiently support negative numbers. For this reason, we are
34 careful not to produce negative displacements. *)
35
36 (* In order to avoid producing negative displacements, we simply use the
37 least significant bit as the sign bit. This is implemented by [encode]
38 and [decode] below. *)
39
40 (* One could also think, say, of adding [n] to every displacement, so as
41 to ensure that all displacements are nonnegative. This would work, but
42 would require [n] to be published, for use by the decoder. *)
43
44 let encode (displacement : int) : int =
45 if displacement >= 0 then
46 displacement lsl 1
47 else
48 (-displacement) lsl 1 + 1
49
50 let decode (displacement : int) : int =
51 if displacement land 1 = 0 then
52 displacement lsr 1
53 else
54 -(displacement lsr 1)
55
56 (* It is reasonable to assume that, as matrices grow large, their
57 density becomes low, i.e., they have many insignificant entries.
58 As a result, it is important to work with a sparse data structure
59 for rows. We internally represent a row as a list of its
60 significant entries, where each entry is a pair of a [j] index and
61 an element. *)
62
63 type 'a row =
64 (int * 'a) list
65
66 (* [compress equal insignificant dummy m n t] turns the two-dimensional table
67 [t] into a compressed table. The parameter [equal] is equality of data
68 values. The parameter [wildcard] tells which data values are insignificant,
69 and can thus be overwritten with other values. The parameter [dummy] is
70 used to fill holes in the data array. [m] and [n] are the integer
71 dimensions of the table [t]. *)
72
73 let compress
74 (equal : 'a -> 'a -> bool)
75 (insignificant : 'a -> bool)
76 (dummy : 'a)
77 (m : int) (n : int)
78 (t : 'a array array)
79 : 'a table =
80
81 (* Be defensive. *)
82
83 assert (Array.length t = m);
84 assert begin
85 for i = 0 to m - 1 do
86 assert (Array.length t.(i) = n)
87 done;
88 true
89 end;
90
91 (* This turns a row-as-array into a row-as-sparse-list. *)
92
93 let sparse (line : 'a array) : 'a row =
94
95 let rec loop (j : int) (row : 'a row) =
96 if j < 0 then
97 row
98 else
99 let x = line.(j) in
100 loop
101 (j - 1)
102 (if insignificant x then row else (j, x) :: row)
103 in
104
105 loop (n - 1) []
106
107 in
108
109 (* Define the rank of a row as its number of significant entries. *)
110
111 let rank (row : 'a row) : int =
112 List.length row
113 in
114
115 (* Construct a list of all rows, together with their index and rank. *)
116
117 let rows : (int * int * 'a row) list = (* index, rank, row *)
118 Array.to_list (
119 Array.mapi (fun i line ->
120 let row = sparse line in
121 i, rank row, row
122 ) t
123 )
124 in
125
126 (* Sort this list by decreasing rank. This does not have any impact
127 on correctness, but reportedly improves compression. The
128 intuitive idea is that rows with few significant elements are
129 easy to fit, so they should be inserted last, after the problem
130 has become quite constrained by fitting the heavier rows. This
131 heuristic is attributed to Ziegler. *)
132
133 let rows =
134 List.sort (fun (_, rank1, _) (_, rank2, _) ->
135 compare rank2 rank1
136 ) rows
137 in
138
139 (* Allocate a one-dimensional array of displacements. *)
140
141 let displacement : int array =
142 Array.make m 0
143 in
144
145 (* Allocate a one-dimensional, infinite array of values. Indices
146 into this array are written [k]. *)
147
148 let data : 'a InfiniteArray.t =
149 InfiniteArray.make dummy
150 in
151
152 (* Determine whether [row] fits at offset [k] within the current [data]
153 array, up to extension of this array. *)
154
155 (* Note that this check always succeeds when [k] equals the length of
156 the [data] array. Indeed, the loop is then skipped. This property
157 guarantees the termination of the recursive function [fit] below. *)
158
159 let fits k (row : 'a row) : bool =
160
161 let d = InfiniteArray.extent data in
162
163 let rec loop = function
164 | [] ->
165 true
166 | (j, x) :: row ->
167
168 (* [x] is a significant element. *)
169
170 (* By hypothesis, [k + j] is nonnegative. If it is greater than or
171 equal to the current length of the data array, stop -- the row
172 fits. *)
173
174 assert (k + j >= 0);
175
176 if k + j >= d then
177 true
178
179 (* We now know that [k + j] is within bounds of the data
180 array. Check whether it is compatible with the element [y] found
181 there. If it is, continue. If it isn't, stop -- the row does not
182 fit. *)
183
184 else
185 let y = InfiniteArray.get data (k + j) in
186 if insignificant y || equal x y then
187 loop row
188 else
189 false
190
191 in
192 loop row
193
194 in
195
196 (* Find the leftmost position where a row fits. *)
197
198 (* If the leftmost significant element in this row is at offset [j],
199 then we can hope to fit as far left as [-j] -- so this element
200 lands at offset [0] in the data array. *)
201
202 (* Note that displacements may be negative. This means that, for
203 insignificant elements, accesses to the data array could fail: they could
204 be out of bounds, either towards the left or towards the right. This is
205 not a problem, as long as [get] is invoked only at significant
206 elements. *)
207
208 let rec fit k row : int =
209 if fits k row then
210 k
211 else
212 fit (k + 1) row
213 in
214
215 let fit row =
216 match row with
217 | [] ->
218 0 (* irrelevant *)
219 | (j, _) :: _ ->
220 fit (-j) row
221 in
222
223 (* Write [row] at (compatible) offset [k]. *)
224
225 let rec write k = function
226 | [] ->
227 ()
228 | (j, x) :: row ->
229 InfiniteArray.set data (k + j) x;
230 write k row
231 in
232
233 (* Iterate over the sorted list of rows. Fit and write each row at
234 the leftmost compatible offset. Update the displacement table. *)
235
236 let () =
237 List.iter (fun (i, _, row) ->
238 let k = fit row in (* if [row] has leading insignificant elements, then [k] can be negative *)
239 write k row;
240 displacement.(i) <- encode k
241 ) rows
242 in
243
244 (* Return the compressed tables. *)
245
246 displacement, InfiniteArray.domain data
247
248 (* [get ct i j] returns the value found at indices [i] and [j] in the
249 compressed table [ct]. This function call is permitted only if the
250 value found at indices [i] and [j] in the original table is
251 significant -- otherwise, it could fail abruptly. *)
252
253 (* Together, [compress] and [get] have the property that, if the value
254 found at indices [i] and [j] in an uncompressed table [t] is
255 significant, then [get (compress t) i j] is equal to that value. *)
256
257 let get (displacement, data) i j =
258 assert (0 <= i && i < Array.length displacement);
259 let k = decode displacement.(i) in
260 assert (0 <= k + j && k + j < Array.length data);
261 (* failure of this assertion indicates an attempt to access an
262 insignificant element that happens to be mapped out of the bounds
263 of the [data] array. *)
264 data.(k + j)
265
266 (* [getget] is a variant of [get] which only requires read access,
267 via accessors, to the two components of the table. *)
268
269 let getget get_displacement get_data (displacement, data) i j =
270 let k = decode (get_displacement displacement i) in
271 get_data data (k + j)
272