Initial import
[hcoop/zz_old/domtool.git] / src / smlnj-lib / binary-set-fn.sml
1 (* binary-set-fn.sml
2 *
3 * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details.
4 *
5 * This code was adapted from Stephen Adams' binary tree implementation
6 * of applicative integer sets.
7 *
8 * Copyright 1992 Stephen Adams.
9 *
10 * This software may be used freely provided that:
11 * 1. This copyright notice is attached to any copy, derived work,
12 * or work including all or part of this software.
13 * 2. Any derived work must contain a prominent notice stating that
14 * it has been altered from the original.
15 *
16 * Name(s): Stephen Adams.
17 * Department, Institution: Electronics & Computer Science,
18 * University of Southampton
19 * Address: Electronics & Computer Science
20 * University of Southampton
21 * Southampton SO9 5NH
22 * Great Britian
23 * E-mail: sra@ecs.soton.ac.uk
24 *
25 * Comments:
26 *
27 * 1. The implementation is based on Binary search trees of Bounded
28 * Balance, similar to Nievergelt & Reingold, SIAM J. Computing
29 * 2(1), March 1973. The main advantage of these trees is that
30 * they keep the size of the tree in the node, giving a constant
31 * time size operation.
32 *
33 * 2. The bounded balance criterion is simpler than N&R's alpha.
34 * Simply, one subtree must not have more than `weight' times as
35 * many elements as the opposite subtree. Rebalancing is
36 * guaranteed to reinstate the criterion for weight>2.23, but
37 * the occasional incorrect behaviour for weight=2 is not
38 * detrimental to performance.
39 *
40 * 3. There are two implementations of union. The default,
41 * hedge_union, is much more complex and usually 20% faster. I
42 * am not sure that the performance increase warrants the
43 * complexity (and time it took to write), but I am leaving it
44 * in for the competition. It is derived from the original
45 * union by replacing the split_lt(gt) operations with a lazy
46 * version. The `obvious' version is called old_union.
47 *
48 * 4. Most time is spent in T', the rebalancing constructor. If my
49 * understanding of the output of *<file> in the sml batch
50 * compiler is correct then the code produced by NJSML 0.75
51 * (sparc) for the final case is very disappointing. Most
52 * invocations fall through to this case and most of these cases
53 * fall to the else part, i.e. the plain contructor,
54 * T(v,ln+rn+1,l,r). The poor code allocates a 16 word vector
55 * and saves lots of registers into it. In the common case it
56 * then retrieves a few of the registers and allocates the 5
57 * word T node. The values that it retrieves were live in
58 * registers before the massive save.
59 *
60 * Modified to functor to support general ordered values
61 *)
62
63 functor BinarySetFn (K : ORD_KEY) : ORD_SET =
64 struct
65
66 structure Key = K
67
68 type item = K.ord_key
69
70 datatype set
71 = E
72 | T of {
73 elt : item,
74 cnt : int,
75 left : set,
76 right : set
77 }
78
79 fun numItems E = 0
80 | numItems (T{cnt,...}) = cnt
81
82 fun isEmpty E = true
83 | isEmpty _ = false
84
85 fun mkT(v,n,l,r) = T{elt=v,cnt=n,left=l,right=r}
86
87 (* N(v,l,r) = T(v,1+numItems(l)+numItems(r),l,r) *)
88 fun N(v,E,E) = mkT(v,1,E,E)
89 | N(v,E,r as T{cnt=n,...}) = mkT(v,n+1,E,r)
90 | N(v,l as T{cnt=n,...}, E) = mkT(v,n+1,l,E)
91 | N(v,l as T{cnt=n,...}, r as T{cnt=m,...}) = mkT(v,n+m+1,l,r)
92
93 fun single_L (a,x,T{elt=b,left=y,right=z,...}) = N(b,N(a,x,y),z)
94 | single_L _ = raise Match
95 fun single_R (b,T{elt=a,left=x,right=y,...},z) = N(a,x,N(b,y,z))
96 | single_R _ = raise Match
97 fun double_L (a,w,T{elt=c,left=T{elt=b,left=x,right=y,...},right=z,...}) =
98 N(b,N(a,w,x),N(c,y,z))
99 | double_L _ = raise Match
100 fun double_R (c,T{elt=a,left=w,right=T{elt=b,left=x,right=y,...},...},z) =
101 N(b,N(a,w,x),N(c,y,z))
102 | double_R _ = raise Match
103
104 (*
105 ** val weight = 3
106 ** fun wt i = weight * i
107 *)
108 fun wt (i : int) = i + i + i
109
110 fun T' (v,E,E) = mkT(v,1,E,E)
111 | T' (v,E,r as T{left=E,right=E,...}) = mkT(v,2,E,r)
112 | T' (v,l as T{left=E,right=E,...},E) = mkT(v,2,l,E)
113
114 | T' (p as (_,E,T{left=T _,right=E,...})) = double_L p
115 | T' (p as (_,T{left=E,right=T _,...},E)) = double_R p
116
117 (* these cases almost never happen with small weight*)
118 | T' (p as (_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) =
119 if ln<rn then single_L p else double_L p
120 | T' (p as (_,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...},E)) =
121 if ln>rn then single_R p else double_R p
122
123 | T' (p as (_,E,T{left=E,...})) = single_L p
124 | T' (p as (_,T{right=E,...},E)) = single_R p
125
126 | T' (p as (v,l as T{elt=lv,cnt=ln,left=ll,right=lr},
127 r as T{elt=rv,cnt=rn,left=rl,right=rr})) =
128 if rn >= wt ln (*right is too big*)
129 then
130 let val rln = numItems rl
131 val rrn = numItems rr
132 in
133 if rln < rrn then single_L p else double_L p
134 end
135 else if ln >= wt rn (*left is too big*)
136 then
137 let val lln = numItems ll
138 val lrn = numItems lr
139 in
140 if lrn < lln then single_R p else double_R p
141 end
142 else mkT(v,ln+rn+1,l,r)
143
144 fun add (E,x) = mkT(x,1,E,E)
145 | add (set as T{elt=v,left=l,right=r,cnt},x) =
146 case K.compare(x,v) of
147 LESS => T'(v,add(l,x),r)
148 | GREATER => T'(v,l,add(r,x))
149 | EQUAL => mkT(x,cnt,l,r)
150 fun add' (s, x) = add(x, s)
151
152 fun concat3 (E,v,r) = add(r,v)
153 | concat3 (l,v,E) = add(l,v)
154 | concat3 (l as T{elt=v1,cnt=n1,left=l1,right=r1}, v,
155 r as T{elt=v2,cnt=n2,left=l2,right=r2}) =
156 if wt n1 < n2 then T'(v2,concat3(l,v,l2),r2)
157 else if wt n2 < n1 then T'(v1,l1,concat3(r1,v,r))
158 else N(v,l,r)
159
160 fun split_lt (E,x) = E
161 | split_lt (T{elt=v,left=l,right=r,...},x) =
162 case K.compare(v,x) of
163 GREATER => split_lt(l,x)
164 | LESS => concat3(l,v,split_lt(r,x))
165 | _ => l
166
167 fun split_gt (E,x) = E
168 | split_gt (T{elt=v,left=l,right=r,...},x) =
169 case K.compare(v,x) of
170 LESS => split_gt(r,x)
171 | GREATER => concat3(split_gt(l,x),v,r)
172 | _ => r
173
174 fun min (T{elt=v,left=E,...}) = v
175 | min (T{left=l,...}) = min l
176 | min _ = raise Match
177
178 fun delmin (T{left=E,right=r,...}) = r
179 | delmin (T{elt=v,left=l,right=r,...}) = T'(v,delmin l,r)
180 | delmin _ = raise Match
181
182 fun delete' (E,r) = r
183 | delete' (l,E) = l
184 | delete' (l,r) = T'(min r,l,delmin r)
185
186 fun concat (E, s) = s
187 | concat (s, E) = s
188 | concat (t1 as T{elt=v1,cnt=n1,left=l1,right=r1},
189 t2 as T{elt=v2,cnt=n2,left=l2,right=r2}) =
190 if wt n1 < n2 then T'(v2,concat(t1,l2),r2)
191 else if wt n2 < n1 then T'(v1,l1,concat(r1,t2))
192 else T'(min t2,t1, delmin t2)
193
194
195 local
196 fun trim (lo,hi,E) = E
197 | trim (lo,hi,s as T{elt=v,left=l,right=r,...}) =
198 if K.compare(v,lo) = GREATER
199 then if K.compare(v,hi) = LESS then s else trim(lo,hi,l)
200 else trim(lo,hi,r)
201
202 fun uni_bd (s,E,_,_) = s
203 | uni_bd (E,T{elt=v,left=l,right=r,...},lo,hi) =
204 concat3(split_gt(l,lo),v,split_lt(r,hi))
205 | uni_bd (T{elt=v,left=l1,right=r1,...},
206 s2 as T{elt=v2,left=l2,right=r2,...},lo,hi) =
207 concat3(uni_bd(l1,trim(lo,v,s2),lo,v),
208 v,
209 uni_bd(r1,trim(v,hi,s2),v,hi))
210 (* inv: lo < v < hi *)
211
212 (* all the other versions of uni and trim are
213 * specializations of the above two functions with
214 * lo=-infinity and/or hi=+infinity
215 *)
216
217 fun trim_lo (_, E) = E
218 | trim_lo (lo,s as T{elt=v,right=r,...}) =
219 case K.compare(v,lo) of
220 GREATER => s
221 | _ => trim_lo(lo,r)
222
223 fun trim_hi (_, E) = E
224 | trim_hi (hi,s as T{elt=v,left=l,...}) =
225 case K.compare(v,hi) of
226 LESS => s
227 | _ => trim_hi(hi,l)
228
229 fun uni_hi (s,E,_) = s
230 | uni_hi (E,T{elt=v,left=l,right=r,...},hi) =
231 concat3(l,v,split_lt(r,hi))
232 | uni_hi (T{elt=v,left=l1,right=r1,...},
233 s2 as T{elt=v2,left=l2,right=r2,...},hi) =
234 concat3(uni_hi(l1,trim_hi(v,s2),v),v,uni_bd(r1,trim(v,hi,s2),v,hi))
235
236 fun uni_lo (s,E,_) = s
237 | uni_lo (E,T{elt=v,left=l,right=r,...},lo) =
238 concat3(split_gt(l,lo),v,r)
239 | uni_lo (T{elt=v,left=l1,right=r1,...},
240 s2 as T{elt=v2,left=l2,right=r2,...},lo) =
241 concat3(uni_bd(l1,trim(lo,v,s2),lo,v),v,uni_lo(r1,trim_lo(v,s2),v))
242
243 fun uni (s,E) = s
244 | uni (E,s) = s
245 | uni (T{elt=v,left=l1,right=r1,...},
246 s2 as T{elt=v2,left=l2,right=r2,...}) =
247 concat3(uni_hi(l1,trim_hi(v,s2),v), v, uni_lo(r1,trim_lo(v,s2),v))
248
249 in
250 val hedge_union = uni
251 end
252
253 (* The old_union version is about 20% slower than
254 * hedge_union in most cases
255 *)
256 fun old_union (E,s2) = s2
257 | old_union (s1,E) = s1
258 | old_union (T{elt=v,left=l,right=r,...},s2) =
259 let val l2 = split_lt(s2,v)
260 val r2 = split_gt(s2,v)
261 in
262 concat3(old_union(l,l2),v,old_union(r,r2))
263 end
264
265 val empty = E
266 fun singleton x = T{elt=x,cnt=1,left=E,right=E}
267
268 fun addList (s,l) = List.foldl (fn (i,s) => add(s,i)) s l
269
270 val add = add
271
272 fun member (set, x) = let
273 fun pk E = false
274 | pk (T{elt=v, left=l, right=r, ...}) = (
275 case K.compare(x,v)
276 of LESS => pk l
277 | EQUAL => true
278 | GREATER => pk r
279 (* end case *))
280 in
281 pk set
282 end
283
284 local
285 (* true if every item in t is in t' *)
286 fun treeIn (t,t') = let
287 fun isIn E = true
288 | isIn (T{elt,left=E,right=E,...}) = member(t',elt)
289 | isIn (T{elt,left,right=E,...}) =
290 member(t',elt) andalso isIn left
291 | isIn (T{elt,left=E,right,...}) =
292 member(t',elt) andalso isIn right
293 | isIn (T{elt,left,right,...}) =
294 member(t',elt) andalso isIn left andalso isIn right
295 in
296 isIn t
297 end
298 in
299 fun isSubset (E,_) = true
300 | isSubset (_,E) = false
301 | isSubset (t as T{cnt=n,...},t' as T{cnt=n',...}) =
302 (n<=n') andalso treeIn (t,t')
303
304 fun equal (E,E) = true
305 | equal (t as T{cnt=n,...},t' as T{cnt=n',...}) =
306 (n=n') andalso treeIn (t,t')
307 | equal _ = false
308 end
309
310 local
311 fun next ((t as T{right, ...})::rest) = (t, left(right, rest))
312 | next _ = (E, [])
313 and left (E, rest) = rest
314 | left (t as T{left=l, ...}, rest) = left(l, t::rest)
315 in
316 fun compare (s1, s2) = let
317 fun cmp (t1, t2) = (case (next t1, next t2)
318 of ((E, _), (E, _)) => EQUAL
319 | ((E, _), _) => LESS
320 | (_, (E, _)) => GREATER
321 | ((T{elt=e1, ...}, r1), (T{elt=e2, ...}, r2)) => (
322 case Key.compare(e1, e2)
323 of EQUAL => cmp (r1, r2)
324 | order => order
325 (* end case *))
326 (* end case *))
327 in
328 cmp (left(s1, []), left(s2, []))
329 end
330 end
331
332 fun delete (E,x) = raise LibBase.NotFound
333 | delete (set as T{elt=v,left=l,right=r,...},x) =
334 case K.compare(x,v) of
335 LESS => T'(v,delete(l,x),r)
336 | GREATER => T'(v,l,delete(r,x))
337 | _ => delete'(l,r)
338
339 val union = hedge_union
340
341 fun intersection (E, _) = E
342 | intersection (_, E) = E
343 | intersection (s, T{elt=v,left=l,right=r,...}) = let
344 val l2 = split_lt(s,v)
345 val r2 = split_gt(s,v)
346 in
347 if member(s,v)
348 then concat3(intersection(l2,l),v,intersection(r2,r))
349 else concat(intersection(l2,l),intersection(r2,r))
350 end
351
352 fun difference (E,s) = E
353 | difference (s,E) = s
354 | difference (s, T{elt=v,left=l,right=r,...}) =
355 let val l2 = split_lt(s,v)
356 val r2 = split_gt(s,v)
357 in
358 concat(difference(l2,l),difference(r2,r))
359 end
360
361 fun map f set = let
362 fun map'(acc, E) = acc
363 | map'(acc, T{elt,left,right,...}) =
364 map' (add (map' (acc, left), f elt), right)
365 in
366 map' (E, set)
367 end
368
369 fun app apf =
370 let fun apply E = ()
371 | apply (T{elt,left,right,...}) =
372 (apply left;apf elt; apply right)
373 in
374 apply
375 end
376
377 fun foldl f b set = let
378 fun foldf (E, b) = b
379 | foldf (T{elt,left,right,...}, b) =
380 foldf (right, f(elt, foldf (left, b)))
381 in
382 foldf (set, b)
383 end
384
385 fun foldr f b set = let
386 fun foldf (E, b) = b
387 | foldf (T{elt,left,right,...}, b) =
388 foldf (left, f(elt, foldf (right, b)))
389 in
390 foldf (set, b)
391 end
392
393 fun listItems set = foldr (op::) [] set
394
395 fun filter pred set =
396 foldl (fn (item, s) => if (pred item) then add(s, item) else s)
397 empty set
398
399 fun partition pred set =
400 foldl
401 (fn (item, (s1, s2)) =>
402 if (pred item) then (add(s1, item), s2) else (s1, add(s2, item))
403 )
404 (empty, empty) set
405
406 fun find p E = NONE
407 | find p (T{elt,left,right,...}) = (case find p left
408 of NONE => if (p elt)
409 then SOME elt
410 else find p right
411 | a => a
412 (* end case *))
413
414 fun exists p E = false
415 | exists p (T{elt, left, right,...}) =
416 (exists p left) orelse (p elt) orelse (exists p right)
417
418 end (* BinarySetFn *)