3 * COPYRIGHT (c
) 1993 by AT
&T Bell Laboratories
. See COPYRIGHT file for details
.
5 * This code was adapted from Stephen Adams
' binary tree implementation
6 * of applicative integer sets
.
8 * Copyright
1992 Stephen Adams
.
10 * This software may be used freely provided that
:
11 * 1. This copyright notice is attached to any copy
, derived work
,
12 * or work including all or part
of this software
.
13 * 2. Any derived work must contain a prominent notice stating that
14 * it has been altered from the original
.
16 * Name(s
): Stephen Adams
.
17 * Department
, Institution
: Electronics
& Computer Science
,
18 * University
of Southampton
19 * Address
: Electronics
& Computer Science
20 * University
of Southampton
23 * E
-mail
: sra@ecs
.soton
.ac
.uk
27 * 1. The implementation is based on Binary search trees
of Bounded
28 * Balance
, similar to Nievergelt
& Reingold
, SIAM J
. Computing
29 * 2(1), March
1973. The main advantage
of these trees is that
30 * they keep the size
of the tree
in the node
, giving a constant
31 * time size operation
.
33 * 2. The bounded balance criterion is simpler than N
&R
's alpha
.
34 * Simply
, one subtree must not have more than `weight
' times
as
35 * many elements
as the opposite subtree
. Rebalancing is
36 * guaranteed to reinstate the criterion for weight
>2.23, but
37 * the occasional incorrect behaviour for weight
=2 is not
38 * detrimental to performance
.
40 * 3. There are two implementations
of union
. The default
,
41 * hedge_union
, is much more complex
and usually
20% faster
. I
42 * am not sure that the performance increase warrants the
43 * complexity (and time it took to write
), but I am leaving it
44 * in for the competition
. It is derived from the original
45 * union by replacing the
split_lt(gt
) operations
with a lazy
46 * version
. The `obvious
' version is called old_union
.
48 * 4. Most time is spent
in T
', the rebalancing constructor
. If my
49 * understanding
of the output
of *<file
> in the sml batch
50 * compiler is correct
then the code produced by NJSML
0.75
51 * (sparc
) for the final
case is very disappointing
. Most
52 * invocations fall through to this
case and most
of these cases
53 * fall to the
else part
, i
.e
. the plain contructor
,
54 * T(v
,ln
+rn
+1,l
,r
). The poor code allocates a
16 word vector
55 * and saves lots
of registers into it
. In the common
case it
56 * then retrieves a few
of the registers
and allocates the
5
57 * word T node
. The values that it retrieves were live
in
58 * registers
before the massive save
.
60 * Modified to
functor to support general ordered values
63 functor BinarySetFn (K
: ORD_KEY
) : ORD_SET
=
80 |
numItems (T
{cnt
,...}) = cnt
85 fun mkT(v
,n
,l
,r
) = T
{elt
=v
,cnt
=n
,left
=l
,right
=r
}
87 (* N(v
,l
,r
) = T(v
,1+numItems(l
)+numItems(r
),l
,r
) *)
88 fun N(v
,E
,E
) = mkT(v
,1,E
,E
)
89 |
N(v
,E
,r
as T
{cnt
=n
,...}) = mkT(v
,n
+1,E
,r
)
90 |
N(v
,l
as T
{cnt
=n
,...}, E
) = mkT(v
,n
+1,l
,E
)
91 |
N(v
,l
as T
{cnt
=n
,...}, r
as T
{cnt
=m
,...}) = mkT(v
,n
+m
+1,l
,r
)
93 fun single_L (a
,x
,T
{elt
=b
,left
=y
,right
=z
,...}) = N(b
,N(a
,x
,y
),z
)
94 | single_L _
= raise Match
95 fun single_R (b
,T
{elt
=a
,left
=x
,right
=y
,...},z
) = N(a
,x
,N(b
,y
,z
))
96 | single_R _
= raise Match
97 fun double_L (a
,w
,T
{elt
=c
,left
=T
{elt
=b
,left
=x
,right
=y
,...},right
=z
,...}) =
98 N(b
,N(a
,w
,x
),N(c
,y
,z
))
99 | double_L _
= raise Match
100 fun double_R (c
,T
{elt
=a
,left
=w
,right
=T
{elt
=b
,left
=x
,right
=y
,...},...},z
) =
101 N(b
,N(a
,w
,x
),N(c
,y
,z
))
102 | double_R _
= raise Match
106 ** fun wt i
= weight
* i
108 fun wt (i
: int) = i
+ i
+ i
110 fun T
' (v
,E
,E
) = mkT(v
,1,E
,E
)
111 | T
' (v
,E
,r
as T
{left
=E
,right
=E
,...}) = mkT(v
,2,E
,r
)
112 | T
' (v
,l
as T
{left
=E
,right
=E
,...},E
) = mkT(v
,2,l
,E
)
114 | T
' (p
as (_
,E
,T
{left
=T _
,right
=E
,...})) = double_L p
115 | T
' (p
as (_
,T
{left
=E
,right
=T _
,...},E
)) = double_R p
117 (* these cases almost never happen
with small weight
*)
118 | T
' (p
as (_
,E
,T
{left
=T
{cnt
=ln
,...},right
=T
{cnt
=rn
,...},...})) =
119 if ln
<rn
then single_L p
else double_L p
120 | T
' (p
as (_
,T
{left
=T
{cnt
=ln
,...},right
=T
{cnt
=rn
,...},...},E
)) =
121 if ln
>rn
then single_R p
else double_R p
123 | T
' (p
as (_
,E
,T
{left
=E
,...})) = single_L p
124 | T
' (p
as (_
,T
{right
=E
,...},E
)) = single_R p
126 | T
' (p
as (v
,l
as T
{elt
=lv
,cnt
=ln
,left
=ll
,right
=lr
},
127 r
as T
{elt
=rv
,cnt
=rn
,left
=rl
,right
=rr
})) =
128 if rn
>= wt
ln (*right is too big
*)
130 let val rln
= numItems rl
131 val rrn
= numItems rr
133 if rln
< rrn
then single_L p
else double_L p
135 else if ln
>= wt
rn (*left is too big
*)
137 let val lln
= numItems ll
138 val lrn
= numItems lr
140 if lrn
< lln
then single_R p
else double_R p
142 else mkT(v
,ln
+rn
+1,l
,r
)
144 fun add (E
,x
) = mkT(x
,1,E
,E
)
145 |
add (set
as T
{elt
=v
,left
=l
,right
=r
,cnt
},x
) =
146 case K
.compare(x
,v
) of
147 LESS
=> T
'(v
,add(l
,x
),r
)
148 | GREATER
=> T
'(v
,l
,add(r
,x
))
149 | EQUAL
=> mkT(x
,cnt
,l
,r
)
150 fun add
' (s
, x
) = add(x
, s
)
152 fun concat3 (E
,v
,r
) = add(r
,v
)
153 |
concat3 (l
,v
,E
) = add(l
,v
)
154 |
concat3 (l
as T
{elt
=v1
,cnt
=n1
,left
=l1
,right
=r1
}, v
,
155 r
as T
{elt
=v2
,cnt
=n2
,left
=l2
,right
=r2
}) =
156 if wt n1
< n2
then T
'(v2
,concat3(l
,v
,l2
),r2
)
157 else if wt n2
< n1
then T
'(v1
,l1
,concat3(r1
,v
,r
))
160 fun split_lt (E
,x
) = E
161 |
split_lt (T
{elt
=v
,left
=l
,right
=r
,...},x
) =
162 case K
.compare(v
,x
) of
163 GREATER
=> split_lt(l
,x
)
164 | LESS
=> concat3(l
,v
,split_lt(r
,x
))
167 fun split_gt (E
,x
) = E
168 |
split_gt (T
{elt
=v
,left
=l
,right
=r
,...},x
) =
169 case K
.compare(v
,x
) of
170 LESS
=> split_gt(r
,x
)
171 | GREATER
=> concat3(split_gt(l
,x
),v
,r
)
174 fun min (T
{elt
=v
,left
=E
,...}) = v
175 |
min (T
{left
=l
,...}) = min l
176 | min _
= raise Match
178 fun delmin (T
{left
=E
,right
=r
,...}) = r
179 |
delmin (T
{elt
=v
,left
=l
,right
=r
,...}) = T
'(v
,delmin l
,r
)
180 | delmin _
= raise Match
182 fun delete
' (E
,r
) = r
184 | delete
' (l
,r
) = T
'(min r
,l
,delmin r
)
186 fun concat (E
, s
) = s
188 |
concat (t1
as T
{elt
=v1
,cnt
=n1
,left
=l1
,right
=r1
},
189 t2
as T
{elt
=v2
,cnt
=n2
,left
=l2
,right
=r2
}) =
190 if wt n1
< n2
then T
'(v2
,concat(t1
,l2
),r2
)
191 else if wt n2
< n1
then T
'(v1
,l1
,concat(r1
,t2
))
192 else T
'(min t2
,t1
, delmin t2
)
196 fun trim (lo
,hi
,E
) = E
197 |
trim (lo
,hi
,s
as T
{elt
=v
,left
=l
,right
=r
,...}) =
198 if K
.compare(v
,lo
) = GREATER
199 then if K
.compare(v
,hi
) = LESS
then s
else trim(lo
,hi
,l
)
202 fun uni_bd (s
,E
,_
,_
) = s
203 |
uni_bd (E
,T
{elt
=v
,left
=l
,right
=r
,...},lo
,hi
) =
204 concat3(split_gt(l
,lo
),v
,split_lt(r
,hi
))
205 |
uni_bd (T
{elt
=v
,left
=l1
,right
=r1
,...},
206 s2
as T
{elt
=v2
,left
=l2
,right
=r2
,...},lo
,hi
) =
207 concat3(uni_bd(l1
,trim(lo
,v
,s2
),lo
,v
),
209 uni_bd(r1
,trim(v
,hi
,s2
),v
,hi
))
210 (* inv
: lo
< v
< hi
*)
212 (* all the other versions
of uni
and trim are
213 * specializations
of the above two functions
with
214 * lo
=-infinity
and/or hi
=+infinity
217 fun trim_lo (_
, E
) = E
218 |
trim_lo (lo
,s
as T
{elt
=v
,right
=r
,...}) =
219 case K
.compare(v
,lo
) of
223 fun trim_hi (_
, E
) = E
224 |
trim_hi (hi
,s
as T
{elt
=v
,left
=l
,...}) =
225 case K
.compare(v
,hi
) of
229 fun uni_hi (s
,E
,_
) = s
230 |
uni_hi (E
,T
{elt
=v
,left
=l
,right
=r
,...},hi
) =
231 concat3(l
,v
,split_lt(r
,hi
))
232 |
uni_hi (T
{elt
=v
,left
=l1
,right
=r1
,...},
233 s2
as T
{elt
=v2
,left
=l2
,right
=r2
,...},hi
) =
234 concat3(uni_hi(l1
,trim_hi(v
,s2
),v
),v
,uni_bd(r1
,trim(v
,hi
,s2
),v
,hi
))
236 fun uni_lo (s
,E
,_
) = s
237 |
uni_lo (E
,T
{elt
=v
,left
=l
,right
=r
,...},lo
) =
238 concat3(split_gt(l
,lo
),v
,r
)
239 |
uni_lo (T
{elt
=v
,left
=l1
,right
=r1
,...},
240 s2
as T
{elt
=v2
,left
=l2
,right
=r2
,...},lo
) =
241 concat3(uni_bd(l1
,trim(lo
,v
,s2
),lo
,v
),v
,uni_lo(r1
,trim_lo(v
,s2
),v
))
245 |
uni (T
{elt
=v
,left
=l1
,right
=r1
,...},
246 s2
as T
{elt
=v2
,left
=l2
,right
=r2
,...}) =
247 concat3(uni_hi(l1
,trim_hi(v
,s2
),v
), v
, uni_lo(r1
,trim_lo(v
,s2
),v
))
250 val hedge_union
= uni
253 (* The old_union version is about
20% slower than
254 * hedge_union
in most cases
256 fun old_union (E
,s2
) = s2
257 |
old_union (s1
,E
) = s1
258 |
old_union (T
{elt
=v
,left
=l
,right
=r
,...},s2
) =
259 let val l2
= split_lt(s2
,v
)
260 val r2
= split_gt(s2
,v
)
262 concat3(old_union(l
,l2
),v
,old_union(r
,r2
))
266 fun singleton x
= T
{elt
=x
,cnt
=1,left
=E
,right
=E
}
268 fun addList (s
,l
) = List.foldl (fn (i
,s
) => add(s
,i
)) s l
272 fun member (set
, x
) = let
274 |
pk (T
{elt
=v
, left
=l
, right
=r
, ...}) = (
285 (* true if every item
in t is
in t
' *)
286 fun treeIn (t
,t
') = let
288 |
isIn (T
{elt
,left
=E
,right
=E
,...}) = member(t
',elt
)
289 |
isIn (T
{elt
,left
,right
=E
,...}) =
290 member(t
',elt
) andalso isIn left
291 |
isIn (T
{elt
,left
=E
,right
,...}) =
292 member(t
',elt
) andalso isIn right
293 |
isIn (T
{elt
,left
,right
,...}) =
294 member(t
',elt
) andalso isIn left
andalso isIn right
299 fun isSubset (E
,_
) = true
300 |
isSubset (_
,E
) = false
301 |
isSubset (t
as T
{cnt
=n
,...},t
' as T
{cnt
=n
',...}) =
302 (n
<=n
') andalso treeIn (t
,t
')
304 fun equal (E
,E
) = true
305 |
equal (t
as T
{cnt
=n
,...},t
' as T
{cnt
=n
',...}) =
306 (n
=n
') andalso treeIn (t
,t
')
311 fun next ((t
as T
{right
, ...})::rest
) = (t
, left(right
, rest
))
313 and left (E
, rest
) = rest
314 |
left (t
as T
{left
=l
, ...}, rest
) = left(l
, t
::rest
)
316 fun compare (s1
, s2
) = let
317 fun cmp (t1
, t2
) = (case (next t1
, next t2
)
318 of ((E
, _
), (E
, _
)) => EQUAL
319 |
((E
, _
), _
) => LESS
320 |
(_
, (E
, _
)) => GREATER
321 |
((T
{elt
=e1
, ...}, r1
), (T
{elt
=e2
, ...}, r2
)) => (
322 case Key
.compare(e1
, e2
)
323 of EQUAL
=> cmp (r1
, r2
)
328 cmp (left(s1
, []), left(s2
, []))
332 fun delete (E
,x
) = raise LibBase
.NotFound
333 |
delete (set
as T
{elt
=v
,left
=l
,right
=r
,...},x
) =
334 case K
.compare(x
,v
) of
335 LESS
=> T
'(v
,delete(l
,x
),r
)
336 | GREATER
=> T
'(v
,l
,delete(r
,x
))
339 val union
= hedge_union
341 fun intersection (E
, _
) = E
342 |
intersection (_
, E
) = E
343 |
intersection (s
, T
{elt
=v
,left
=l
,right
=r
,...}) = let
344 val l2
= split_lt(s
,v
)
345 val r2
= split_gt(s
,v
)
348 then concat3(intersection(l2
,l
),v
,intersection(r2
,r
))
349 else concat(intersection(l2
,l
),intersection(r2
,r
))
352 fun difference (E
,s
) = E
353 |
difference (s
,E
) = s
354 |
difference (s
, T
{elt
=v
,left
=l
,right
=r
,...}) =
355 let val l2
= split_lt(s
,v
)
356 val r2
= split_gt(s
,v
)
358 concat(difference(l2
,l
),difference(r2
,r
))
362 fun map
'(acc
, E
) = acc
363 | map
'(acc
, T
{elt
,left
,right
,...}) =
364 map
' (add (map
' (acc
, left
), f elt
), right
)
371 |
apply (T
{elt
,left
,right
,...}) =
372 (apply left
;apf elt
; apply right
)
377 fun foldl f b set
= let
379 |
foldf (T
{elt
,left
,right
,...}, b
) =
380 foldf (right
, f(elt
, foldf (left
, b
)))
385 fun foldr f b set
= let
387 |
foldf (T
{elt
,left
,right
,...}, b
) =
388 foldf (left
, f(elt
, foldf (right
, b
)))
393 fun listItems set
= foldr (op::) [] set
395 fun filter pred set
=
396 foldl (fn (item
, s
) => if (pred item
) then add(s
, item
) else s
)
399 fun partition pred set
=
401 (fn (item
, (s1
, s2
)) =>
402 if (pred item
) then (add(s1
, item
), s2
) else (s1
, add(s2
, item
))
407 | find
p (T
{elt
,left
,right
,...}) = (case find p left
408 of NONE
=> if (p elt
)
414 fun exists p E
= false
415 | exists
p (T
{elt
, left
, right
,...}) =
416 (exists p left
) orelse (p elt
) orelse (exists p right
)
418 end (* BinarySetFn
*)