src/smlnj-lib/binary-set-fn.sml

   1 (* binary-set-fn.sml
   2  *
   3  * COPYRIGHT (c) 1993 by AT&T Bell Laboratories.  See COPYRIGHT file for details.
   4  *
   5  * This code was adapted from Stephen Adams' binary tree implementation
   6  * of applicative integer sets.
   7  *
   8  *    Copyright 1992 Stephen Adams.
   9  *
  10  *    This software may be used freely provided that:
  11  *      1. This copyright notice is attached to any copy, derived work,
  12  *         or work including all or part of this software.
  13  *      2. Any derived work must contain a prominent notice stating that
  14  *         it has been altered from the original.
  15  *
  16  *   Name(s): Stephen Adams.
  17  *   Department, Institution: Electronics & Computer Science,
  18  *      University of Southampton
  19  *   Address:  Electronics & Computer Science
  20  *             University of Southampton
  21  *         Southampton  SO9 5NH
  22  *         Great Britian
  23  *   E-mail:   sra@ecs.soton.ac.uk
  24  *
  25  *   Comments:
  26  *
  27  *     1.  The implementation is based on Binary search trees of Bounded
  28  *         Balance, similar to Nievergelt & Reingold, SIAM J. Computing
  29  *         2(1), March 1973.  The main advantage of these trees is that
  30  *         they keep the size of the tree in the node, giving a constant
  31  *         time size operation.
  32  *
  33  *     2.  The bounded balance criterion is simpler than N&R's alpha.
  34  *         Simply, one subtree must not have more than `weight' times as
  35  *         many elements as the opposite subtree.  Rebalancing is
  36  *         guaranteed to reinstate the criterion for weight>2.23, but
  37  *         the occasional incorrect behaviour for weight=2 is not
  38  *         detrimental to performance.
  39  *
  40  *     3.  There are two implementations of union.  The default,
  41  *         hedge_union, is much more complex and usually 20% faster.  I
  42  *         am not sure that the performance increase warrants the
  43  *         complexity (and time it took to write), but I am leaving it
  44  *         in for the competition.  It is derived from the original
  45  *         union by replacing the split_lt(gt) operations with a lazy
  46  *         version. The `obvious' version is called old_union.
  47  *
  48  *     4.  Most time is spent in T', the rebalancing constructor.  If my
  49  *         understanding of the output of *<file> in the sml batch
  50  *         compiler is correct then the code produced by NJSML 0.75
  51  *         (sparc) for the final case is very disappointing.  Most
  52  *         invocations fall through to this case and most of these cases
  53  *         fall to the else part, i.e. the plain contructor,
  54  *         T(v,ln+rn+1,l,r).  The poor code allocates a 16 word vector
  55  *         and saves lots of registers into it.  In the common case it
  56  *         then retrieves a few of the registers and allocates the 5
  57  *         word T node.  The values that it retrieves were live in
  58  *         registers before the massive save.
  59  *
  60  *   Modified to functor to support general ordered values
  61  *)
  62
  63 functor BinarySetFn (K : ORD_KEY) : ORD_SET =
  64   struct
  65
  66     structure Key = K
  67
  68     type item = K.ord_key
  69
  70     datatype set
  71       = E
  72       | T of {
  73           elt : item,
  74           cnt : int,
  75           left : set,
  76           right : set
  77         }
  78
  79     fun numItems E = 0
  80       | numItems (T{cnt,...}) = cnt
  81
  82     fun isEmpty E = true
  83       | isEmpty _ = false
  84
  85     fun mkT(v,n,l,r) = T{elt=v,cnt=n,left=l,right=r}
  86
  87       (* N(v,l,r) = T(v,1+numItems(l)+numItems(r),l,r) *)
  88     fun N(v,E,E) = mkT(v,1,E,E)
  89       | N(v,E,r as T{cnt=n,...}) = mkT(v,n+1,E,r)
  90       | N(v,l as T{cnt=n,...}, E) = mkT(v,n+1,l,E)
  91       | N(v,l as T{cnt=n,...}, r as T{cnt=m,...}) = mkT(v,n+m+1,l,r)
  92
  93     fun single_L (a,x,T{elt=b,left=y,right=z,...}) = N(b,N(a,x,y),z)
  94       | single_L _ = raise Match
  95     fun single_R (b,T{elt=a,left=x,right=y,...},z) = N(a,x,N(b,y,z))
  96       | single_R _ = raise Match
  97     fun double_L (a,w,T{elt=c,left=T{elt=b,left=x,right=y,...},right=z,...}) =
  98           N(b,N(a,w,x),N(c,y,z))
  99       | double_L _ = raise Match
 100     fun double_R (c,T{elt=a,left=w,right=T{elt=b,left=x,right=y,...},...},z) =
 101           N(b,N(a,w,x),N(c,y,z))
 102       | double_R _ = raise Match
 103
 104     (*
 105     **  val weight = 3
 106     **  fun wt i = weight * i
 107     *)
 108     fun wt (i : int) = i + i + i
 109
 110     fun T' (v,E,E) = mkT(v,1,E,E)
 111       | T' (v,E,r as T{left=E,right=E,...}) = mkT(v,2,E,r)
 112       | T' (v,l as T{left=E,right=E,...},E) = mkT(v,2,l,E)
 113
 114       | T' (p as (_,E,T{left=T _,right=E,...})) = double_L p
 115       | T' (p as (_,T{left=E,right=T _,...},E)) = double_R p
 116
 117         (* these cases almost never happen with small weight*)
 118       | T' (p as (_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) =
 119             if ln<rn then single_L p else double_L p
 120       | T' (p as (_,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...},E)) =
 121             if ln>rn then single_R p else double_R p
 122
 123       | T' (p as (_,E,T{left=E,...})) = single_L p
 124       | T' (p as (_,T{right=E,...},E)) = single_R p
 125
 126       | T' (p as (v,l as T{elt=lv,cnt=ln,left=ll,right=lr},
 127               r as T{elt=rv,cnt=rn,left=rl,right=rr})) =
 128           if rn >= wt ln (*right is too big*)
 129             then
 130               let val rln = numItems rl
 131                   val rrn = numItems rr
 132               in
 133                 if rln < rrn then single_L p else double_L p
 134               end
 135           else if ln >= wt rn (*left is too big*)
 136             then
 137               let val lln = numItems ll
 138                   val lrn = numItems lr
 139               in
 140                 if lrn < lln then single_R p else double_R p
 141               end
 142           else mkT(v,ln+rn+1,l,r)
 143
 144     fun add (E,x) = mkT(x,1,E,E)
 145       | add (set as T{elt=v,left=l,right=r,cnt},x) =
 146           case K.compare(x,v) of
 147             LESS => T'(v,add(l,x),r)
 148           | GREATER => T'(v,l,add(r,x))
 149           | EQUAL => mkT(x,cnt,l,r)
 150     fun add' (s, x) = add(x, s)
 151
 152     fun concat3 (E,v,r) = add(r,v)
 153       | concat3 (l,v,E) = add(l,v)
 154       | concat3 (l as T{elt=v1,cnt=n1,left=l1,right=r1}, v,
 155                   r as T{elt=v2,cnt=n2,left=l2,right=r2}) =
 156         if wt n1 < n2 then T'(v2,concat3(l,v,l2),r2)
 157         else if wt n2 < n1 then T'(v1,l1,concat3(r1,v,r))
 158         else N(v,l,r)
 159
 160     fun split_lt (E,x) = E
 161       | split_lt (T{elt=v,left=l,right=r,...},x) =
 162           case K.compare(v,x) of
 163             GREATER => split_lt(l,x)
 164           | LESS => concat3(l,v,split_lt(r,x))
 165           | _ => l
 166
 167     fun split_gt (E,x) = E
 168       | split_gt (T{elt=v,left=l,right=r,...},x) =
 169           case K.compare(v,x) of
 170             LESS => split_gt(r,x)
 171           | GREATER => concat3(split_gt(l,x),v,r)
 172           | _ => r
 173
 174     fun min (T{elt=v,left=E,...}) = v
 175       | min (T{left=l,...}) = min l
 176       | min _ = raise Match
 177
 178     fun delmin (T{left=E,right=r,...}) = r
 179       | delmin (T{elt=v,left=l,right=r,...}) = T'(v,delmin l,r)
 180       | delmin _ = raise Match
 181
 182     fun delete' (E,r) = r
 183       | delete' (l,E) = l
 184       | delete' (l,r) = T'(min r,l,delmin r)
 185
 186     fun concat (E, s) = s
 187       | concat (s, E) = s
 188       | concat (t1 as T{elt=v1,cnt=n1,left=l1,right=r1},
 189                   t2 as T{elt=v2,cnt=n2,left=l2,right=r2}) =
 190           if wt n1 < n2 then T'(v2,concat(t1,l2),r2)
 191           else if wt n2 < n1 then T'(v1,l1,concat(r1,t2))
 192           else T'(min t2,t1, delmin t2)
 193
 194
 195     local
 196       fun trim (lo,hi,E) = E
 197         | trim (lo,hi,s as T{elt=v,left=l,right=r,...}) =
 198             if K.compare(v,lo) = GREATER
 199               then if K.compare(v,hi) = LESS then s else trim(lo,hi,l)
 200               else trim(lo,hi,r)
 201
 202       fun uni_bd (s,E,_,_) = s
 203         | uni_bd (E,T{elt=v,left=l,right=r,...},lo,hi) =
 204              concat3(split_gt(l,lo),v,split_lt(r,hi))
 205         | uni_bd (T{elt=v,left=l1,right=r1,...},
 206                    s2 as T{elt=v2,left=l2,right=r2,...},lo,hi) =
 207             concat3(uni_bd(l1,trim(lo,v,s2),lo,v),
 208                 v,
 209                 uni_bd(r1,trim(v,hi,s2),v,hi))
 210               (* inv:  lo < v < hi *)
 211
 212         (* all the other versions of uni and trim are
 213          * specializations of the above two functions with
 214          *     lo=-infinity and/or hi=+infinity
 215          *)
 216
 217       fun trim_lo (_, E) = E
 218         | trim_lo (lo,s as T{elt=v,right=r,...}) =
 219             case K.compare(v,lo) of
 220               GREATER => s
 221             | _ => trim_lo(lo,r)
 222
 223       fun trim_hi (_, E) = E
 224         | trim_hi (hi,s as T{elt=v,left=l,...}) =
 225             case K.compare(v,hi) of
 226               LESS => s
 227             | _ => trim_hi(hi,l)
 228
 229       fun uni_hi (s,E,_) = s
 230         | uni_hi (E,T{elt=v,left=l,right=r,...},hi) =
 231              concat3(l,v,split_lt(r,hi))
 232         | uni_hi (T{elt=v,left=l1,right=r1,...},
 233                    s2 as T{elt=v2,left=l2,right=r2,...},hi) =
 234             concat3(uni_hi(l1,trim_hi(v,s2),v),v,uni_bd(r1,trim(v,hi,s2),v,hi))
 235
 236       fun uni_lo (s,E,_) = s
 237         | uni_lo (E,T{elt=v,left=l,right=r,...},lo) =
 238              concat3(split_gt(l,lo),v,r)
 239         | uni_lo (T{elt=v,left=l1,right=r1,...},
 240                    s2 as T{elt=v2,left=l2,right=r2,...},lo) =
 241             concat3(uni_bd(l1,trim(lo,v,s2),lo,v),v,uni_lo(r1,trim_lo(v,s2),v))
 242
 243       fun uni (s,E) = s
 244         | uni (E,s) = s
 245         | uni (T{elt=v,left=l1,right=r1,...},
 246                 s2 as T{elt=v2,left=l2,right=r2,...}) =
 247             concat3(uni_hi(l1,trim_hi(v,s2),v), v, uni_lo(r1,trim_lo(v,s2),v))
 248
 249     in
 250       val hedge_union = uni
 251     end
 252
 253       (* The old_union version is about 20% slower than
 254        *  hedge_union in most cases
 255        *)
 256     fun old_union (E,s2)  = s2
 257       | old_union (s1,E)  = s1
 258       | old_union (T{elt=v,left=l,right=r,...},s2) =
 259           let val l2 = split_lt(s2,v)
 260               val r2 = split_gt(s2,v)
 261           in
 262             concat3(old_union(l,l2),v,old_union(r,r2))
 263           end
 264
 265     val empty = E
 266     fun singleton x = T{elt=x,cnt=1,left=E,right=E}
 267
 268     fun addList (s,l) = List.foldl (fn (i,s) => add(s,i)) s l
 269
 270     val add = add
 271
 272     fun member (set, x) = let
 273           fun pk E = false
 274             | pk (T{elt=v, left=l, right=r, ...}) = (
 275                 case K.compare(x,v)
 276                  of LESS => pk l
 277                   | EQUAL => true
 278                   | GREATER => pk r
 279                 (* end case *))
 280           in
 281             pk set
 282           end
 283
 284     local
 285         (* true if every item in t is in t' *)
 286       fun treeIn (t,t') = let
 287             fun isIn E = true
 288               | isIn (T{elt,left=E,right=E,...}) = member(t',elt)
 289               | isIn (T{elt,left,right=E,...}) =
 290                   member(t',elt) andalso isIn left
 291               | isIn (T{elt,left=E,right,...}) =
 292                   member(t',elt) andalso isIn right
 293               | isIn (T{elt,left,right,...}) =
 294                   member(t',elt) andalso isIn left andalso isIn right
 295             in
 296               isIn t
 297             end
 298     in
 299     fun isSubset (E,_) = true
 300       | isSubset (_,E) = false
 301       | isSubset (t as T{cnt=n,...},t' as T{cnt=n',...}) =
 302           (n<=n') andalso treeIn (t,t')
 303
 304     fun equal (E,E) = true
 305       | equal (t as T{cnt=n,...},t' as T{cnt=n',...}) =
 306           (n=n') andalso treeIn (t,t')
 307       | equal _ = false
 308     end
 309
 310     local
 311       fun next ((t as T{right, ...})::rest) = (t, left(right, rest))
 312         | next _ = (E, [])
 313       and left (E, rest) = rest
 314         | left (t as T{left=l, ...}, rest) = left(l, t::rest)
 315     in
 316     fun compare (s1, s2) = let
 317           fun cmp (t1, t2) = (case (next t1, next t2)
 318                  of ((E, _), (E, _)) => EQUAL
 319                   | ((E, _), _) => LESS
 320                   | (_, (E, _)) => GREATER
 321                   | ((T{elt=e1, ...}, r1), (T{elt=e2, ...}, r2)) => (
 322                       case Key.compare(e1, e2)
 323                        of EQUAL => cmp (r1, r2)
 324                         | order => order
 325                       (* end case *))
 326                 (* end case *))
 327           in
 328             cmp (left(s1, []), left(s2, []))
 329           end
 330     end
 331
 332     fun delete (E,x) = raise LibBase.NotFound
 333       | delete (set as T{elt=v,left=l,right=r,...},x) =
 334           case K.compare(x,v) of
 335             LESS => T'(v,delete(l,x),r)
 336           | GREATER => T'(v,l,delete(r,x))
 337           | _ => delete'(l,r)
 338
 339     val union = hedge_union
 340
 341     fun intersection (E, _) = E
 342       | intersection (_, E) = E
 343       | intersection (s, T{elt=v,left=l,right=r,...}) = let
 344           val l2 = split_lt(s,v)
 345           val r2 = split_gt(s,v)
 346           in
 347             if member(s,v)
 348               then concat3(intersection(l2,l),v,intersection(r2,r))
 349               else concat(intersection(l2,l),intersection(r2,r))
 350           end
 351
 352     fun difference (E,s) = E
 353       | difference (s,E)  = s
 354       | difference (s, T{elt=v,left=l,right=r,...}) =
 355           let val l2 = split_lt(s,v)
 356               val r2 = split_gt(s,v)
 357           in
 358             concat(difference(l2,l),difference(r2,r))
 359           end
 360
 361     fun map f set = let
 362           fun map'(acc, E) = acc
 363             | map'(acc, T{elt,left,right,...}) =
 364                 map' (add (map' (acc, left), f elt), right)
 365           in
 366             map' (E, set)
 367           end
 368
 369     fun app apf =
 370          let fun apply E = ()
 371                | apply (T{elt,left,right,...}) =
 372                    (apply left;apf elt; apply right)
 373          in
 374            apply
 375          end
 376
 377     fun foldl f b set = let
 378           fun foldf (E, b) = b
 379             | foldf (T{elt,left,right,...}, b) =
 380                 foldf (right, f(elt, foldf (left, b)))
 381           in
 382             foldf (set, b)
 383           end
 384
 385     fun foldr f b set = let
 386           fun foldf (E, b) = b
 387             | foldf (T{elt,left,right,...}, b) =
 388                 foldf (left, f(elt, foldf (right, b)))
 389           in
 390             foldf (set, b)
 391           end
 392
 393     fun listItems set = foldr (op::) [] set
 394
 395     fun filter pred set =
 396           foldl (fn (item, s) => if (pred item) then add(s, item) else s)
 397             empty set
 398
 399     fun partition pred set =
 400           foldl
 401             (fn (item, (s1, s2)) =>
 402                 if (pred item) then (add(s1, item), s2) else (s1, add(s2, item))
 403             )
 404               (empty, empty) set
 405
 406     fun find p E = NONE
 407       | find p (T{elt,left,right,...}) = (case find p left
 408            of NONE => if (p elt)
 409                 then SOME elt
 410                 else find p right
 411             | a => a
 412           (* end case *))
 413
 414     fun exists p E = false
 415       | exists p (T{elt, left, right,...}) =
 416           (exists p left) orelse (p elt) orelse (exists p right)
 417
 418   end (* BinarySetFn *)