+++ /dev/null
-(**************************************************************************)
-(* *)
-(* Menhir *)
-(* *)
-(* François Pottier, INRIA Rocquencourt *)
-(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
-(* *)
-(* Copyright 2005-2008 Institut National de Recherche en Informatique *)
-(* et en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0, with the change *)
-(* described in file LICENSE. *)
-(* *)
-(**************************************************************************)
-
-(* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore.
- Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia
- trees. *)
-
-(*i --------------------------------------------------------------------------------------------------------------- i*)
-(*s \mysection{Little-endian vs big-endian trees} *)
-
- (* A tree is little-endian if it expects the key's least significant bits to be tested first during a search. It is
- big-endian if it expects the key's most significant bits to be tested first.
-
- Most of the code is independent of this design choice, so it is written as a functor, parameterized by a small
- structure which defines endianness. Here is the interface which must be adhered to by such a structure. *)
-
-module Endianness = struct
-
- module type S = sig
-
- (* A mask is an integer with a single one bit (i.e. a power of 2). *)
-
- type mask = int
-
- (* [branching_bit] accepts two distinct integers and returns a mask which identifies the first bit where they
- differ. The meaning of ``first'' varies according to the endianness being implemented. *)
-
- val branching_bit: int -> int -> mask
-
- (* [mask i m] returns an integer [i'], where all bits which [m] says are relevant are identical to those in [i],
- and all others are set to some unspecified, but fixed value. Which bits are ``relevant'' according to a given
- mask varies according to the endianness being implemented. *)
-
- val mask: int -> mask -> int
-
- (* [shorter m1 m2] returns [true] if and only if [m1] describes a shorter prefix than [m2], i.e. if it makes fewer
- bits relevant. Which bits are ``relevant'' according to a given mask varies according to the endianness being
- implemented. *)
-
- val shorter: mask -> mask -> bool
-
- end
-
- (* Now, let us define [Little] and [Big], two possible [Endiannness] choices. *)
-
- module Little = struct
-
- type mask = int
-
- let lowest_bit x =
- x land (-x)
-
- (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1]
- show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is
- the lowest bit in this bit field, since we are checking least significant bits first. *)
-
- let branching_bit i0 i1 =
- lowest_bit (i0 lxor i1)
-
- (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the right of the single one bit
- in the mask [m]. We keep these bits, and set all others to 0. *)
-
- let mask i m =
- i land (m-1)
-
- (* The smaller [m] is, the fewer bits are relevant. *)
-
- let shorter =
- (<)
-
- end
-
- module Big = struct
-
- type mask = int
-
- let lowest_bit x =
- x land (-x)
-
- let rec highest_bit x =
- let m = lowest_bit x in
- if x = m then
- m
- else
- highest_bit (x - m)
-
- (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1]
- show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is
- the highest bit in this bit field, since we are checking most significant bits first.
-
- In Okasaki's paper, this loop is sped up by computing a conservative initial guess. Indeed, the bit at which
- the two prefixes disagree must be somewhere within the shorter prefix, so we can begin searching at the
- least-significant valid bit in the shorter prefix. Unfortunately, to allow computing the initial guess, the
- main code has to pass in additional parameters, e.g. a mask which describes the length of each prefix. This
- ``pollutes'' the endianness-independent code. For this reason, this optimization isn't implemented here. *)
-
- let branching_bit i0 i1 =
- highest_bit (i0 lxor i1)
-
- (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the left of the single one bit
- in the mask [m]. We keep these bits, and set all others to 0. Okasaki uses a different convention, which allows
- big-endian Patricia trees to masquerade as binary search trees. This feature does not seem to be useful here. *)
-
- let mask i m =
- i land (lnot (2*m-1))
-
- (* The smaller [m] is, the more bits are relevant. *)
-
- let shorter =
- (>)
-
- end
-
-end
-
-(*i --------------------------------------------------------------------------------------------------------------- i*)
-(*s \mysection{Patricia-tree-based maps} *)
-
-module Make (X : Endianness.S) = struct
-
- (* Patricia trees are maps whose keys are integers. *)
-
- type key = int
-
- (* A tree is either empty, or a leaf node, containing both the integer key and a piece of data, or a binary node.
- Each binary node carries two integers. The first one is the longest common prefix of all keys in this
- sub-tree. The second integer is the branching bit. It is an integer with a single one bit (i.e. a power of 2),
- which describes the bit being tested at this node. *)
-
- type 'a t =
- | Empty
- | Leaf of int * 'a
- | Branch of int * X.mask * 'a t * 'a t
-
- (* The empty map. *)
-
- let empty =
- Empty
-
- (* [choose m] returns an arbitrarily chosen binding in [m], if [m]
- is nonempty, and raises [Not_found] otherwise. *)
-
- let rec choose = function
- | Empty ->
- raise Not_found
- | Leaf (key, data) ->
- key, data
- | Branch (_, _, tree0, _) ->
- choose tree0
-
- (* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is
- bound to [k].
-
- This implementation takes branches \emph{without} checking whether the key matches the prefix found at the
- current node. This means that a query for a non-existent key shall be detected only when finally reaching
- a leaf, rather than higher up in the tree. This strategy is better when (most) queries are expected to be
- successful. *)
-
- let rec lookup key = function
- | Empty ->
- raise Not_found
- | Leaf (key', data) ->
- if key = key' then
- data
- else
- raise Not_found
- | Branch (_, mask, tree0, tree1) ->
- lookup key (if (key land mask) = 0 then tree0 else tree1)
-
- let find =
- lookup
-
- (* [mem k m] tells whether the key [k] appears in the domain of the
- map [m]. *)
-
- let mem k m =
- try
- let _ = lookup k m in
- true
- with Not_found ->
- false
-
- (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree.
-
- Assume $t_0$ and $t_1$ are non-empty trees, with longest common prefixes $p_0$ and $p_1$, respectively. Further,
- suppose that $p_0$ and $p_1$ disagree, that is, neither prefix is contained in the other. Then, no matter how
- large $t_0$ and $t_1$ are, we can merge them simply by creating a new [Branch] node that has $t_0$ and $t_1$
- as children! *)
-
- let join p0 t0 p1 t1 =
- let m = X.branching_bit p0 p1 in
- let p = X.mask p0 (* for instance *) m in
- if (p0 land m) = 0 then
- Branch(p, m, t0, t1)
- else
- Branch(p, m, t1, t0)
-
- (* The auxiliary function [match_prefix] tells whether a given key has a given prefix. More specifically,
- [match_prefix k p m] returns [true] if and only if the key [k] has prefix [p] up to bit [m].
-
- Throughout our implementation of Patricia trees, prefixes are assumed to be in normal form, i.e. their
- irrelevant bits are set to some predictable value. Formally, we assume [X.mask p m] equals [p] whenever [p]
- is a prefix with [m] relevant bits. This allows implementing [match_prefix] using only one call to
- [X.mask]. On the other hand, this requires normalizing prefixes, as done e.g. in [join] above, where
- [X.mask p0 m] has to be used instead of [p0]. *)
-
- let match_prefix k p m =
- X.mask k m = p
-
- (* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to
- the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from
- [k] to [decide d0 d]. *)
-
- type 'a decision = 'a -> 'a -> 'a
-
- exception Unchanged
-
- let basic_add decide k d m =
-
- let rec add t =
- match t with
- | Empty ->
- Leaf (k, d)
- | Leaf (k0, d0) ->
- if k = k0 then
- let d' = decide d0 d in
- if d' == d0 then
- raise Unchanged
- else
- Leaf (k, d')
- else
- join k (Leaf (k, d)) k0 t
- | Branch (p, m, t0, t1) ->
- if match_prefix k p m then
- if (k land m) = 0 then Branch (p, m, add t0, t1)
- else Branch (p, m, t0, add t1)
- else
- join k (Leaf (k, d)) p t in
-
- add m
-
- let strict_add k d m =
- basic_add (fun _ _ -> raise Unchanged) k d m
-
- let fine_add decide k d m =
- try
- basic_add decide k d m
- with Unchanged ->
- m
-
- (* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum
- [d]. If a binding already exists for [k], it is overridden. *)
-
- let add k d m =
- fine_add (fun old_binding new_binding -> new_binding) k d m
-
- (* [singleton k d] returns a map whose only binding is from [k] to [d]. *)
-
- let singleton k d =
- Leaf (k, d)
-
- (* [is_singleton m] returns [Some (k, d)] if [m] is a singleton map
- that maps [k] to [d]. Otherwise, it returns [None]. *)
-
- let is_singleton = function
- | Leaf (k, d) ->
- Some (k, d)
- | Empty
- | Branch _ ->
- None
-
- (* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *)
-
- let is_empty = function
- | Empty ->
- true
- | Leaf _
- | Branch _ ->
- false
-
- (* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, its domain's
- cardinal. *)
-
- let rec cardinal = function
- | Empty ->
- 0
- | Leaf _ ->
- 1
- | Branch (_, _, t0, t1) ->
- cardinal t0 + cardinal t1
-
- (* [remove k m] returns the map [m] deprived from any binding involving [k]. *)
-
- let remove key m =
-
- let rec remove = function
- | Empty ->
- raise Not_found
- | Leaf (key', _) ->
- if key = key' then
- Empty
- else
- raise Not_found
- | Branch (prefix, mask, tree0, tree1) ->
- if (key land mask) = 0 then
- match remove tree0 with
- | Empty ->
- tree1
- | tree0 ->
- Branch (prefix, mask, tree0, tree1)
- else
- match remove tree1 with
- | Empty ->
- tree0
- | tree1 ->
- Branch (prefix, mask, tree0, tree1) in
-
- try
- remove m
- with Not_found ->
- m
-
- (* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found]
- if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding
- from [k] to [v]. *)
-
- let rec lookup_and_remove key = function
- | Empty ->
- raise Not_found
- | Leaf (key', data) ->
- if key = key' then
- data, Empty
- else
- raise Not_found
- | Branch (prefix, mask, tree0, tree1) ->
- if (key land mask) = 0 then
- match lookup_and_remove key tree0 with
- | data, Empty ->
- data, tree1
- | data, tree0 ->
- data, Branch (prefix, mask, tree0, tree1)
- else
- match lookup_and_remove key tree1 with
- | data, Empty ->
- data, tree0
- | data, tree1 ->
- data, Branch (prefix, mask, tree0, tree1)
-
- let find_and_remove =
- lookup_and_remove
-
- (* [fine_union decide m1 m2] returns the union of the maps [m1] and
- [m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1]
- (resp. [m2]), then [decide] is called. It is passed [x1] and
- [x2], and must return the value which shall be bound to [k] in
- the final map. The operation returns [m2] itself (as opposed to a
- copy of it) when its result is equal to [m2]. *)
-
- let reverse decision elem1 elem2 =
- decision elem2 elem1
-
- let fine_union decide m1 m2 =
-
- let rec union s t =
- match s, t with
-
- | Empty, _ ->
- t
- | (Leaf _ | Branch _), Empty ->
- s
-
- | Leaf(key, value), _ ->
- fine_add (reverse decide) key value t
- | Branch _, Leaf(key, value) ->
- fine_add decide key value s
-
- | Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->
- if (p = q) & (m = n) then
-
- (* The trees have the same prefix. Merge their sub-trees. *)
-
- let u0 = union s0 t0
- and u1 = union s1 t1 in
- if t0 == u0 && t1 == u1 then t
- else Branch(p, m, u0, u1)
-
- else if (X.shorter m n) & (match_prefix q p m) then
-
- (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *)
-
- if (q land m) = 0 then
- Branch(p, m, union s0 t, s1)
- else
- Branch(p, m, s0, union s1 t)
-
- else if (X.shorter n m) & (match_prefix p q n) then
-
- (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *)
-
- if (p land n) = 0 then
- let u0 = union s t0 in
- if t0 == u0 then t
- else Branch(q, n, u0, t1)
- else
- let u1 = union s t1 in
- if t1 == u1 then t
- else Branch(q, n, t0, u1)
-
- else
-
- (* The prefixes disagree. *)
-
- join p s q t in
-
- union m1 m2
-
- (* [union m1 m2] returns the union of the maps [m1] and
- [m2]. Bindings in [m2] take precedence over those in [m1]. *)
-
- let union m1 m2 =
- fine_union (fun d d' -> d') m1 m2
-
- (* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are
- presented to [f] according to some unspecified, but fixed, order. *)
-
- let rec iter f = function
- | Empty ->
- ()
- | Leaf (key, data) ->
- f key data
- | Branch (_, _, tree0, tree1) ->
- iter f tree0;
- iter f tree1
-
- (* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map
- [m]. Keys are presented to [f] in increasing order according to the map's ordering. The initial value of
- [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The
- value returned by [fold] is the final value of [accu]. *)
-
- let rec fold f m accu =
- match m with
- | Empty ->
- accu
- | Leaf (key, data) ->
- f key data accu
- | Branch (_, _, tree0, tree1) ->
- fold f tree1 (fold f tree0 accu)
-
- (* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. *)
-
- let rec fold_rev f m accu =
- match m with
- | Empty ->
- accu
- | Leaf (key, data) ->
- f key data accu
- | Branch (_, _, tree0, tree1) ->
- fold_rev f tree0 (fold_rev f tree1 accu)
-
- (* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have the same domain. Doing so invokes
- [f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f]
- according to some unspecified, but fixed, order. *)
-
- let rec iter2 f t1 t2 =
- match t1, t2 with
- | Empty, Empty ->
- ()
- | Leaf (key1, data1), Leaf (key2, data2) ->
- assert (key1 = key2);
- f key1 (* for instance *) data1 data2
- | Branch (p1, m1, left1, right1), Branch (p2, m2, left2, right2) ->
- assert (p1 = p2);
- assert (m1 = m2);
- iter2 f left1 left2;
- iter2 f right1 right2
- | _, _ ->
- assert false
-
- (* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map
- $k\mapsto f(m(k))$. *)
-
- let rec map f = function
- | Empty ->
- Empty
- | Leaf (key, data) ->
- Leaf(key, f data)
- | Branch (p, m, tree0, tree1) ->
- Branch (p, m, map f tree0, map f tree1)
-
- (* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves
- memory when [f] is the identity function. *)
-
- let rec endo_map f tree =
- match tree with
- | Empty ->
- tree
- | Leaf (key, data) ->
- let data' = f data in
- if data == data' then
- tree
- else
- Leaf(key, data')
- | Branch (p, m, tree0, tree1) ->
- let tree0' = endo_map f tree0 in
- let tree1' = endo_map f tree1 in
- if (tree0' == tree0) & (tree1' == tree1) then
- tree
- else
- Branch (p, m, tree0', tree1')
-
- (* [iterator m] returns a stateful iterator over the map [m]. *)
-
- (* TEMPORARY performance could be improved, see JCF's paper *)
-
- let iterator m =
-
- let remainder = ref [ m ] in
-
- let rec next () =
- match !remainder with
- | [] ->
- None
- | Empty :: parent ->
- remainder := parent;
- next()
- | (Leaf (key, data)) :: parent ->
- remainder := parent;
- Some (key, data)
- | (Branch(_, _, s0, s1)) :: parent ->
- remainder := s0 :: s1 :: parent;
- next () in
-
- next
-
- (* If [dcompare] is an ordering over data, then [compare dcompare]
- is an ordering over maps. *)
-
- exception Got of int
-
- let compare dcompare m1 m2 =
- let iterator2 = iterator m2 in
- try
- iter (fun key1 data1 ->
- match iterator2() with
- | None ->
- raise (Got 1)
- | Some (key2, data2) ->
- let c = Pervasives.compare key1 key2 in
- if c <> 0 then
- raise (Got c)
- else
- let c = dcompare data1 data2 in
- if c <> 0 then
- raise (Got c)
- ) m1;
- match iterator2() with
- | None ->
- 0
- | Some _ ->
- -1
- with Got c ->
- c
-
-(*i --------------------------------------------------------------------------------------------------------------- i*)
-(*s \mysection{Patricia-tree-based sets} *)
-
-(* To enhance code sharing, it would be possible to implement maps as sets of pairs, or (vice-versa) to implement
- sets as maps to the unit element. However, both possibilities introduce some space and time inefficiency. To
- avoid it, we define each structure separately. *)
-
-module Domain = struct
-
- type element = int
-
- type t =
- | Empty
- | Leaf of int
- | Branch of int * X.mask * t * t
-
- (* The empty set. *)
-
- let empty =
- Empty
-
- (* [is_empty s] returns [true] if and only if the set [s] is empty. *)
-
- let is_empty = function
- | Empty ->
- true
- | Leaf _
- | Branch _ ->
- false
-
- (* [singleton x] returns a set whose only element is [x]. *)
-
- let singleton x =
- Leaf x
-
- (* [is_singleton s] returns [Some x] if [s] is a singleton
- containing [x] as its only element; otherwise, it returns
- [None]. *)
-
- let is_singleton = function
- | Leaf x ->
- Some x
- | Empty
- | Branch _ ->
- None
-
- (* [choose s] returns an arbitrarily chosen element of [s], if [s]
- is nonempty, and raises [Not_found] otherwise. *)
-
- let rec choose = function
- | Empty ->
- raise Not_found
- | Leaf x ->
- x
- | Branch (_, _, tree0, _) ->
- choose tree0
-
- (* [cardinal s] returns [s]'s cardinal. *)
-
- let rec cardinal = function
- | Empty ->
- 0
- | Leaf _ ->
- 1
- | Branch (_, _, t0, t1) ->
- cardinal t0 + cardinal t1
-
- (* [mem x s] returns [true] if and only if [x] appears in the set [s]. *)
-
- let rec mem x = function
- | Empty ->
- false
- | Leaf x' ->
- x = x'
- | Branch (_, mask, tree0, tree1) ->
- mem x (if (x land mask) = 0 then tree0 else tree1)
-
- (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. *)
-
- let join p0 t0 p1 t1 =
- let m = X.branching_bit p0 p1 in
- let p = X.mask p0 (* for instance *) m in
- if (p0 land m) = 0 then
- Branch(p, m, t0, t1)
- else
- Branch(p, m, t1, t0)
-
- (* [add x s] returns a set whose elements are all elements of [s], plus [x]. *)
-
- exception Unchanged
-
- let rec strict_add x t =
- match t with
- | Empty ->
- Leaf x
- | Leaf x0 ->
- if x = x0 then
- raise Unchanged
- else
- join x (Leaf x) x0 t
- | Branch (p, m, t0, t1) ->
- if match_prefix x p m then
- if (x land m) = 0 then Branch (p, m, strict_add x t0, t1)
- else Branch (p, m, t0, strict_add x t1)
- else
- join x (Leaf x) p t
-
- let add x s =
- try
- strict_add x s
- with Unchanged ->
- s
-
- (* [make2 x y] creates a set whose elements are [x] and [y]. [x] and [y] need not be distinct. *)
-
- let make2 x y =
- add x (Leaf y)
-
- (* [fine_add] does not make much sense for sets of integers. Better warn the user. *)
-
- type decision = int -> int -> int
-
- let fine_add decision x s =
- assert false
-
- (* [remove x s] returns a set whose elements are all elements of [s], except [x]. *)
-
- let remove x s =
-
- let rec strict_remove = function
- | Empty ->
- raise Not_found
- | Leaf x' ->
- if x = x' then
- Empty
- else
- raise Not_found
- | Branch (prefix, mask, tree0, tree1) ->
- if (x land mask) = 0 then
- match strict_remove tree0 with
- | Empty ->
- tree1
- | tree0 ->
- Branch (prefix, mask, tree0, tree1)
- else
- match strict_remove tree1 with
- | Empty ->
- tree0
- | tree1 ->
- Branch (prefix, mask, tree0, tree1) in
-
- try
- strict_remove s
- with Not_found ->
- s
-
- (* [union s1 s2] returns the union of the sets [s1] and [s2]. *)
-
- let rec union s t =
- match s, t with
-
- | Empty, _ ->
- t
- | _, Empty ->
- s
-
- | Leaf x, _ ->
- add x t
- | _, Leaf x ->
- add x s
-
- | Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->
- if (p = q) & (m = n) then
-
- (* The trees have the same prefix. Merge their sub-trees. *)
-
- let u0 = union s0 t0
- and u1 = union s1 t1 in
- if t0 == u0 && t1 == u1 then t
- else Branch(p, m, u0, u1)
-
- else if (X.shorter m n) & (match_prefix q p m) then
-
- (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *)
-
- if (q land m) = 0 then
- Branch(p, m, union s0 t, s1)
- else
- Branch(p, m, s0, union s1 t)
-
- else if (X.shorter n m) & (match_prefix p q n) then
-
- (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *)
-
- if (p land n) = 0 then
- let u0 = union s t0 in
- if t0 == u0 then t
- else Branch(q, n, u0, t1)
- else
- let u1 = union s t1 in
- if t1 == u1 then t
- else Branch(q, n, t0, u1)
-
- else
-
- (* The prefixes disagree. *)
-
- join p s q t
-
- (* [fine_union] does not make much sense for sets of integers. Better warn the user. *)
-
- let fine_union decision s1 s2 =
- assert false
-
- (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures
- that the newly created node does not have an [Empty] child. *)
-
- let build p m t0 t1 =
- match t0, t1 with
- | Empty, Empty ->
- Empty
- | Empty, _ ->
- t1
- | _, Empty ->
- t0
- | _, _ ->
- Branch(p, m, t0, t1)
-
- (* [diff s t] returns the set difference of [s] and [t], that is, $s\setminus t$. *)
-
- let rec diff s t =
- match s, t with
-
- | Empty, _
- | _, Empty ->
- s
-
- | Leaf x, _ ->
- if mem x t then Empty else s
- | _, Leaf x ->
- remove x s
-
- | Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->
- if (p = q) & (m = n) then
-
- (* The trees have the same prefix. Compute the differences of their sub-trees. *)
-
- build p m (diff s0 t0) (diff s1 t1)
-
- else if (X.shorter m n) & (match_prefix q p m) then
-
- (* [q] contains [p]. Subtract [t] off a sub-tree of [s]. *)
-
- if (q land m) = 0 then
- build p m (diff s0 t) s1
- else
- build p m s0 (diff s1 t)
-
- else if (X.shorter n m) & (match_prefix p q n) then
-
- (* [p] contains [q]. Subtract a sub-tree of [t] off [s]. *)
-
- diff s (if (p land n) = 0 then t0 else t1)
-
- else
-
- (* The prefixes disagree. *)
-
- s
-
- (* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *)
-
- let rec inter s t =
- match s, t with
-
- | Empty, _
- | _, Empty ->
- Empty
-
- | (Leaf x as s), t
- | t, (Leaf x as s) ->
- if mem x t then s else Empty
-
- | Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->
- if (p = q) & (m = n) then
-
- (* The trees have the same prefix. Compute the intersections of their sub-trees. *)
-
- build p m (inter s0 t0) (inter s1 t1)
-
- else if (X.shorter m n) & (match_prefix q p m) then
-
- (* [q] contains [p]. Intersect [t] with a sub-tree of [s]. *)
-
- inter (if (q land m) = 0 then s0 else s1) t
-
- else if (X.shorter n m) & (match_prefix p q n) then
-
- (* [p] contains [q]. Intersect [s] with a sub-tree of [t]. *)
-
- inter s (if (p land n) = 0 then t0 else t1)
-
- else
-
- (* The prefixes disagree. *)
-
- Empty
-
- (* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection
- is empty. It is a specialized version of [inter], which uses less space. *)
-
- exception NotDisjoint
-
- let disjoint s t =
-
- let rec inter s t =
- match s, t with
-
- | Empty, _
- | _, Empty ->
- ()
-
- | Leaf x, _ ->
- if mem x t then
- raise NotDisjoint
- | _, Leaf x ->
- if mem x s then
- raise NotDisjoint
-
- | Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->
- if (p = q) & (m = n) then begin
- inter s0 t0;
- inter s1 t1
- end
- else if (X.shorter m n) & (match_prefix q p m) then
- inter (if (q land m) = 0 then s0 else s1) t
- else if (X.shorter n m) & (match_prefix p q n) then
- inter s (if (p land n) = 0 then t0 else t1)
- else
- () in
-
- try
- inter s t;
- true
- with NotDisjoint ->
- false
-
- (* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] according
- to some unspecified, but fixed, order. *)
-
- let rec iter f = function
- | Empty ->
- ()
- | Leaf x ->
- f x
- | Branch (_, _, tree0, tree1) ->
- iter f tree0;
- iter f tree1
-
- (* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f]
- according to some unspecified, but fixed, order. The initial value of [accu] is [seed]; then, at each new call,
- its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final
- value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then
- [fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *)
-
- let rec fold f s accu =
- match s with
- | Empty ->
- accu
- | Leaf x ->
- f x accu
- | Branch (_, _, s0, s1) ->
- fold f s1 (fold f s0 accu)
-
- (* [elements s] is a list of all elements in the set [s]. *)
-
- let elements s =
- fold (fun tl hd -> tl :: hd) s []
-
- (* [fold_rev] performs exactly the same job as [fold], but presents elements to [f] in the opposite order. *)
-
- let rec fold_rev f s accu =
- match s with
- | Empty ->
- accu
- | Leaf x ->
- f x accu
- | Branch (_, _, s0, s1) ->
- fold_rev f s0 (fold_rev f s1 accu)
-
- (* [iter2] does not make much sense for sets of integers. Better warn the user. *)
-
- let rec iter2 f t1 t2 =
- assert false
-
- (* [iterator s] returns a stateful iterator over the set [s]. That is, if $s = \{ x_1, x_2, \ldots, x_n \}$, where
- $x_1 < x_2 < \ldots < x_n$, then [iterator s] is a function which, when invoked for the $k^{\text{th}}$ time,
- returns [Some]$x_k$, if $k\leq n$, and [None] otherwise. Such a function can be useful when one wishes to
- iterate over a set's elements, without being restricted by the call stack's discipline.
-
- For more comments about this algorithm, please see module [Baltree], which defines a similar one. *)
-
- let iterator s =
-
- let remainder = ref [ s ] in
-
- let rec next () =
- match !remainder with
- | [] ->
- None
- | Empty :: parent ->
- remainder := parent;
- next()
- | (Leaf x) :: parent ->
- remainder := parent;
- Some x
- | (Branch(_, _, s0, s1)) :: parent ->
- remainder := s0 :: s1 :: parent;
- next () in
-
- next
-
- (* [exists p s] returns [true] if and only if some element of [s] matches the predicate [p]. *)
-
- exception Exists
-
- let exists p s =
- try
- iter (fun x ->
- if p x then
- raise Exists
- ) s;
- false
- with Exists ->
- true
-
- (* [compare] is an ordering over sets. *)
-
- exception Got of int
-
- let compare s1 s2 =
- let iterator2 = iterator s2 in
- try
- iter (fun x1 ->
- match iterator2() with
- | None ->
- raise (Got 1)
- | Some x2 ->
- let c = Pervasives.compare x1 x2 in
- if c <> 0 then
- raise (Got c)
- ) s1;
- match iterator2() with
- | None ->
- 0
- | Some _ ->
- -1
- with Got c ->
- c
-
- (* [equal] implements equality over sets. *)
-
- let equal s1 s2 =
- compare s1 s2 = 0
-
- (* [subset] implements the subset predicate over sets. In other words, [subset s t] returns [true] if and only if
- $s\subseteq t$. It is a specialized version of [diff]. *)
-
- exception NotSubset
-
- let subset s t =
-
- let rec diff s t =
- match s, t with
-
- | Empty, _ ->
- ()
- | _, Empty
-
- | Branch _, Leaf _ ->
- raise NotSubset
- | Leaf x, _ ->
- if not (mem x t) then
- raise NotSubset
-
- | Branch(p, m, s0, s1), Branch(q, n, t0, t1) ->
-
- if (p = q) & (m = n) then begin
-
- diff s0 t0;
- diff s1 t1
-
- end
- else if (X.shorter n m) & (match_prefix p q n) then
-
- diff s (if (p land n) = 0 then t0 else t1)
-
- else
-
- (* Either [q] contains [p], which means at least one of [s]'s sub-trees is not contained within [t],
- or the prefixes disagree. In either case, the subset relationship cannot possibly hold. *)
-
- raise NotSubset in
-
- try
- diff s t;
- true
- with NotSubset ->
- false
-
- (* [filter p s] returns the subset of [s] formed by all elements which satisfy the predicate [p]. *)
-
- let filter predicate s =
- let modified = ref false in
-
- let subset = fold (fun element subset ->
- if predicate element then
- add element subset
- else begin
- modified := true;
- subset
- end
- ) s Empty in
-
- if !modified then
- subset
- else
- s
-
- (* [map f s] computes the image of [s] through [f]. *)
-
- let map f s =
- fold (fun element accu ->
- add (f element) accu
- ) s Empty
-
- (* [monotone_map] and [endo_map] do not make much sense for sets of integers. Better warn the user. *)
-
- let monotone_map f s =
- assert false
-
- let endo_map f s =
- assert false
-
-end
-
-(*i --------------------------------------------------------------------------------------------------------------- i*)
-(*s \mysection{Relating sets and maps} *)
-
- (* Back to the world of maps. Let us now describe the relationship which exists between maps and their domains. *)
-
- (* [domain m] returns [m]'s domain. *)
-
- let rec domain = function
- | Empty ->
- Domain.Empty
- | Leaf (k, _) ->
- Domain.Leaf k
- | Branch (p, m, t0, t1) ->
- Domain.Branch (p, m, domain t0, domain t1)
-
- (* [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. *)
-
- let rec lift f = function
- | Domain.Empty ->
- Empty
- | Domain.Leaf k ->
- Leaf (k, f k)
- | Domain.Branch (p, m, t0, t1) ->
- Branch(p, m, lift f t0, lift f t1)
-
- (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures
- that the newly created node does not have an [Empty] child. *)
-
- let build p m t0 t1 =
- match t0, t1 with
- | Empty, Empty ->
- Empty
- | Empty, _ ->
- t1
- | _, Empty ->
- t0
- | _, _ ->
- Branch(p, m, t0, t1)
-
- (* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map
- $k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. Its code resembles
- [diff]'s. *)
-
- let rec corestrict s t =
- match s, t with
-
- | Empty, _
- | _, Domain.Empty ->
- s
-
- | Leaf (k, _), _ ->
- if Domain.mem k t then Empty else s
- | _, Domain.Leaf k ->
- remove k s
-
- | Branch(p, m, s0, s1), Domain.Branch(q, n, t0, t1) ->
- if (p = q) & (m = n) then
-
- build p m (corestrict s0 t0) (corestrict s1 t1)
-
- else if (X.shorter m n) & (match_prefix q p m) then
-
- if (q land m) = 0 then
- build p m (corestrict s0 t) s1
- else
- build p m s0 (corestrict s1 t)
-
- else if (X.shorter n m) & (match_prefix p q n) then
-
- corestrict s (if (p land n) = 0 then t0 else t1)
-
- else
-
- s
-
-end
-
-(*i --------------------------------------------------------------------------------------------------------------- i*)
-(*s \mysection{Instantiating the functor} *)
-
-module Little = Make(Endianness.Little)
-
-module Big = Make(Endianness.Big)
-