Release coccinelle-0.2.0
[bpt/coccinelle.git] / commons / ocamlextra / setPt.ml
1 (*
2 * Ptset: Sets of integers implemented as Patricia trees.
3 * Copyright (C) 2000 Jean-Christophe FILLIATRE
4 *
5 * This software is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License version 2, as published by the Free Software Foundation.
8 *
9 * This software is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * See the GNU Library General Public License version 2 for more details
14 * (enclosed in the file LGPL).
15 *)
16
17 (*i ptset.ml 1.8 2001/06/28 07:05:55 filliatr Exp i*)
18
19 (*s Sets of integers implemented as Patricia trees, following Chris
20 Okasaki and Andrew Gill's paper {\em Fast Mergeable Integer Maps}
21 ({\tt\small http://www.cs.columbia.edu/\~{}cdo/papers.html\#ml98maps}).
22 Patricia trees provide faster operations than standard library's
23 module [Set], and especially very fast [union], [subset], [inter]
24 and [diff] operations. *)
25
26 (*s The idea behind Patricia trees is to build a {\em trie} on the
27 binary digits of the elements, and to compact the representation
28 by branching only one the relevant bits (i.e. the ones for which
29 there is at least on element in each subtree). We implement here
30 {\em little-endian} Patricia trees: bits are processed from
31 least-significant to most-significant. The trie is implemented by
32 the following type [t]. [Empty] stands for the empty trie, and
33 [Leaf k] for the singleton [k]. (Note that [k] is the actual
34 element.) [Branch (m,p,l,r)] represents a branching, where [p] is
35 the prefix (from the root of the trie) and [m] is the branching
36 bit (a power of 2). [l] and [r] contain the subsets for which the
37 branching bit is respectively 0 and 1. Invariant: the trees [l]
38 and [r] are not empty. *)
39
40 (*i*)
41 type elt = int
42 (*i*)
43
44 type t =
45 | Empty
46 | Leaf of int
47 | Branch of int * int * t * t
48
49 (*s Example: the representation of the set $\{1,4,5\}$ is
50 $$\mathtt{Branch~(0,~1,~Leaf~4,~Branch~(1,~4,~Leaf~1,~Leaf~5))}$$
51 The first branching bit is the bit 0 (and the corresponding prefix
52 is [0b0], not of use here), with $\{4\}$ on the left and $\{1,5\}$ on the
53 right. Then the right subtree branches on bit 2 (and so has a branching
54 value of $2^2 = 4$), with prefix [0b01 = 1]. *)
55
56 (*s Empty set and singletons. *)
57
58 let empty = Empty
59
60 let is_empty = function Empty -> true | _ -> false
61
62 let singleton k = Leaf k
63
64 (*s Testing the occurrence of a value is similar to the search in a
65 binary search tree, where the branching bit is used to select the
66 appropriate subtree. *)
67
68 let zero_bit k m = (k land m) == 0
69
70 let rec mem k = function
71 | Empty -> false
72 | Leaf j -> k == j
73 | Branch (_, m, l, r) -> mem k (if zero_bit k m then l else r)
74
75 (*s The following operation [join] will be used in both insertion and
76 union. Given two non-empty trees [t0] and [t1] with longest common
77 prefixes [p0] and [p1] respectively, which are supposed to
78 disagree, it creates the union of [t0] and [t1]. For this, it
79 computes the first bit [m] where [p0] and [p1] disagree and create
80 a branching node on that bit. Depending on the value of that bit
81 in [p0], [t0] will be the left subtree and [t1] the right one, or
82 the converse. Computing the first branching bit of [p0] and [p1]
83 uses a nice property of twos-complement representation of integers. *)
84
85 let lowest_bit x = x land (-x)
86
87 let branching_bit p0 p1 = lowest_bit (p0 lxor p1)
88
89 let mask p m = p land (m-1)
90
91 let join (p0,t0,p1,t1) =
92 let m = branching_bit p0 p1 in
93 if zero_bit p0 m then
94 Branch (mask p0 m, m, t0, t1)
95 else
96 Branch (mask p0 m, m, t1, t0)
97
98 (*s Then the insertion of value [k] in set [t] is easily implemented
99 using [join]. Insertion in a singleton is just the identity or a
100 call to [join], depending on the value of [k]. When inserting in
101 a branching tree, we first check if the value to insert [k]
102 matches the prefix [p]: if not, [join] will take care of creating
103 the above branching; if so, we just insert [k] in the appropriate
104 subtree, depending of the branching bit. *)
105
106 let match_prefix k p m = (mask k m) == p
107
108 let add k t =
109 let rec ins = function
110 | Empty -> Leaf k
111 | Leaf j as t ->
112 if j == k then t else join (k, Leaf k, j, t)
113 | Branch (p,m,t0,t1) as t ->
114 if match_prefix k p m then
115 if zero_bit k m then
116 Branch (p, m, ins t0, t1)
117 else
118 Branch (p, m, t0, ins t1)
119 else
120 join (k, Leaf k, p, t)
121 in
122 ins t
123
124 (*s The code to remove an element is basically similar to the code of
125 insertion. But since we have to maintain the invariant that both
126 subtrees of a [Branch] node are non-empty, we use here the
127 ``smart constructor'' [branch] instead of [Branch]. *)
128
129 let branch = function
130 | (_,_,Empty,t) -> t
131 | (_,_,t,Empty) -> t
132 | (p,m,t0,t1) -> Branch (p,m,t0,t1)
133
134 let remove k t =
135 let rec rmv = function
136 | Empty -> Empty
137 | Leaf j as t -> if k == j then Empty else t
138 | Branch (p,m,t0,t1) as t ->
139 if match_prefix k p m then
140 if zero_bit k m then
141 branch (p, m, rmv t0, t1)
142 else
143 branch (p, m, t0, rmv t1)
144 else
145 t
146 in
147 rmv t
148
149 (*s One nice property of Patricia trees is to support a fast union
150 operation (and also fast subset, difference and intersection
151 operations). When merging two branching trees we examine the
152 following four cases: (1) the trees have exactly the same
153 prefix; (2/3) one prefix contains the other one; and (4) the
154 prefixes disagree. In cases (1), (2) and (3) the recursion is
155 immediate; in case (4) the function [join] creates the appropriate
156 branching. *)
157
158 let rec merge = function
159 | Empty, t -> t
160 | t, Empty -> t
161 | Leaf k, t -> add k t
162 | t, Leaf k -> add k t
163 | (Branch (p,m,s0,s1) as s), (Branch (q,n,t0,t1) as t) ->
164 if m == n && match_prefix q p m then
165 (* The trees have the same prefix. Merge the subtrees. *)
166 Branch (p, m, merge (s0,t0), merge (s1,t1))
167 else if m < n && match_prefix q p m then
168 (* [q] contains [p]. Merge [t] with a subtree of [s]. *)
169 if zero_bit q m then
170 Branch (p, m, merge (s0,t), s1)
171 else
172 Branch (p, m, s0, merge (s1,t))
173 else if m > n && match_prefix p q n then
174 (* [p] contains [q]. Merge [s] with a subtree of [t]. *)
175 if zero_bit p n then
176 Branch (q, n, merge (s,t0), t1)
177 else
178 Branch (q, n, t0, merge (s,t1))
179 else
180 (* The prefixes disagree. *)
181 join (p, s, q, t)
182
183 let union s t = merge (s,t)
184
185 (*s When checking if [s1] is a subset of [s2] only two of the above
186 four cases are relevant: when the prefixes are the same and when the
187 prefix of [s1] contains the one of [s2], and then the recursion is
188 obvious. In the other two cases, the result is [false]. *)
189
190 let rec subset s1 s2 = match (s1,s2) with
191 | Empty, _ -> true
192 | _, Empty -> false
193 | Leaf k1, _ -> mem k1 s2
194 | Branch _, Leaf _ -> false
195 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) ->
196 if m1 == m2 && p1 == p2 then
197 subset l1 l2 && subset r1 r2
198 else if m1 > m2 && match_prefix p1 p2 m2 then
199 if zero_bit p1 m2 then
200 subset l1 l2 && subset r1 l2
201 else
202 subset l1 r2 && subset r1 r2
203 else
204 false
205
206 (*s To compute the intersection and the difference of two sets, we
207 still examine the same four cases as in [merge]. The recursion is
208 then obvious. *)
209
210 let rec inter s1 s2 = match (s1,s2) with
211 | Empty, _ -> Empty
212 | _, Empty -> Empty
213 | Leaf k1, _ -> if mem k1 s2 then s1 else Empty
214 | _, Leaf k2 -> if mem k2 s1 then s2 else Empty
215 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) ->
216 if m1 == m2 && p1 == p2 then
217 merge (inter l1 l2, inter r1 r2)
218 else if m1 < m2 && match_prefix p2 p1 m1 then
219 inter (if zero_bit p2 m1 then l1 else r1) s2
220 else if m1 > m2 && match_prefix p1 p2 m2 then
221 inter s1 (if zero_bit p1 m2 then l2 else r2)
222 else
223 Empty
224
225 let rec diff s1 s2 = match (s1,s2) with
226 | Empty, _ -> Empty
227 | _, Empty -> s1
228 | Leaf k1, _ -> if mem k1 s2 then Empty else s1
229 | _, Leaf k2 -> remove k2 s1
230 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) ->
231 if m1 == m2 && p1 == p2 then
232 merge (diff l1 l2, diff r1 r2)
233 else if m1 < m2 && match_prefix p2 p1 m1 then
234 if zero_bit p2 m1 then
235 merge (diff l1 s2, r1)
236 else
237 merge (l1, diff r1 s2)
238 else if m1 > m2 && match_prefix p1 p2 m2 then
239 if zero_bit p1 m2 then diff s1 l2 else diff s1 r2
240 else
241 s1
242
243 (*s All the following operations ([cardinal], [iter], [fold], [for_all],
244 [exists], [filter], [partition], [choose], [elements]) are
245 implemented as for any other kind of binary trees. *)
246
247 let rec cardinal = function
248 | Empty -> 0
249 | Leaf _ -> 1
250 | Branch (_,_,t0,t1) -> cardinal t0 + cardinal t1
251
252 let rec iter f = function
253 | Empty -> ()
254 | Leaf k -> f k
255 | Branch (_,_,t0,t1) -> iter f t0; iter f t1
256
257 let rec fold f s accu = match s with
258 | Empty -> accu
259 | Leaf k -> f k accu
260 | Branch (_,_,t0,t1) -> fold f t0 (fold f t1 accu)
261
262 let rec for_all p = function
263 | Empty -> true
264 | Leaf k -> p k
265 | Branch (_,_,t0,t1) -> for_all p t0 && for_all p t1
266
267 let rec exists p = function
268 | Empty -> false
269 | Leaf k -> p k
270 | Branch (_,_,t0,t1) -> exists p t0 || exists p t1
271
272 let filter p s =
273 let rec filt acc = function
274 | Empty -> acc
275 | Leaf k -> if p k then add k acc else acc
276 | Branch (_,_,t0,t1) -> filt (filt acc t0) t1
277 in
278 filt Empty s
279
280 let partition p s =
281 let rec part (t,f as acc) = function
282 | Empty -> acc
283 | Leaf k -> if p k then (add k t, f) else (t, add k f)
284 | Branch (_,_,t0,t1) -> part (part acc t0) t1
285 in
286 part (Empty, Empty) s
287
288 let rec choose = function
289 | Empty -> raise Not_found
290 | Leaf k -> k
291 | Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *)
292
293 let elements s =
294 let rec elements_aux acc = function
295 | Empty -> acc
296 | Leaf k -> k :: acc
297 | Branch (_,_,l,r) -> elements_aux (elements_aux acc l) r
298 in
299 elements_aux [] s
300
301 (*s There is no way to give an efficient implementation of [min_elt]
302 and [max_elt], as with binary search trees. The following
303 implementation is a traversal of all elements, barely more
304 efficient than [fold min t (choose t)] (resp. [fold max t (choose
305 t)]). Note that we use the fact that there is no constructor
306 [Empty] under [Branch] and therefore always a minimal
307 (resp. maximal) element there. *)
308
309 let rec min_elt = function
310 | Empty -> raise Not_found
311 | Leaf k -> k
312 | Branch (_,_,s,t) -> min (min_elt s) (min_elt t)
313
314 let rec max_elt = function
315 | Empty -> raise Not_found
316 | Leaf k -> k
317 | Branch (_,_,s,t) -> max (max_elt s) (max_elt t)
318
319 (*s Another nice property of Patricia trees is to be independent of the
320 order of insertion. As a consequence, two Patricia trees have the
321 same elements if and only if they are structurally equal. *)
322
323 let equal = (=)
324
325 let compare = compare
326
327 (*i*)
328 let make l = List.fold_right add l empty
329 (*i*)
330
331 (*s Additional functions w.r.t to [Set.S]. *)
332
333 let rec intersect s1 s2 = match (s1,s2) with
334 | Empty, _ -> false
335 | _, Empty -> false
336 | Leaf k1, _ -> mem k1 s2
337 | _, Leaf k2 -> mem k2 s1
338 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) ->
339 if m1 == m2 && p1 == p2 then
340 intersect l1 l2 || intersect r1 r2
341 else if m1 < m2 && match_prefix p2 p1 m1 then
342 intersect (if zero_bit p2 m1 then l1 else r1) s2
343 else if m1 > m2 && match_prefix p1 p2 m2 then
344 intersect s1 (if zero_bit p1 m2 then l2 else r2)
345 else
346 false