Add bitvector->intset.
[bpt/guile.git] / module / language / cps / intset.scm
1 ;;; Functional name maps
2 ;;; Copyright (C) 2014 Free Software Foundation, Inc.
3 ;;;
4 ;;; This library is free software: you can redistribute it and/or modify
5 ;;; it under the terms of the GNU Lesser General Public License as
6 ;;; published by the Free Software Foundation, either version 3 of the
7 ;;; License, or (at your option) any later version.
8 ;;;
9 ;;; This library is distributed in the hope that it will be useful, but
10 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ;;; Lesser General Public License for more details.
13 ;;;
14 ;;; You should have received a copy of the GNU Lesser General Public
15 ;;; License along with this program. If not, see
16 ;;; <http://www.gnu.org/licenses/>.
17
18 ;;; Commentary:
19 ;;;
20 ;;; A persistent, functional data structure representing a set of
21 ;;; integers as a tree whose branches are vectors and whose leaves are
22 ;;; fixnums. Intsets are careful to preserve sub-structure, in the
23 ;;; sense of eq?, whereever possible.
24 ;;;
25 ;;; Code:
26
27 (define-module (language cps intset)
28 #:use-module (rnrs bytevectors)
29 #:use-module (srfi srfi-9)
30 #:use-module (ice-9 match)
31 #:export (empty-intset
32 intset?
33 intset-add
34 intset-remove
35 intset-ref
36 intset-next
37 intset-union
38 intset-intersect
39 intset-subtract
40 bitvector->intset))
41
42 (define-syntax-rule (define-inline name val)
43 (define-syntax name (identifier-syntax val)))
44
45 (eval-when (expand)
46 (use-modules (system base target))
47 (define-syntax compile-time-cond
48 (lambda (x)
49 (syntax-case x (else)
50 ((_ (test body ...) rest ...)
51 (if (primitive-eval (syntax->datum #'test))
52 #'(begin body ...)
53 #'(begin (compile-time-cond rest ...))))
54 ((_ (else body ...))
55 #'(begin body ...))
56 ((_)
57 (error "no compile-time-cond expression matched"))))))
58
59 (compile-time-cond
60 ((eqv? (target-word-size) 4)
61 (define-inline *leaf-bits* 4))
62 ((eqv? (target-word-size) 8)
63 (define-inline *leaf-bits* 5)))
64
65 (define-inline *leaf-size* (ash 1 *leaf-bits*))
66 (define-inline *leaf-mask* (1- *leaf-size*))
67 (define-inline *branch-bits* 3)
68 (define-inline *branch-size* (ash 1 *branch-bits*))
69 (define-inline *branch-mask* (1- *branch-size*))
70
71 (define-record-type <intset>
72 (make-intset min shift root)
73 intset?
74 (min intset-min)
75 (shift intset-shift)
76 (root intset-root))
77
78 (define (new-leaf) 0)
79 (define-inlinable (clone-leaf-and-set leaf i val)
80 (if val
81 (if leaf
82 (logior leaf (ash 1 i))
83 (ash 1 i))
84 (if leaf
85 (logand leaf (lognot (ash 1 i)))
86 #f)))
87 (define (leaf-empty? leaf)
88 (zero? leaf))
89
90 (define (new-branch)
91 (make-vector *branch-size* #f))
92 (define (clone-branch-and-set branch i elt)
93 (let ((new (new-branch)))
94 (when branch (vector-move-left! branch 0 *branch-size* new 0))
95 (vector-set! new i elt)
96 new))
97 (define (branch-empty? branch)
98 (let lp ((i 0))
99 (or (= i *branch-size*)
100 (and (not (vector-ref branch i))
101 (lp (1+ i))))))
102
103 (define (round-down min shift)
104 (logand min (lognot (1- (ash 1 shift)))))
105
106 (define empty-intset (make-intset 0 *leaf-bits* #f))
107
108 (define (add-level min shift root)
109 (let* ((shift* (+ shift *branch-bits*))
110 (min* (round-down min shift*))
111 (idx (logand (ash (- min min*) (- shift)) *branch-mask*)))
112 (make-intset min* shift* (clone-branch-and-set #f idx root))))
113
114 (define (make-intset/prune min shift root)
115 (cond
116 ((not root)
117 empty-intset)
118 ((= shift *leaf-bits*)
119 (make-intset min shift root))
120 (else
121 (let lp ((i 0) (elt #f))
122 (cond
123 ((< i *branch-size*)
124 (if (vector-ref root i)
125 (if elt
126 (make-intset min shift root)
127 (lp (1+ i) i))
128 (lp (1+ i) elt)))
129 (elt
130 (let ((shift (- shift *branch-bits*)))
131 (make-intset/prune (+ min (ash elt shift))
132 shift
133 (vector-ref root elt))))
134 ;; Shouldn't be reached...
135 (else empty-intset))))))
136
137 (define (intset-add bs i)
138 (define (adjoin i shift root)
139 (cond
140 ((= shift *leaf-bits*)
141 (let ((idx (logand i *leaf-mask*)))
142 (if (and root (logbit? idx root))
143 root
144 (clone-leaf-and-set root idx #t))))
145 (else
146 (let* ((shift (- shift *branch-bits*))
147 (idx (logand (ash i (- shift)) *branch-mask*))
148 (node (and root (vector-ref root idx)))
149 (new-node (adjoin i shift node)))
150 (if (eq? node new-node)
151 root
152 (clone-branch-and-set root idx new-node))))))
153 (match bs
154 (($ <intset> min shift root)
155 (cond
156 ((< i 0)
157 ;; The power-of-two spanning trick doesn't work across 0.
158 (error "Intsets can only hold non-negative integers." i))
159 ((not root)
160 ;; Add first element.
161 (let ((min (round-down i shift)))
162 (make-intset min *leaf-bits*
163 (adjoin (- i min) *leaf-bits* root))))
164 ((and (<= min i) (< i (+ min (ash 1 shift))))
165 ;; Add element to set; level will not change.
166 (let ((old-root root)
167 (root (adjoin (- i min) shift root)))
168 (if (eq? root old-root)
169 bs
170 (make-intset min shift root))))
171 ((< i min)
172 ;; Rebuild the tree by unioning two intsets.
173 (intset-union (intset-add empty-intset i) bs))
174 (else
175 ;; Add a new level and try again.
176 (intset-add (add-level min shift root) i))))))
177
178 (define (intset-remove bs i)
179 (define (remove i shift root)
180 (cond
181 ((= shift *leaf-bits*)
182 (let ((idx (logand i *leaf-mask*)))
183 (if (logbit? idx root)
184 (let ((root (clone-leaf-and-set root idx #f)))
185 (and (not (leaf-empty? root)) root))
186 root)))
187 (else
188 (let* ((shift (- shift *branch-bits*))
189 (idx (logand (ash i (- shift)) *branch-mask*)))
190 (cond
191 ((vector-ref root idx)
192 => (lambda (node)
193 (let ((new-node (remove i shift node)))
194 (if (eq? node new-node)
195 root
196 (let ((root (clone-branch-and-set root idx new-node)))
197 (and (or new-node (not (branch-empty? root)))
198 root))))))
199 (else root))))))
200 (match bs
201 (($ <intset> min shift root)
202 (cond
203 ((not root) bs)
204 ((and (<= min i) (< i (+ min (ash 1 shift))))
205 ;; Add element to set; level will not change.
206 (let ((old-root root)
207 (root (remove (- i min) shift root)))
208 (if (eq? root old-root)
209 bs
210 (make-intset/prune min shift root))))
211 (else bs)))))
212
213 (define (intset-ref bs i)
214 (match bs
215 (($ <intset> min shift root)
216 (and (<= min i) (< i (+ min (ash 1 shift)))
217 (let ((i (- i min)))
218 (let lp ((node root) (shift shift))
219 (and node
220 (if (= shift *leaf-bits*)
221 (logbit? (logand i *leaf-mask*) node)
222 (let* ((shift (- shift *branch-bits*))
223 (idx (logand (ash i (- shift)) *branch-mask*)))
224 (lp (vector-ref node idx) shift))))))))))
225
226 (define (intset-next bs i)
227 (define (visit-leaf node i)
228 (let lp ((idx (logand i *leaf-mask*)))
229 (if (logbit? idx node)
230 (logior (logand i (lognot *leaf-mask*)) idx)
231 (let ((idx (1+ idx)))
232 (and (< idx *leaf-size*)
233 (lp idx))))))
234 (define (visit-branch node shift i)
235 (let lp ((i i) (idx (logand (ash i (- shift)) *branch-mask*)))
236 (and (< idx *branch-size*)
237 (or (visit-node (vector-ref node idx) shift i)
238 (let ((inc (ash 1 shift)))
239 (lp (+ (round-down i shift) inc) (1+ idx)))))))
240 (define (visit-node node shift i)
241 (and node
242 (if (= shift *leaf-bits*)
243 (visit-leaf node i)
244 (visit-branch node (- shift *branch-bits*) i))))
245 (match bs
246 (($ <intset> min shift root)
247 (let ((i (if (and i (< min i))
248 (- i min)
249 0)))
250 (and (< i (ash 1 shift))
251 (let ((i (visit-node root shift i)))
252 (and i (+ min i))))))))
253
254 (define (intset-size shift root)
255 (cond
256 ((not root) 0)
257 ((= *leaf-bits* shift) *leaf-size*)
258 (else
259 (let lp ((i (1- *branch-size*)))
260 (let ((node (vector-ref root i)))
261 (if node
262 (let ((shift (- shift *branch-bits*)))
263 (+ (intset-size shift node)
264 (* i (ash 1 shift))))
265 (lp (1- i))))))))
266
267 (define (intset-union a b)
268 ;; Union leaves.
269 (define (union-leaves a b)
270 (logior (or a 0) (or b 0)))
271 ;; Union A and B from index I; the result will be fresh.
272 (define (union-branches/fresh shift a b i fresh)
273 (let lp ((i 0))
274 (cond
275 ((< i *branch-size*)
276 (let* ((a-child (vector-ref a i))
277 (b-child (vector-ref b i)))
278 (vector-set! fresh i (union shift a-child b-child))
279 (lp (1+ i))))
280 (else fresh))))
281 ;; Union A and B from index I; the result may be eq? to A.
282 (define (union-branches/a shift a b i)
283 (let lp ((i i))
284 (cond
285 ((< i *branch-size*)
286 (let* ((a-child (vector-ref a i))
287 (b-child (vector-ref b i)))
288 (if (eq? a-child b-child)
289 (lp (1+ i))
290 (let ((child (union shift a-child b-child)))
291 (cond
292 ((eq? a-child child)
293 (lp (1+ i)))
294 (else
295 (let ((result (clone-branch-and-set a i child)))
296 (union-branches/fresh shift a b (1+ i) result))))))))
297 (else a))))
298 ;; Union A and B; the may could be eq? to either.
299 (define (union-branches shift a b)
300 (let lp ((i 0))
301 (cond
302 ((< i *branch-size*)
303 (let* ((a-child (vector-ref a i))
304 (b-child (vector-ref b i)))
305 (if (eq? a-child b-child)
306 (lp (1+ i))
307 (let ((child (union shift a-child b-child)))
308 (cond
309 ((eq? a-child child)
310 (union-branches/a shift a b (1+ i)))
311 ((eq? b-child child)
312 (union-branches/a shift b a (1+ i)))
313 (else
314 (let ((result (clone-branch-and-set a i child)))
315 (union-branches/fresh shift a b (1+ i) result))))))))
316 ;; Seems they are the same but not eq?. Odd.
317 (else a))))
318 (define (union shift a-node b-node)
319 (cond
320 ((not a-node) b-node)
321 ((not b-node) a-node)
322 ((eq? a-node b-node) a-node)
323 ((= shift *leaf-bits*) (union-leaves a-node b-node))
324 (else (union-branches (- shift *branch-bits*) a-node b-node))))
325 (match (cons a b)
326 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
327 (cond
328 ((not (= b-shift a-shift))
329 ;; Hoist the set with the lowest shift to meet the one with the
330 ;; higher shift.
331 (if (< b-shift a-shift)
332 (intset-union a (add-level b-min b-shift b-root))
333 (intset-union (add-level a-min a-shift a-root) b)))
334 ((not (= b-min a-min))
335 ;; Nodes at the same shift but different minimums will cover
336 ;; disjoint ranges (due to the round-down call on min). Hoist
337 ;; both until they cover the same range.
338 (intset-union (add-level a-min a-shift a-root)
339 (add-level b-min b-shift b-root)))
340 (else
341 ;; At this point, A and B cover the same range.
342 (let ((root (union a-shift a-root b-root)))
343 (cond
344 ((eq? root a-root) a)
345 ((eq? root b-root) b)
346 (else (make-intset a-min a-shift root)))))))))
347
348 (define (intset-intersect a b)
349 (define tmp (new-leaf))
350 ;; Intersect leaves.
351 (define (intersect-leaves a b)
352 (logand a b))
353 ;; Intersect A and B from index I; the result will be fresh.
354 (define (intersect-branches/fresh shift a b i fresh)
355 (let lp ((i 0))
356 (cond
357 ((< i *branch-size*)
358 (let* ((a-child (vector-ref a i))
359 (b-child (vector-ref b i)))
360 (vector-set! fresh i (intersect shift a-child b-child))
361 (lp (1+ i))))
362 ((branch-empty? fresh) #f)
363 (else fresh))))
364 ;; Intersect A and B from index I; the result may be eq? to A.
365 (define (intersect-branches/a shift a b i)
366 (let lp ((i i))
367 (cond
368 ((< i *branch-size*)
369 (let* ((a-child (vector-ref a i))
370 (b-child (vector-ref b i)))
371 (if (eq? a-child b-child)
372 (lp (1+ i))
373 (let ((child (intersect shift a-child b-child)))
374 (cond
375 ((eq? a-child child)
376 (lp (1+ i)))
377 (else
378 (let ((result (clone-branch-and-set a i child)))
379 (intersect-branches/fresh shift a b (1+ i) result))))))))
380 (else a))))
381 ;; Intersect A and B; the may could be eq? to either.
382 (define (intersect-branches shift a b)
383 (let lp ((i 0))
384 (cond
385 ((< i *branch-size*)
386 (let* ((a-child (vector-ref a i))
387 (b-child (vector-ref b i)))
388 (if (eq? a-child b-child)
389 (lp (1+ i))
390 (let ((child (intersect shift a-child b-child)))
391 (cond
392 ((eq? a-child child)
393 (intersect-branches/a shift a b (1+ i)))
394 ((eq? b-child child)
395 (intersect-branches/a shift b a (1+ i)))
396 (else
397 (let ((result (clone-branch-and-set a i child)))
398 (intersect-branches/fresh shift a b (1+ i) result))))))))
399 ;; Seems they are the same but not eq?. Odd.
400 (else a))))
401 (define (intersect shift a-node b-node)
402 (cond
403 ((or (not a-node) (not b-node)) #f)
404 ((eq? a-node b-node) a-node)
405 ((= shift *leaf-bits*) (intersect-leaves a-node b-node))
406 (else (intersect-branches (- shift *branch-bits*) a-node b-node))))
407
408 (define (different-mins lo-min lo-shift lo-root hi-min hi-shift hi lo-is-a?)
409 (cond
410 ((<= lo-shift hi-shift)
411 ;; If LO has a lower shift and a lower min, it is disjoint. If
412 ;; it has the same shift and a different min, it is also
413 ;; disjoint.
414 empty-intset)
415 (else
416 (let* ((lo-shift (- lo-shift *branch-bits*))
417 (lo-idx (ash (- hi-min lo-min) (- lo-shift))))
418 (cond
419 ((>= lo-idx *branch-size*)
420 ;; HI has a lower shift, but it not within LO.
421 empty-intset)
422 ((vector-ref lo-root lo-idx)
423 => (lambda (lo-root)
424 (let ((lo (make-intset (+ lo-min (ash lo-idx lo-shift))
425 lo-shift
426 lo-root)))
427 (if lo-is-a?
428 (intset-intersect lo hi)
429 (intset-intersect hi lo)))))
430 (else empty-intset))))))
431
432 (define (different-shifts-same-min min hi-shift hi-root lo lo-is-a?)
433 (cond
434 ((vector-ref hi-root 0)
435 => (lambda (hi-root)
436 (let ((hi (make-intset min
437 (- hi-shift *branch-bits*)
438 hi-root)))
439 (if lo-is-a?
440 (intset-intersect lo hi)
441 (intset-intersect hi lo)))))
442 (else empty-intset)))
443
444 (match (cons a b)
445 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
446 (cond
447 ((< a-min b-min)
448 (different-mins a-min a-shift a-root b-min b-shift b #t))
449 ((< b-min a-min)
450 (different-mins b-min b-shift b-root a-min a-shift a #f))
451 ((< a-shift b-shift)
452 (different-shifts-same-min b-min b-shift b-root a #t))
453 ((< b-shift a-shift)
454 (different-shifts-same-min a-min a-shift a-root b #f))
455 (else
456 ;; At this point, A and B cover the same range.
457 (let ((root (intersect a-shift a-root b-root)))
458 (cond
459 ((eq? root a-root) a)
460 ((eq? root b-root) b)
461 (else (make-intset/prune a-min a-shift root)))))))))
462
463 (define (intset-subtract a b)
464 (define tmp (new-leaf))
465 ;; Intersect leaves.
466 (define (subtract-leaves a b)
467 (logand a (lognot b)))
468 ;; Subtract B from A starting at index I; the result will be fresh.
469 (define (subtract-branches/fresh shift a b i fresh)
470 (let lp ((i 0))
471 (cond
472 ((< i *branch-size*)
473 (let* ((a-child (vector-ref a i))
474 (b-child (vector-ref b i)))
475 (vector-set! fresh i (subtract-nodes shift a-child b-child))
476 (lp (1+ i))))
477 ((branch-empty? fresh) #f)
478 (else fresh))))
479 ;; Subtract B from A. The result may be eq? to A.
480 (define (subtract-branches shift a b)
481 (let lp ((i 0))
482 (cond
483 ((< i *branch-size*)
484 (let* ((a-child (vector-ref a i))
485 (b-child (vector-ref b i)))
486 (let ((child (subtract-nodes shift a-child b-child)))
487 (cond
488 ((eq? a-child child)
489 (lp (1+ i)))
490 (else
491 (let ((result (clone-branch-and-set a i child)))
492 (subtract-branches/fresh shift a b (1+ i) result)))))))
493 (else a))))
494 (define (subtract-nodes shift a-node b-node)
495 (cond
496 ((or (not a-node) (not b-node)) a-node)
497 ((eq? a-node b-node) #f)
498 ((= shift *leaf-bits*) (subtract-leaves a-node b-node))
499 (else (subtract-branches (- shift *branch-bits*) a-node b-node))))
500
501 (match (cons a b)
502 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
503 (define (return root)
504 (cond
505 ((eq? root a-root) a)
506 (else (make-intset/prune a-min a-shift root))))
507 (cond
508 ((<= a-shift b-shift)
509 (let lp ((b-min b-min) (b-shift b-shift) (b-root b-root))
510 (if (= a-shift b-shift)
511 (if (= a-min b-min)
512 (return (subtract-nodes a-shift a-root b-root))
513 a)
514 (let* ((b-shift (- b-shift *branch-bits*))
515 (b-idx (ash (- a-min b-min) (- b-shift)))
516 (b-min (+ b-min (ash b-idx b-shift)))
517 (b-root (and b-root
518 (<= 0 b-idx)
519 (< b-idx *branch-size*)
520 (vector-ref b-root b-idx))))
521 (lp b-min b-shift b-root)))))
522 (else
523 (return
524 (let lp ((a-min a-min) (a-shift a-shift) (a-root a-root))
525 (if (= a-shift b-shift)
526 (if (= a-min b-min)
527 (subtract-nodes a-shift a-root b-root)
528 a-root)
529 (let* ((a-shift (- a-shift *branch-bits*))
530 (a-idx (ash (- b-min a-min) (- a-shift)))
531 (a-min (+ a-min (ash a-idx a-shift)))
532 (old (and a-root
533 (<= 0 a-idx)
534 (< a-idx *branch-size*)
535 (vector-ref a-root a-idx)))
536 (new (lp a-min a-shift old)))
537 (if (eq? old new)
538 a-root
539 (clone-branch-and-set a-root a-idx new)))))))))))
540
541 (define (bitvector->intset bv)
542 (define (finish-tail out min tail)
543 (if (zero? tail)
544 out
545 (intset-union out (make-intset min *leaf-bits* tail))))
546 (let lp ((out empty-intset) (min 0) (pos 0) (tail 0))
547 (let ((pos (bit-position #t bv pos)))
548 (cond
549 ((not pos)
550 (finish-tail out min tail))
551 ((< pos (+ min *leaf-size*))
552 (lp out min (1+ pos) (logior tail (ash 1 (- pos min)))))
553 (else
554 (let ((min* (round-down pos *leaf-bits*)))
555 (lp (finish-tail out min tail)
556 min* pos (ash 1 (- pos min*)))))))))