175b9e5b2a0e08ca4313a5889b3618c60b5c6ab2
[bpt/guile.git] / module / language / cps / intset.scm
1 ;;; Functional name maps
2 ;;; Copyright (C) 2014, 2015 Free Software Foundation, Inc.
3 ;;;
4 ;;; This library is free software: you can redistribute it and/or modify
5 ;;; it under the terms of the GNU Lesser General Public License as
6 ;;; published by the Free Software Foundation, either version 3 of the
7 ;;; License, or (at your option) any later version.
8 ;;;
9 ;;; This library is distributed in the hope that it will be useful, but
10 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ;;; Lesser General Public License for more details.
13 ;;;
14 ;;; You should have received a copy of the GNU Lesser General Public
15 ;;; License along with this program. If not, see
16 ;;; <http://www.gnu.org/licenses/>.
17
18 ;;; Commentary:
19 ;;;
20 ;;; A persistent, functional data structure representing a set of
21 ;;; integers as a tree whose branches are vectors and whose leaves are
22 ;;; fixnums. Intsets are careful to preserve sub-structure, in the
23 ;;; sense of eq?, whereever possible.
24 ;;;
25 ;;; Code:
26
27 (define-module (language cps intset)
28 #:use-module (rnrs bytevectors)
29 #:use-module (srfi srfi-9)
30 #:use-module (ice-9 match)
31 #:export (empty-intset
32 intset?
33 intset-add
34 intset-remove
35 intset-ref
36 intset-next
37 intset-fold
38 intset-fold2
39 intset-union
40 intset-intersect
41 intset-subtract
42 bitvector->intset))
43
44 (define-syntax-rule (define-inline name val)
45 (define-syntax name (identifier-syntax val)))
46
47 (eval-when (expand)
48 (use-modules (system base target))
49 (define-syntax compile-time-cond
50 (lambda (x)
51 (syntax-case x (else)
52 ((_ (test body ...) rest ...)
53 (if (primitive-eval (syntax->datum #'test))
54 #'(begin body ...)
55 #'(begin (compile-time-cond rest ...))))
56 ((_ (else body ...))
57 #'(begin body ...))
58 ((_)
59 (error "no compile-time-cond expression matched"))))))
60
61 (compile-time-cond
62 ((eqv? (target-word-size) 4)
63 (define-inline *leaf-bits* 4))
64 ((eqv? (target-word-size) 8)
65 (define-inline *leaf-bits* 5)))
66
67 (define-inline *leaf-size* (ash 1 *leaf-bits*))
68 (define-inline *leaf-mask* (1- *leaf-size*))
69 (define-inline *branch-bits* 3)
70 (define-inline *branch-size* (ash 1 *branch-bits*))
71 (define-inline *branch-mask* (1- *branch-size*))
72
73 (define-record-type <intset>
74 (make-intset min shift root)
75 intset?
76 (min intset-min)
77 (shift intset-shift)
78 (root intset-root))
79
80 (define (new-leaf) 0)
81 (define-inlinable (clone-leaf-and-set leaf i val)
82 (if val
83 (if leaf
84 (logior leaf (ash 1 i))
85 (ash 1 i))
86 (if leaf
87 (logand leaf (lognot (ash 1 i)))
88 #f)))
89 (define (leaf-empty? leaf)
90 (zero? leaf))
91
92 (define (new-branch)
93 (make-vector *branch-size* #f))
94 (define (clone-branch-and-set branch i elt)
95 (let ((new (new-branch)))
96 (when branch (vector-move-left! branch 0 *branch-size* new 0))
97 (vector-set! new i elt)
98 new))
99 (define (branch-empty? branch)
100 (let lp ((i 0))
101 (or (= i *branch-size*)
102 (and (not (vector-ref branch i))
103 (lp (1+ i))))))
104
105 (define (round-down min shift)
106 (logand min (lognot (1- (ash 1 shift)))))
107
108 (define empty-intset (make-intset 0 *leaf-bits* #f))
109
110 (define (add-level min shift root)
111 (let* ((shift* (+ shift *branch-bits*))
112 (min* (round-down min shift*))
113 (idx (logand (ash (- min min*) (- shift)) *branch-mask*)))
114 (make-intset min* shift* (clone-branch-and-set #f idx root))))
115
116 (define (make-intset/prune min shift root)
117 (cond
118 ((not root)
119 empty-intset)
120 ((= shift *leaf-bits*)
121 (make-intset min shift root))
122 (else
123 (let lp ((i 0) (elt #f))
124 (cond
125 ((< i *branch-size*)
126 (if (vector-ref root i)
127 (if elt
128 (make-intset min shift root)
129 (lp (1+ i) i))
130 (lp (1+ i) elt)))
131 (elt
132 (let ((shift (- shift *branch-bits*)))
133 (make-intset/prune (+ min (ash elt shift))
134 shift
135 (vector-ref root elt))))
136 ;; Shouldn't be reached...
137 (else empty-intset))))))
138
139 (define (intset-add bs i)
140 (define (adjoin i shift root)
141 (cond
142 ((= shift *leaf-bits*)
143 (let ((idx (logand i *leaf-mask*)))
144 (if (and root (logbit? idx root))
145 root
146 (clone-leaf-and-set root idx #t))))
147 (else
148 (let* ((shift (- shift *branch-bits*))
149 (idx (logand (ash i (- shift)) *branch-mask*))
150 (node (and root (vector-ref root idx)))
151 (new-node (adjoin i shift node)))
152 (if (eq? node new-node)
153 root
154 (clone-branch-and-set root idx new-node))))))
155 (match bs
156 (($ <intset> min shift root)
157 (cond
158 ((< i 0)
159 ;; The power-of-two spanning trick doesn't work across 0.
160 (error "Intsets can only hold non-negative integers." i))
161 ((not root)
162 ;; Add first element.
163 (let ((min (round-down i shift)))
164 (make-intset min *leaf-bits*
165 (adjoin (- i min) *leaf-bits* root))))
166 ((and (<= min i) (< i (+ min (ash 1 shift))))
167 ;; Add element to set; level will not change.
168 (let ((old-root root)
169 (root (adjoin (- i min) shift root)))
170 (if (eq? root old-root)
171 bs
172 (make-intset min shift root))))
173 ((< i min)
174 ;; Rebuild the tree by unioning two intsets.
175 (intset-union (intset-add empty-intset i) bs))
176 (else
177 ;; Add a new level and try again.
178 (intset-add (add-level min shift root) i))))))
179
180 (define (intset-remove bs i)
181 (define (remove i shift root)
182 (cond
183 ((= shift *leaf-bits*)
184 (let ((idx (logand i *leaf-mask*)))
185 (if (logbit? idx root)
186 (let ((root (clone-leaf-and-set root idx #f)))
187 (and (not (leaf-empty? root)) root))
188 root)))
189 (else
190 (let* ((shift (- shift *branch-bits*))
191 (idx (logand (ash i (- shift)) *branch-mask*)))
192 (cond
193 ((vector-ref root idx)
194 => (lambda (node)
195 (let ((new-node (remove i shift node)))
196 (if (eq? node new-node)
197 root
198 (let ((root (clone-branch-and-set root idx new-node)))
199 (and (or new-node (not (branch-empty? root)))
200 root))))))
201 (else root))))))
202 (match bs
203 (($ <intset> min shift root)
204 (cond
205 ((not root) bs)
206 ((and (<= min i) (< i (+ min (ash 1 shift))))
207 ;; Add element to set; level will not change.
208 (let ((old-root root)
209 (root (remove (- i min) shift root)))
210 (if (eq? root old-root)
211 bs
212 (make-intset/prune min shift root))))
213 (else bs)))))
214
215 (define (intset-ref bs i)
216 (match bs
217 (($ <intset> min shift root)
218 (and (<= min i) (< i (+ min (ash 1 shift)))
219 (let ((i (- i min)))
220 (let lp ((node root) (shift shift))
221 (and node
222 (if (= shift *leaf-bits*)
223 (logbit? (logand i *leaf-mask*) node)
224 (let* ((shift (- shift *branch-bits*))
225 (idx (logand (ash i (- shift)) *branch-mask*)))
226 (lp (vector-ref node idx) shift))))))))))
227
228 (define (intset-next bs i)
229 (define (visit-leaf node i)
230 (let lp ((idx (logand i *leaf-mask*)))
231 (if (logbit? idx node)
232 (logior (logand i (lognot *leaf-mask*)) idx)
233 (let ((idx (1+ idx)))
234 (and (< idx *leaf-size*)
235 (lp idx))))))
236 (define (visit-branch node shift i)
237 (let lp ((i i) (idx (logand (ash i (- shift)) *branch-mask*)))
238 (and (< idx *branch-size*)
239 (or (let ((node (vector-ref node idx)))
240 (and node (visit-node node shift i)))
241 (let ((inc (ash 1 shift)))
242 (lp (+ (round-down i shift) inc) (1+ idx)))))))
243 (define (visit-node node shift i)
244 (if (= shift *leaf-bits*)
245 (visit-leaf node i)
246 (visit-branch node (- shift *branch-bits*) i)))
247 (match bs
248 (($ <intset> min shift root)
249 (let ((i (if (and i (< min i))
250 (- i min)
251 0)))
252 (and root (< i (ash 1 shift))
253 (let ((i (visit-node root shift i)))
254 (and i (+ min i))))))))
255
256 (define (intset-fold f set seed)
257 (define (visit-branch node shift min seed)
258 (cond
259 ((= shift *leaf-bits*)
260 (let lp ((i 0) (seed seed))
261 (if (< i *leaf-size*)
262 (lp (1+ i)
263 (if (logbit? i node)
264 (f (+ i min) seed)
265 seed))
266 seed)))
267 (else
268 (let ((shift (- shift *branch-bits*)))
269 (let lp ((i 0) (seed seed))
270 (if (< i *branch-size*)
271 (let ((elt (vector-ref node i)))
272 (lp (1+ i)
273 (if elt
274 (visit-branch elt shift (+ min (ash i shift)) seed)
275 seed)))
276 seed))))))
277 (match set
278 (($ <intset> min shift root)
279 (cond
280 ((not root) seed)
281 (else (visit-branch root shift min seed))))))
282
283 (define (intset-fold2 f set s0 s1)
284 (define (visit-branch node shift min s0 s1)
285 (cond
286 ((= shift *leaf-bits*)
287 (let lp ((i 0) (s0 s0) (s1 s1))
288 (if (< i *leaf-size*)
289 (if (logbit? i node)
290 (call-with-values (lambda () (f (+ i min) s0 s1))
291 (lambda (s0 s1)
292 (lp (1+ i) s0 s1)))
293 (lp (1+ i) s0 s1))
294 (values s0 s1))))
295 (else
296 (let ((shift (- shift *branch-bits*)))
297 (let lp ((i 0) (s0 s0) (s1 s1))
298 (if (< i *branch-size*)
299 (let ((elt (vector-ref node i)))
300 (if elt
301 (call-with-values
302 (lambda ()
303 (visit-branch elt shift (+ min (ash i shift)) s0 s1))
304 (lambda (s0 s1)
305 (lp (1+ i) s0 s1)))
306 (lp (1+ i) s0 s1)))
307 (values s0 s1)))))))
308 (match set
309 (($ <intset> min shift root)
310 (cond
311 ((not root) (values s0 s1))
312 (else (visit-branch root shift min s0 s1))))))
313
314 (define (intset-size shift root)
315 (cond
316 ((not root) 0)
317 ((= *leaf-bits* shift) *leaf-size*)
318 (else
319 (let lp ((i (1- *branch-size*)))
320 (let ((node (vector-ref root i)))
321 (if node
322 (let ((shift (- shift *branch-bits*)))
323 (+ (intset-size shift node)
324 (* i (ash 1 shift))))
325 (lp (1- i))))))))
326
327 (define (intset-union a b)
328 ;; Union leaves.
329 (define (union-leaves a b)
330 (logior (or a 0) (or b 0)))
331 ;; Union A and B from index I; the result will be fresh.
332 (define (union-branches/fresh shift a b i fresh)
333 (let lp ((i 0))
334 (cond
335 ((< i *branch-size*)
336 (let* ((a-child (vector-ref a i))
337 (b-child (vector-ref b i)))
338 (vector-set! fresh i (union shift a-child b-child))
339 (lp (1+ i))))
340 (else fresh))))
341 ;; Union A and B from index I; the result may be eq? to A.
342 (define (union-branches/a shift a b i)
343 (let lp ((i i))
344 (cond
345 ((< i *branch-size*)
346 (let* ((a-child (vector-ref a i))
347 (b-child (vector-ref b i)))
348 (if (eq? a-child b-child)
349 (lp (1+ i))
350 (let ((child (union shift a-child b-child)))
351 (cond
352 ((eq? a-child child)
353 (lp (1+ i)))
354 (else
355 (let ((result (clone-branch-and-set a i child)))
356 (union-branches/fresh shift a b (1+ i) result))))))))
357 (else a))))
358 ;; Union A and B; the may could be eq? to either.
359 (define (union-branches shift a b)
360 (let lp ((i 0))
361 (cond
362 ((< i *branch-size*)
363 (let* ((a-child (vector-ref a i))
364 (b-child (vector-ref b i)))
365 (if (eq? a-child b-child)
366 (lp (1+ i))
367 (let ((child (union shift a-child b-child)))
368 (cond
369 ((eq? a-child child)
370 (union-branches/a shift a b (1+ i)))
371 ((eq? b-child child)
372 (union-branches/a shift b a (1+ i)))
373 (else
374 (let ((result (clone-branch-and-set a i child)))
375 (union-branches/fresh shift a b (1+ i) result))))))))
376 ;; Seems they are the same but not eq?. Odd.
377 (else a))))
378 (define (union shift a-node b-node)
379 (cond
380 ((not a-node) b-node)
381 ((not b-node) a-node)
382 ((eq? a-node b-node) a-node)
383 ((= shift *leaf-bits*) (union-leaves a-node b-node))
384 (else (union-branches (- shift *branch-bits*) a-node b-node))))
385 (match (cons a b)
386 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
387 (cond
388 ((not (= b-shift a-shift))
389 ;; Hoist the set with the lowest shift to meet the one with the
390 ;; higher shift.
391 (if (< b-shift a-shift)
392 (intset-union a (add-level b-min b-shift b-root))
393 (intset-union (add-level a-min a-shift a-root) b)))
394 ((not (= b-min a-min))
395 ;; Nodes at the same shift but different minimums will cover
396 ;; disjoint ranges (due to the round-down call on min). Hoist
397 ;; both until they cover the same range.
398 (intset-union (add-level a-min a-shift a-root)
399 (add-level b-min b-shift b-root)))
400 (else
401 ;; At this point, A and B cover the same range.
402 (let ((root (union a-shift a-root b-root)))
403 (cond
404 ((eq? root a-root) a)
405 ((eq? root b-root) b)
406 (else (make-intset a-min a-shift root)))))))))
407
408 (define (intset-intersect a b)
409 (define tmp (new-leaf))
410 ;; Intersect leaves.
411 (define (intersect-leaves a b)
412 (logand a b))
413 ;; Intersect A and B from index I; the result will be fresh.
414 (define (intersect-branches/fresh shift a b i fresh)
415 (let lp ((i 0))
416 (cond
417 ((< i *branch-size*)
418 (let* ((a-child (vector-ref a i))
419 (b-child (vector-ref b i)))
420 (vector-set! fresh i (intersect shift a-child b-child))
421 (lp (1+ i))))
422 ((branch-empty? fresh) #f)
423 (else fresh))))
424 ;; Intersect A and B from index I; the result may be eq? to A.
425 (define (intersect-branches/a shift a b i)
426 (let lp ((i i))
427 (cond
428 ((< i *branch-size*)
429 (let* ((a-child (vector-ref a i))
430 (b-child (vector-ref b i)))
431 (if (eq? a-child b-child)
432 (lp (1+ i))
433 (let ((child (intersect shift a-child b-child)))
434 (cond
435 ((eq? a-child child)
436 (lp (1+ i)))
437 (else
438 (let ((result (clone-branch-and-set a i child)))
439 (intersect-branches/fresh shift a b (1+ i) result))))))))
440 (else a))))
441 ;; Intersect A and B; the may could be eq? to either.
442 (define (intersect-branches shift a b)
443 (let lp ((i 0))
444 (cond
445 ((< i *branch-size*)
446 (let* ((a-child (vector-ref a i))
447 (b-child (vector-ref b i)))
448 (if (eq? a-child b-child)
449 (lp (1+ i))
450 (let ((child (intersect shift a-child b-child)))
451 (cond
452 ((eq? a-child child)
453 (intersect-branches/a shift a b (1+ i)))
454 ((eq? b-child child)
455 (intersect-branches/a shift b a (1+ i)))
456 (else
457 (let ((result (clone-branch-and-set a i child)))
458 (intersect-branches/fresh shift a b (1+ i) result))))))))
459 ;; Seems they are the same but not eq?. Odd.
460 (else a))))
461 (define (intersect shift a-node b-node)
462 (cond
463 ((or (not a-node) (not b-node)) #f)
464 ((eq? a-node b-node) a-node)
465 ((= shift *leaf-bits*) (intersect-leaves a-node b-node))
466 (else (intersect-branches (- shift *branch-bits*) a-node b-node))))
467
468 (define (different-mins lo-min lo-shift lo-root hi-min hi-shift hi lo-is-a?)
469 (cond
470 ((<= lo-shift hi-shift)
471 ;; If LO has a lower shift and a lower min, it is disjoint. If
472 ;; it has the same shift and a different min, it is also
473 ;; disjoint.
474 empty-intset)
475 (else
476 (let* ((lo-shift (- lo-shift *branch-bits*))
477 (lo-idx (ash (- hi-min lo-min) (- lo-shift))))
478 (cond
479 ((>= lo-idx *branch-size*)
480 ;; HI has a lower shift, but it not within LO.
481 empty-intset)
482 ((vector-ref lo-root lo-idx)
483 => (lambda (lo-root)
484 (let ((lo (make-intset (+ lo-min (ash lo-idx lo-shift))
485 lo-shift
486 lo-root)))
487 (if lo-is-a?
488 (intset-intersect lo hi)
489 (intset-intersect hi lo)))))
490 (else empty-intset))))))
491
492 (define (different-shifts-same-min min hi-shift hi-root lo lo-is-a?)
493 (cond
494 ((vector-ref hi-root 0)
495 => (lambda (hi-root)
496 (let ((hi (make-intset min
497 (- hi-shift *branch-bits*)
498 hi-root)))
499 (if lo-is-a?
500 (intset-intersect lo hi)
501 (intset-intersect hi lo)))))
502 (else empty-intset)))
503
504 (match (cons a b)
505 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
506 (cond
507 ((< a-min b-min)
508 (different-mins a-min a-shift a-root b-min b-shift b #t))
509 ((< b-min a-min)
510 (different-mins b-min b-shift b-root a-min a-shift a #f))
511 ((< a-shift b-shift)
512 (different-shifts-same-min b-min b-shift b-root a #t))
513 ((< b-shift a-shift)
514 (different-shifts-same-min a-min a-shift a-root b #f))
515 (else
516 ;; At this point, A and B cover the same range.
517 (let ((root (intersect a-shift a-root b-root)))
518 (cond
519 ((eq? root a-root) a)
520 ((eq? root b-root) b)
521 (else (make-intset/prune a-min a-shift root)))))))))
522
523 (define (intset-subtract a b)
524 (define tmp (new-leaf))
525 ;; Intersect leaves.
526 (define (subtract-leaves a b)
527 (logand a (lognot b)))
528 ;; Subtract B from A starting at index I; the result will be fresh.
529 (define (subtract-branches/fresh shift a b i fresh)
530 (let lp ((i 0))
531 (cond
532 ((< i *branch-size*)
533 (let* ((a-child (vector-ref a i))
534 (b-child (vector-ref b i)))
535 (vector-set! fresh i (subtract-nodes shift a-child b-child))
536 (lp (1+ i))))
537 ((branch-empty? fresh) #f)
538 (else fresh))))
539 ;; Subtract B from A. The result may be eq? to A.
540 (define (subtract-branches shift a b)
541 (let lp ((i 0))
542 (cond
543 ((< i *branch-size*)
544 (let* ((a-child (vector-ref a i))
545 (b-child (vector-ref b i)))
546 (let ((child (subtract-nodes shift a-child b-child)))
547 (cond
548 ((eq? a-child child)
549 (lp (1+ i)))
550 (else
551 (let ((result (clone-branch-and-set a i child)))
552 (subtract-branches/fresh shift a b (1+ i) result)))))))
553 (else a))))
554 (define (subtract-nodes shift a-node b-node)
555 (cond
556 ((or (not a-node) (not b-node)) a-node)
557 ((eq? a-node b-node) #f)
558 ((= shift *leaf-bits*) (subtract-leaves a-node b-node))
559 (else (subtract-branches (- shift *branch-bits*) a-node b-node))))
560
561 (match (cons a b)
562 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
563 (define (return root)
564 (cond
565 ((eq? root a-root) a)
566 (else (make-intset/prune a-min a-shift root))))
567 (cond
568 ((<= a-shift b-shift)
569 (let lp ((b-min b-min) (b-shift b-shift) (b-root b-root))
570 (if (= a-shift b-shift)
571 (if (= a-min b-min)
572 (return (subtract-nodes a-shift a-root b-root))
573 a)
574 (let* ((b-shift (- b-shift *branch-bits*))
575 (b-idx (ash (- a-min b-min) (- b-shift)))
576 (b-min (+ b-min (ash b-idx b-shift)))
577 (b-root (and b-root
578 (<= 0 b-idx)
579 (< b-idx *branch-size*)
580 (vector-ref b-root b-idx))))
581 (lp b-min b-shift b-root)))))
582 (else
583 (return
584 (let lp ((a-min a-min) (a-shift a-shift) (a-root a-root))
585 (if (= a-shift b-shift)
586 (if (= a-min b-min)
587 (subtract-nodes a-shift a-root b-root)
588 a-root)
589 (let* ((a-shift (- a-shift *branch-bits*))
590 (a-idx (ash (- b-min a-min) (- a-shift)))
591 (a-min (+ a-min (ash a-idx a-shift)))
592 (old (and a-root
593 (<= 0 a-idx)
594 (< a-idx *branch-size*)
595 (vector-ref a-root a-idx)))
596 (new (lp a-min a-shift old)))
597 (if (eq? old new)
598 a-root
599 (clone-branch-and-set a-root a-idx new)))))))))))
600
601 (define (bitvector->intset bv)
602 (define (finish-tail out min tail)
603 (if (zero? tail)
604 out
605 (intset-union out (make-intset min *leaf-bits* tail))))
606 (let lp ((out empty-intset) (min 0) (pos 0) (tail 0))
607 (let ((pos (bit-position #t bv pos)))
608 (cond
609 ((not pos)
610 (finish-tail out min tail))
611 ((< pos (+ min *leaf-size*))
612 (lp out min (1+ pos) (logior tail (ash 1 (- pos min)))))
613 (else
614 (let ((min* (round-down pos *leaf-bits*)))
615 (lp (finish-tail out min tail)
616 min* pos (ash 1 (- pos min*)))))))))