Restrict intsets and maps to non-negative integers
[bpt/guile.git] / module / language / cps / intset.scm
1 ;;; Functional name maps
2 ;;; Copyright (C) 2014 Free Software Foundation, Inc.
3 ;;;
4 ;;; This library is free software: you can redistribute it and/or modify
5 ;;; it under the terms of the GNU Lesser General Public License as
6 ;;; published by the Free Software Foundation, either version 3 of the
7 ;;; License, or (at your option) any later version.
8 ;;;
9 ;;; This library is distributed in the hope that it will be useful, but
10 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ;;; Lesser General Public License for more details.
13 ;;;
14 ;;; You should have received a copy of the GNU Lesser General Public
15 ;;; License along with this program. If not, see
16 ;;; <http://www.gnu.org/licenses/>.
17
18 ;;; Commentary:
19 ;;;
20 ;;; A persistent, functional data structure representing a set of
21 ;;; integers as a tree whose branches are vectors and whose leaves are
22 ;;; fixnums. Intsets are careful to preserve sub-structure, in the
23 ;;; sense of eq?, whereever possible.
24 ;;;
25 ;;; Code:
26
27 (define-module (language cps intset)
28 #:use-module (rnrs bytevectors)
29 #:use-module (srfi srfi-9)
30 #:use-module (ice-9 match)
31 #:export (empty-intset
32 intset?
33 intset-add
34 intset-remove
35 intset-ref
36 intset-next
37 intset-union
38 intset-intersect
39 intset-subtract))
40
41 (define-syntax-rule (define-inline name val)
42 (define-syntax name (identifier-syntax val)))
43
44 (eval-when (expand)
45 (use-modules (system base target))
46 (define-syntax compile-time-cond
47 (lambda (x)
48 (syntax-case x (else)
49 ((_ (test body ...) rest ...)
50 (if (primitive-eval (syntax->datum #'test))
51 #'(begin body ...)
52 #'(begin (compile-time-cond rest ...))))
53 ((_ (else body ...))
54 #'(begin body ...))
55 ((_)
56 (error "no compile-time-cond expression matched"))))))
57
58 (compile-time-cond
59 ((eqv? (target-word-size) 4)
60 (define-inline *leaf-bits* 4))
61 ((eqv? (target-word-size) 8)
62 (define-inline *leaf-bits* 5)))
63
64 (define-inline *leaf-size* (ash 1 *leaf-bits*))
65 (define-inline *leaf-mask* (1- *leaf-size*))
66 (define-inline *branch-bits* 3)
67 (define-inline *branch-size* (ash 1 *branch-bits*))
68 (define-inline *branch-mask* (1- *branch-size*))
69
70 (define-record-type <intset>
71 (make-intset min shift root)
72 intset?
73 (min intset-min)
74 (shift intset-shift)
75 (root intset-root))
76
77 (define (new-leaf) 0)
78 (define-inlinable (clone-leaf-and-set leaf i val)
79 (if val
80 (if leaf
81 (logior leaf (ash 1 i))
82 (ash 1 i))
83 (if leaf
84 (logand leaf (lognot (ash 1 i)))
85 #f)))
86 (define (leaf-empty? leaf)
87 (zero? leaf))
88
89 (define (new-branch)
90 (make-vector *branch-size* #f))
91 (define (clone-branch-and-set branch i elt)
92 (let ((new (new-branch)))
93 (when branch (vector-move-left! branch 0 *branch-size* new 0))
94 (vector-set! new i elt)
95 new))
96 (define (branch-empty? branch)
97 (let lp ((i 0))
98 (or (= i *branch-size*)
99 (and (not (vector-ref branch i))
100 (lp (1+ i))))))
101
102 (define (round-down min shift)
103 (logand min (lognot (1- (ash 1 shift)))))
104
105 (define empty-intset (make-intset 0 *leaf-bits* #f))
106
107 (define (add-level min shift root)
108 (let* ((shift* (+ shift *branch-bits*))
109 (min* (round-down min shift*))
110 (idx (logand (ash (- min min*) (- shift)) *branch-mask*)))
111 (make-intset min* shift* (clone-branch-and-set #f idx root))))
112
113 (define (make-intset/prune min shift root)
114 (cond
115 ((not root)
116 empty-intset)
117 ((= shift *leaf-bits*)
118 (make-intset min shift root))
119 (else
120 (let lp ((i 0) (elt #f))
121 (cond
122 ((< i *branch-size*)
123 (if (vector-ref root i)
124 (if elt
125 (make-intset min shift root)
126 (lp (1+ i) i))
127 (lp (1+ i) elt)))
128 (elt
129 (let ((shift (- shift *branch-bits*)))
130 (make-intset/prune (+ min (ash elt shift))
131 shift
132 (vector-ref root elt))))
133 ;; Shouldn't be reached...
134 (else empty-intset))))))
135
136 (define (intset-add bs i)
137 (define (adjoin i shift root)
138 (cond
139 ((= shift *leaf-bits*)
140 (let ((idx (logand i *leaf-mask*)))
141 (if (and root (logbit? idx root))
142 root
143 (clone-leaf-and-set root idx #t))))
144 (else
145 (let* ((shift (- shift *branch-bits*))
146 (idx (logand (ash i (- shift)) *branch-mask*))
147 (node (and root (vector-ref root idx)))
148 (new-node (adjoin i shift node)))
149 (if (eq? node new-node)
150 root
151 (clone-branch-and-set root idx new-node))))))
152 (match bs
153 (($ <intset> min shift root)
154 (cond
155 ((< i 0)
156 ;; The power-of-two spanning trick doesn't work across 0.
157 (error "Intsets can only hold non-negative integers." i))
158 ((not root)
159 ;; Add first element.
160 (let ((min (round-down i shift)))
161 (make-intset min *leaf-bits*
162 (adjoin (- i min) *leaf-bits* root))))
163 ((and (<= min i) (< i (+ min (ash 1 shift))))
164 ;; Add element to set; level will not change.
165 (let ((old-root root)
166 (root (adjoin (- i min) shift root)))
167 (if (eq? root old-root)
168 bs
169 (make-intset min shift root))))
170 ((< i min)
171 ;; Rebuild the tree by unioning two intsets.
172 (intset-union (intset-add empty-intset i) bs))
173 (else
174 ;; Add a new level and try again.
175 (intset-add (add-level min shift root) i))))))
176
177 (define (intset-remove bs i)
178 (define (remove i shift root)
179 (cond
180 ((= shift *leaf-bits*)
181 (let ((idx (logand i *leaf-mask*)))
182 (if (logbit? idx root)
183 (let ((root (clone-leaf-and-set root idx #f)))
184 (and (not (leaf-empty? root)) root))
185 root)))
186 (else
187 (let* ((shift (- shift *branch-bits*))
188 (idx (logand (ash i (- shift)) *branch-mask*)))
189 (cond
190 ((vector-ref root idx)
191 => (lambda (node)
192 (let ((new-node (remove i shift node)))
193 (if (eq? node new-node)
194 root
195 (let ((root (clone-branch-and-set root idx new-node)))
196 (and (or new-node (not (branch-empty? root)))
197 root))))))
198 (else root))))))
199 (match bs
200 (($ <intset> min shift root)
201 (cond
202 ((not root) bs)
203 ((and (<= min i) (< i (+ min (ash 1 shift))))
204 ;; Add element to set; level will not change.
205 (let ((old-root root)
206 (root (remove (- i min) shift root)))
207 (if (eq? root old-root)
208 bs
209 (make-intset/prune min shift root))))
210 (else bs)))))
211
212 (define (intset-ref bs i)
213 (match bs
214 (($ <intset> min shift root)
215 (and (<= min i) (< i (+ min (ash 1 shift)))
216 (let ((i (- i min)))
217 (let lp ((node root) (shift shift))
218 (and node
219 (if (= shift *leaf-bits*)
220 (logbit? (logand i *leaf-mask*) node)
221 (let* ((shift (- shift *branch-bits*))
222 (idx (logand (ash i (- shift)) *branch-mask*)))
223 (lp (vector-ref node idx) shift))))))))))
224
225 (define (intset-next bs i)
226 (define (visit-leaf node i)
227 (let lp ((idx (logand i *leaf-mask*)))
228 (if (logbit? idx node)
229 (logior (logand i (lognot *leaf-mask*)) idx)
230 (let ((idx (1+ idx)))
231 (and (< idx *leaf-size*)
232 (lp idx))))))
233 (define (visit-branch node shift i)
234 (let lp ((i i) (idx (logand (ash i (- shift)) *branch-mask*)))
235 (and (< idx *branch-size*)
236 (or (visit-node (vector-ref node idx) shift i)
237 (let ((inc (ash 1 shift)))
238 (lp (+ (round-down i shift) inc) (1+ idx)))))))
239 (define (visit-node node shift i)
240 (and node
241 (if (= shift *leaf-bits*)
242 (visit-leaf node i)
243 (visit-branch node (- shift *branch-bits*) i))))
244 (match bs
245 (($ <intset> min shift root)
246 (let ((i (if (and i (< min i))
247 (- i min)
248 0)))
249 (and (< i (ash 1 shift))
250 (let ((i (visit-node root shift i)))
251 (and i (+ min i))))))))
252
253 (define (intset-size shift root)
254 (cond
255 ((not root) 0)
256 ((= *leaf-bits* shift) *leaf-size*)
257 (else
258 (let lp ((i (1- *branch-size*)))
259 (let ((node (vector-ref root i)))
260 (if node
261 (let ((shift (- shift *branch-bits*)))
262 (+ (intset-size shift node)
263 (* i (ash 1 shift))))
264 (lp (1- i))))))))
265
266 (define (intset-union a b)
267 ;; Union leaves.
268 (define (union-leaves a b)
269 (logior (or a 0) (or b 0)))
270 ;; Union A and B from index I; the result will be fresh.
271 (define (union-branches/fresh shift a b i fresh)
272 (let lp ((i 0))
273 (cond
274 ((< i *branch-size*)
275 (let* ((a-child (vector-ref a i))
276 (b-child (vector-ref b i)))
277 (vector-set! fresh i (union shift a-child b-child))
278 (lp (1+ i))))
279 (else fresh))))
280 ;; Union A and B from index I; the result may be eq? to A.
281 (define (union-branches/a shift a b i)
282 (let lp ((i i))
283 (cond
284 ((< i *branch-size*)
285 (let* ((a-child (vector-ref a i))
286 (b-child (vector-ref b i)))
287 (if (eq? a-child b-child)
288 (lp (1+ i))
289 (let ((child (union shift a-child b-child)))
290 (cond
291 ((eq? a-child child)
292 (lp (1+ i)))
293 (else
294 (let ((result (clone-branch-and-set a i child)))
295 (union-branches/fresh shift a b (1+ i) result))))))))
296 (else a))))
297 ;; Union A and B; the may could be eq? to either.
298 (define (union-branches shift a b)
299 (let lp ((i 0))
300 (cond
301 ((< i *branch-size*)
302 (let* ((a-child (vector-ref a i))
303 (b-child (vector-ref b i)))
304 (if (eq? a-child b-child)
305 (lp (1+ i))
306 (let ((child (union shift a-child b-child)))
307 (cond
308 ((eq? a-child child)
309 (union-branches/a shift a b (1+ i)))
310 ((eq? b-child child)
311 (union-branches/a shift b a (1+ i)))
312 (else
313 (let ((result (clone-branch-and-set a i child)))
314 (union-branches/fresh shift a b (1+ i) result))))))))
315 ;; Seems they are the same but not eq?. Odd.
316 (else a))))
317 (define (union shift a-node b-node)
318 (cond
319 ((not a-node) b-node)
320 ((not b-node) a-node)
321 ((eq? a-node b-node) a-node)
322 ((= shift *leaf-bits*) (union-leaves a-node b-node))
323 (else (union-branches (- shift *branch-bits*) a-node b-node))))
324 (match (cons a b)
325 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
326 (cond
327 ((not (= b-shift a-shift))
328 ;; Hoist the set with the lowest shift to meet the one with the
329 ;; higher shift.
330 (if (< b-shift a-shift)
331 (intset-union a (add-level b-min b-shift b-root))
332 (intset-union (add-level a-min a-shift a-root) b)))
333 ((not (= b-min a-min))
334 ;; Nodes at the same shift but different minimums will cover
335 ;; disjoint ranges (due to the round-down call on min). Hoist
336 ;; both until they cover the same range.
337 (intset-union (add-level a-min a-shift a-root)
338 (add-level b-min b-shift b-root)))
339 (else
340 ;; At this point, A and B cover the same range.
341 (let ((root (union a-shift a-root b-root)))
342 (cond
343 ((eq? root a-root) a)
344 ((eq? root b-root) b)
345 (else (make-intset a-min a-shift root)))))))))
346
347 (define (intset-intersect a b)
348 (define tmp (new-leaf))
349 ;; Intersect leaves.
350 (define (intersect-leaves a b)
351 (logand a b))
352 ;; Intersect A and B from index I; the result will be fresh.
353 (define (intersect-branches/fresh shift a b i fresh)
354 (let lp ((i 0))
355 (cond
356 ((< i *branch-size*)
357 (let* ((a-child (vector-ref a i))
358 (b-child (vector-ref b i)))
359 (vector-set! fresh i (intersect shift a-child b-child))
360 (lp (1+ i))))
361 ((branch-empty? fresh) #f)
362 (else fresh))))
363 ;; Intersect A and B from index I; the result may be eq? to A.
364 (define (intersect-branches/a shift a b i)
365 (let lp ((i i))
366 (cond
367 ((< i *branch-size*)
368 (let* ((a-child (vector-ref a i))
369 (b-child (vector-ref b i)))
370 (if (eq? a-child b-child)
371 (lp (1+ i))
372 (let ((child (intersect shift a-child b-child)))
373 (cond
374 ((eq? a-child child)
375 (lp (1+ i)))
376 (else
377 (let ((result (clone-branch-and-set a i child)))
378 (intersect-branches/fresh shift a b (1+ i) result))))))))
379 (else a))))
380 ;; Intersect A and B; the may could be eq? to either.
381 (define (intersect-branches shift a b)
382 (let lp ((i 0))
383 (cond
384 ((< i *branch-size*)
385 (let* ((a-child (vector-ref a i))
386 (b-child (vector-ref b i)))
387 (if (eq? a-child b-child)
388 (lp (1+ i))
389 (let ((child (intersect shift a-child b-child)))
390 (cond
391 ((eq? a-child child)
392 (intersect-branches/a shift a b (1+ i)))
393 ((eq? b-child child)
394 (intersect-branches/a shift b a (1+ i)))
395 (else
396 (let ((result (clone-branch-and-set a i child)))
397 (intersect-branches/fresh shift a b (1+ i) result))))))))
398 ;; Seems they are the same but not eq?. Odd.
399 (else a))))
400 (define (intersect shift a-node b-node)
401 (cond
402 ((or (not a-node) (not b-node)) #f)
403 ((eq? a-node b-node) a-node)
404 ((= shift *leaf-bits*) (intersect-leaves a-node b-node))
405 (else (intersect-branches (- shift *branch-bits*) a-node b-node))))
406
407 (define (different-mins lo-min lo-shift lo-root hi-min hi-shift hi lo-is-a?)
408 (cond
409 ((<= lo-shift hi-shift)
410 ;; If LO has a lower shift and a lower min, it is disjoint. If
411 ;; it has the same shift and a different min, it is also
412 ;; disjoint.
413 empty-intset)
414 (else
415 (let* ((lo-shift (- lo-shift *branch-bits*))
416 (lo-idx (ash (- hi-min lo-min) (- lo-shift))))
417 (cond
418 ((>= lo-idx *branch-size*)
419 ;; HI has a lower shift, but it not within LO.
420 empty-intset)
421 ((vector-ref lo-root lo-idx)
422 => (lambda (lo-root)
423 (let ((lo (make-intset (+ lo-min (ash lo-idx lo-shift))
424 lo-shift
425 lo-root)))
426 (if lo-is-a?
427 (intset-intersect lo hi)
428 (intset-intersect hi lo)))))
429 (else empty-intset))))))
430
431 (define (different-shifts-same-min min hi-shift hi-root lo lo-is-a?)
432 (cond
433 ((vector-ref hi-root 0)
434 => (lambda (hi-root)
435 (let ((hi (make-intset min
436 (- hi-shift *branch-bits*)
437 hi-root)))
438 (if lo-is-a?
439 (intset-intersect lo hi)
440 (intset-intersect hi lo)))))
441 (else empty-intset)))
442
443 (match (cons a b)
444 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
445 (cond
446 ((< a-min b-min)
447 (different-mins a-min a-shift a-root b-min b-shift b #t))
448 ((< b-min a-min)
449 (different-mins b-min b-shift b-root a-min a-shift a #f))
450 ((< a-shift b-shift)
451 (different-shifts-same-min b-min b-shift b-root a #t))
452 ((< b-shift a-shift)
453 (different-shifts-same-min a-min a-shift a-root b #f))
454 (else
455 ;; At this point, A and B cover the same range.
456 (let ((root (intersect a-shift a-root b-root)))
457 (cond
458 ((eq? root a-root) a)
459 ((eq? root b-root) b)
460 (else (make-intset/prune a-min a-shift root)))))))))
461
462 (define (intset-subtract a b)
463 (define tmp (new-leaf))
464 ;; Intersect leaves.
465 (define (subtract-leaves a b)
466 (logand a (lognot b)))
467 ;; Subtract B from A starting at index I; the result will be fresh.
468 (define (subtract-branches/fresh shift a b i fresh)
469 (let lp ((i 0))
470 (cond
471 ((< i *branch-size*)
472 (let* ((a-child (vector-ref a i))
473 (b-child (vector-ref b i)))
474 (vector-set! fresh i (subtract-nodes shift a-child b-child))
475 (lp (1+ i))))
476 ((branch-empty? fresh) #f)
477 (else fresh))))
478 ;; Subtract B from A. The result may be eq? to A.
479 (define (subtract-branches shift a b)
480 (let lp ((i 0))
481 (cond
482 ((< i *branch-size*)
483 (let* ((a-child (vector-ref a i))
484 (b-child (vector-ref b i)))
485 (let ((child (subtract-nodes shift a-child b-child)))
486 (cond
487 ((eq? a-child child)
488 (lp (1+ i)))
489 (else
490 (let ((result (clone-branch-and-set a i child)))
491 (subtract-branches/fresh shift a b (1+ i) result)))))))
492 (else a))))
493 (define (subtract-nodes shift a-node b-node)
494 (cond
495 ((or (not a-node) (not b-node)) a-node)
496 ((eq? a-node b-node) #f)
497 ((= shift *leaf-bits*) (subtract-leaves a-node b-node))
498 (else (subtract-branches (- shift *branch-bits*) a-node b-node))))
499
500 (match (cons a b)
501 ((($ <intset> a-min a-shift a-root) . ($ <intset> b-min b-shift b-root))
502 (define (return root)
503 (cond
504 ((eq? root a-root) a)
505 (else (make-intset/prune a-min a-shift root))))
506 (cond
507 ((<= a-shift b-shift)
508 (let lp ((b-min b-min) (b-shift b-shift) (b-root b-root))
509 (if (= a-shift b-shift)
510 (if (= a-min b-min)
511 (return (subtract-nodes a-shift a-root b-root))
512 a)
513 (let* ((b-shift (- b-shift *branch-bits*))
514 (b-idx (ash (- a-min b-min) (- b-shift)))
515 (b-min (+ b-min (ash b-idx b-shift)))
516 (b-root (and b-root
517 (<= 0 b-idx)
518 (< b-idx *branch-size*)
519 (vector-ref b-root b-idx))))
520 (lp b-min b-shift b-root)))))
521 (else
522 (return
523 (let lp ((a-min a-min) (a-shift a-shift) (a-root a-root))
524 (if (= a-shift b-shift)
525 (if (= a-min b-min)
526 (subtract-nodes a-shift a-root b-root)
527 a-root)
528 (let* ((a-shift (- a-shift *branch-bits*))
529 (a-idx (ash (- b-min a-min) (- a-shift)))
530 (a-min (+ a-min (ash a-idx a-shift)))
531 (old (and a-root
532 (<= 0 a-idx)
533 (< a-idx *branch-size*)
534 (vector-ref a-root a-idx)))
535 (new (lp a-min a-shift old)))
536 (if (eq? old new)
537 a-root
538 (clone-branch-and-set a-root a-idx new)))))))))))