Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / module / ice-9 / vlist.scm
1 ;;; -*- mode: scheme; coding: utf-8; -*-
2 ;;;
3 ;;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 (define-module (ice-9 vlist)
20 #:use-module (srfi srfi-1)
21 #:use-module (srfi srfi-9)
22 #:use-module (srfi srfi-9 gnu)
23 #:use-module (srfi srfi-26)
24 #:use-module (ice-9 format)
25
26 #:export (vlist? vlist-cons vlist-head vlist-tail vlist-null?
27 vlist-null list->vlist vlist-ref vlist-drop vlist-take
28 vlist-length vlist-fold vlist-fold-right vlist-map
29 vlist-unfold vlist-unfold-right vlist-append
30 vlist-reverse vlist-filter vlist-delete vlist->list
31 vlist-for-each
32 block-growth-factor
33
34 vhash? vhash-cons vhash-consq vhash-consv
35 vhash-assoc vhash-assq vhash-assv
36 vhash-delete vhash-delq vhash-delv
37 vhash-fold vhash-fold-right
38 vhash-fold* vhash-foldq* vhash-foldv*
39 alist->vhash))
40
41 ;;; Author: Ludovic Courtès <ludo@gnu.org>
42 ;;;
43 ;;; Commentary:
44 ;;;
45 ;;; This module provides an implementations of vlists, a functional list-like
46 ;;; data structure described by Phil Bagwell in "Fast Functional Lists,
47 ;;; Hash-Lists, Dequeues and Variable-Length Arrays", EPFL Technical Report,
48 ;;; 2002.
49 ;;;
50 ;;; The idea is to store vlist elements in increasingly large contiguous blocks
51 ;;; (implemented as vectors here). These blocks are linked to one another using
52 ;;; a pointer to the next block (called `block-base' here) and an offset within
53 ;;; that block (`block-offset' here). The size of these blocks form a geometric
54 ;;; series with ratio `block-growth-factor'.
55 ;;;
56 ;;; In the best case (e.g., using a vlist returned by `list->vlist'),
57 ;;; elements from the first half of an N-element vlist are accessed in O(1)
58 ;;; (assuming `block-growth-factor' is 2), and `vlist-length' takes only
59 ;;; O(ln(N)). Furthermore, the data structure improves data locality since
60 ;;; vlist elements are adjacent, which plays well with caches.
61 ;;;
62 ;;; Code:
63
64 \f
65 ;;;
66 ;;; VList Blocks and Block Descriptors.
67 ;;;
68
69 (define block-growth-factor
70 (make-fluid 2))
71
72 (define-inlinable (make-block base offset size hash-tab?)
73 ;; Return a block (and block descriptor) of SIZE elements pointing to
74 ;; BASE at OFFSET. If HASH-TAB? is true, we also reserve space for a
75 ;; "hash table". Note: We use `next-free' instead of `last-used' as
76 ;; suggested by Bagwell.
77 (if hash-tab?
78 (vector (make-vector (* size 3) #f)
79 base offset size 0)
80 (vector (make-vector size)
81 base offset size 0)))
82
83 (define-syntax-rule (define-block-accessor name index)
84 (define-inlinable (name block)
85 (vector-ref block index)))
86
87 (define-block-accessor block-content 0)
88 (define-block-accessor block-base 1)
89 (define-block-accessor block-offset 2)
90 (define-block-accessor block-size 3)
91 (define-block-accessor block-next-free 4)
92
93 (define-inlinable (block-hash-table? block)
94 (< (block-size block) (vector-length (block-content block))))
95
96 (define-inlinable (set-block-next-free! block next-free)
97 (vector-set! block 4 next-free))
98
99 (define-inlinable (block-append! block value offset)
100 ;; This is not thread-safe. To fix it, see Section 2.8 of the paper.
101 (and (< offset (block-size block))
102 (= offset (block-next-free block))
103 (begin
104 (set-block-next-free! block (1+ offset))
105 (vector-set! (block-content block) offset value)
106 #t)))
107
108 ;; Return the item at slot OFFSET.
109 (define-inlinable (block-ref content offset)
110 (vector-ref content offset))
111
112 ;; Return the offset of the next item in the hash bucket, after the one
113 ;; at OFFSET.
114 (define-inlinable (block-hash-table-next-offset content size offset)
115 (vector-ref content (+ size size offset)))
116
117 ;; Save the offset of the next item in the hash bucket, after the one
118 ;; at OFFSET.
119 (define-inlinable (block-hash-table-set-next-offset! content size offset
120 next-offset)
121 (vector-set! content (+ size size offset) next-offset))
122
123 ;; Returns the index of the last entry stored in CONTENT with
124 ;; SIZE-modulo hash value KHASH.
125 (define-inlinable (block-hash-table-ref content size khash)
126 (vector-ref content (+ size khash)))
127
128 (define-inlinable (block-hash-table-set! content size khash offset)
129 (vector-set! content (+ size khash) offset))
130
131 ;; Add hash table information for the item recently added at OFFSET,
132 ;; with SIZE-modulo hash KHASH.
133 (define-inlinable (block-hash-table-add! content size khash offset)
134 (block-hash-table-set-next-offset! content size offset
135 (block-hash-table-ref content size khash))
136 (block-hash-table-set! content size khash offset))
137
138 (define block-null
139 ;; The null block.
140 (make-block #f 0 0 #f))
141
142 \f
143 ;;;
144 ;;; VLists.
145 ;;;
146
147 (define-record-type <vlist>
148 ;; A vlist is just a base+offset pair pointing to a block.
149
150 ;; XXX: Allocating a <vlist> record in addition to the block at each
151 ;; `vlist-cons' call is inefficient. However, Bagwell's hack to avoid it
152 ;; (Section 2.2) would require GC_ALL_INTERIOR_POINTERS, which would be a
153 ;; performance hit for everyone.
154 (make-vlist base offset)
155 vlist?
156 (base vlist-base)
157 (offset vlist-offset))
158
159 (set-record-type-printer! <vlist>
160 (lambda (vl port)
161 (cond ((vlist-null? vl)
162 (format port "#<vlist ()>"))
163 ((vhash? vl)
164 (format port "#<vhash ~x ~a pairs>"
165 (object-address vl)
166 (vlist-length vl)))
167 (else
168 (format port "#<vlist ~a>"
169 (vlist->list vl))))))
170
171
172 (define vlist-null
173 ;; The empty vlist.
174 (make-vlist block-null 0))
175
176 ;; Asserting that something is a vlist is actually a win if your next
177 ;; step is to call record accessors, because that causes CSE to
178 ;; eliminate the type checks in those accessors.
179 ;;
180 (define-inlinable (assert-vlist val)
181 (unless (vlist? val)
182 (throw 'wrong-type-arg
183 #f
184 "Not a vlist: ~S"
185 (list val)
186 (list val))))
187
188 (define-inlinable (block-cons item vlist hash-tab?)
189 (let ((base (vlist-base vlist))
190 (offset (1+ (vlist-offset vlist))))
191 (cond
192 ((block-append! base item offset)
193 ;; Fast path: We added the item directly to the block.
194 (make-vlist base offset))
195 (else
196 ;; Slow path: Allocate a new block.
197 (let* ((size (block-size base))
198 (base (make-block
199 base
200 (1- offset)
201 (cond
202 ((zero? size) 1)
203 ((< offset size) 1) ;; new vlist head
204 (else (* (fluid-ref block-growth-factor) size)))
205 hash-tab?)))
206 (set-block-next-free! base 1)
207 (vector-set! (block-content base) 0 item)
208 (make-vlist base 0))))))
209
210 (define (vlist-cons item vlist)
211 "Return a new vlist with ITEM as its head and VLIST as its
212 tail."
213 ;; Note: Although the result of `vlist-cons' on a vhash is a valid
214 ;; vlist, it is not a valid vhash. The new item does not get a hash
215 ;; table entry. If we allocate a new block, the new block will not
216 ;; have a hash table. Perhaps we can do something more sensible here,
217 ;; but this is a hot function, so there are performance impacts.
218 (assert-vlist vlist)
219 (block-cons item vlist #f))
220
221 (define (vlist-head vlist)
222 "Return the head of VLIST."
223 (assert-vlist vlist)
224 (let ((base (vlist-base vlist))
225 (offset (vlist-offset vlist)))
226 (block-ref (block-content base) offset)))
227
228 (define (vlist-tail vlist)
229 "Return the tail of VLIST."
230 (assert-vlist vlist)
231 (let ((base (vlist-base vlist))
232 (offset (vlist-offset vlist)))
233 (if (> offset 0)
234 (make-vlist base (- offset 1))
235 (make-vlist (block-base base)
236 (block-offset base)))))
237
238 (define (vlist-null? vlist)
239 "Return true if VLIST is empty."
240 (assert-vlist vlist)
241 (let ((base (vlist-base vlist)))
242 (and (not (block-base base))
243 (= 0 (block-size base)))))
244
245 \f
246 ;;;
247 ;;; VList Utilities.
248 ;;;
249
250 (define (list->vlist lst)
251 "Return a new vlist whose contents correspond to LST."
252 (vlist-reverse (fold vlist-cons vlist-null lst)))
253
254 (define (vlist-fold proc init vlist)
255 "Fold over VLIST, calling PROC for each element."
256 ;; FIXME: Handle multiple lists.
257 (assert-vlist vlist)
258 (let loop ((base (vlist-base vlist))
259 (offset (vlist-offset vlist))
260 (result init))
261 (if (eq? base block-null)
262 result
263 (let* ((next (- offset 1))
264 (done? (< next 0)))
265 (loop (if done? (block-base base) base)
266 (if done? (block-offset base) next)
267 (proc (block-ref (block-content base) offset) result))))))
268
269 (define (vlist-fold-right proc init vlist)
270 "Fold over VLIST, calling PROC for each element, starting from
271 the last element."
272 (assert-vlist vlist)
273 (let loop ((index (1- (vlist-length vlist)))
274 (result init))
275 (if (< index 0)
276 result
277 (loop (1- index)
278 (proc (vlist-ref vlist index) result)))))
279
280 (define (vlist-reverse vlist)
281 "Return a new VLIST whose content are those of VLIST in reverse
282 order."
283 (vlist-fold vlist-cons vlist-null vlist))
284
285 (define (vlist-map proc vlist)
286 "Map PROC over the elements of VLIST and return a new vlist."
287 (vlist-fold (lambda (item result)
288 (vlist-cons (proc item) result))
289 vlist-null
290 (vlist-reverse vlist)))
291
292 (define (vlist->list vlist)
293 "Return a new list whose contents match those of VLIST."
294 (vlist-fold-right cons '() vlist))
295
296 (define (vlist-ref vlist index)
297 "Return the element at index INDEX in VLIST."
298 (assert-vlist vlist)
299 (let loop ((index index)
300 (base (vlist-base vlist))
301 (offset (vlist-offset vlist)))
302 (if (<= index offset)
303 (block-ref (block-content base) (- offset index))
304 (loop (- index offset 1)
305 (block-base base)
306 (block-offset base)))))
307
308 (define (vlist-drop vlist count)
309 "Return a new vlist that does not contain the COUNT first elements of
310 VLIST."
311 (assert-vlist vlist)
312 (let loop ((count count)
313 (base (vlist-base vlist))
314 (offset (vlist-offset vlist)))
315 (if (<= count offset)
316 (make-vlist base (- offset count))
317 (loop (- count offset 1)
318 (block-base base)
319 (block-offset base)))))
320
321 (define (vlist-take vlist count)
322 "Return a new vlist that contains only the COUNT first elements of
323 VLIST."
324 (let loop ((count count)
325 (vlist vlist)
326 (result vlist-null))
327 (if (= 0 count)
328 (vlist-reverse result)
329 (loop (- count 1)
330 (vlist-tail vlist)
331 (vlist-cons (vlist-head vlist) result)))))
332
333 (define (vlist-filter pred vlist)
334 "Return a new vlist containing all the elements from VLIST that
335 satisfy PRED."
336 (vlist-fold-right (lambda (e v)
337 (if (pred e)
338 (vlist-cons e v)
339 v))
340 vlist-null
341 vlist))
342
343 (define* (vlist-delete x vlist #:optional (equal? equal?))
344 "Return a new vlist corresponding to VLIST without the elements
345 EQUAL? to X."
346 (vlist-filter (lambda (e)
347 (not (equal? e x)))
348 vlist))
349
350 (define (vlist-length vlist)
351 "Return the length of VLIST."
352 (assert-vlist vlist)
353 (let loop ((base (vlist-base vlist))
354 (len (vlist-offset vlist)))
355 (if (eq? base block-null)
356 len
357 (loop (block-base base)
358 (+ len 1 (block-offset base))))))
359
360 (define* (vlist-unfold p f g seed
361 #:optional (tail-gen (lambda (x) vlist-null)))
362 "Return a new vlist. See the description of SRFI-1 `unfold' for details."
363 (let uf ((seed seed))
364 (if (p seed)
365 (tail-gen seed)
366 (vlist-cons (f seed)
367 (uf (g seed))))))
368
369 (define* (vlist-unfold-right p f g seed #:optional (tail vlist-null))
370 "Return a new vlist. See the description of SRFI-1 `unfold-right' for
371 details."
372 (let uf ((seed seed) (lis tail))
373 (if (p seed)
374 lis
375 (uf (g seed) (vlist-cons (f seed) lis)))))
376
377 (define (vlist-append . vlists)
378 "Append the given lists."
379 (if (null? vlists)
380 vlist-null
381 (fold-right (lambda (vlist result)
382 (vlist-fold-right (lambda (e v)
383 (vlist-cons e v))
384 result
385 vlist))
386 vlist-null
387 vlists)))
388
389 (define (vlist-for-each proc vlist)
390 "Call PROC on each element of VLIST. The result is unspecified."
391 (vlist-fold (lambda (item x)
392 (proc item))
393 (if #f #f)
394 vlist))
395
396 \f
397 ;;;
398 ;;; Hash Lists, aka. `VHash'.
399 ;;;
400
401 ;; Assume keys K1 and K2, H = hash(K1) = hash(K2), and two values V1 and V2
402 ;; associated with K1 and K2, respectively. The resulting layout is a
403 ;; follows:
404 ;;
405 ;; ,--------------------.
406 ;; 0| ,-> (K1 . V1) | Vlist array
407 ;; 1| | |
408 ;; 2| | (K2 . V2) |
409 ;; 3| | |
410 ;; size +-|------------------+
411 ;; 0| | | Hash table
412 ;; 1| | |
413 ;; 2| +-- O <------------- H
414 ;; 3| | |
415 ;; size * 2 +-|------------------+
416 ;; 0| `-> 2 | Chain links
417 ;; 1| |
418 ;; 2| #f |
419 ;; 3| |
420 ;; size * 3 `--------------------'
421 ;;
422 ;; The backing store for the vhash is partitioned into three areas: the
423 ;; vlist part, the hash table part, and the chain links part. In this
424 ;; example we have a hash H which, when indexed into the hash table
425 ;; part, indicates that a value with this hash can be found at offset 0
426 ;; in the vlist part. The corresponding index (in this case, 0) of the
427 ;; chain links array holds the index of the next element in this block
428 ;; with this hash value, or #f if we reached the end of the chain.
429 ;;
430 ;; This API potentially requires users to repeat which hash function and
431 ;; which equality predicate to use. This can lead to unpredictable
432 ;; results if they are used in consistenly, e.g., between `vhash-cons'
433 ;; and `vhash-assoc', which is undesirable, as argued in
434 ;; http://savannah.gnu.org/bugs/?22159 . OTOH, two arguments can be
435 ;; made in favor of this API:
436 ;;
437 ;; - It's consistent with how alists are handled in SRFI-1.
438 ;;
439 ;; - In practice, users will probably consistenly use either the `q',
440 ;; the `v', or the plain variant (`vlist-cons' and `vlist-assoc'
441 ;; without any optional argument), i.e., they will rarely explicitly
442 ;; pass a hash function or equality predicate.
443
444 (define (vhash? obj)
445 "Return true if OBJ is a hash list."
446 (and (vlist? obj)
447 (block-hash-table? (vlist-base obj))))
448
449 (define* (vhash-cons key value vhash #:optional (hash hash))
450 "Return a new hash list based on VHASH where KEY is associated
451 with VALUE. Use HASH to compute KEY's hash."
452 (assert-vlist vhash)
453 ;; We should also assert that it is a hash table. Need to check the
454 ;; performance impacts of that. Also, vlist-null is a valid hash
455 ;; table, which does not pass vhash?. A bug, perhaps.
456 (let* ((vhash (block-cons (cons key value) vhash #t))
457 (base (vlist-base vhash))
458 (offset (vlist-offset vhash))
459 (size (block-size base))
460 (khash (hash key size))
461 (content (block-content base)))
462 (block-hash-table-add! content size khash offset)
463 vhash))
464
465 (define vhash-consq (cut vhash-cons <> <> <> hashq))
466 (define vhash-consv (cut vhash-cons <> <> <> hashv))
467
468 (define-inlinable (%vhash-fold* proc init key vhash equal? hash)
469 ;; Fold over all the values associated with KEY in VHASH.
470 (define (visit-block base max-offset result)
471 (let* ((size (block-size base))
472 (content (block-content base))
473 (khash (hash key size)))
474 (let loop ((offset (block-hash-table-ref content size khash))
475 (result result))
476 (if offset
477 (loop (block-hash-table-next-offset content size offset)
478 (if (and (<= offset max-offset)
479 (equal? key (car (block-ref content offset))))
480 (proc (cdr (block-ref content offset)) result)
481 result))
482 (let ((next-block (block-base base)))
483 (if (> (block-size next-block) 0)
484 (visit-block next-block (block-offset base) result)
485 result))))))
486
487 (assert-vlist vhash)
488 (if (> (block-size (vlist-base vhash)) 0)
489 (visit-block (vlist-base vhash)
490 (vlist-offset vhash)
491 init)
492 init))
493
494 (define* (vhash-fold* proc init key vhash
495 #:optional (equal? equal?) (hash hash))
496 "Fold over all the values associated with KEY in VHASH, with each
497 call to PROC having the form ‘(proc value result)’, where
498 RESULT is the result of the previous call to PROC and INIT the
499 value of RESULT for the first call to PROC."
500 (%vhash-fold* proc init key vhash equal? hash))
501
502 (define (vhash-foldq* proc init key vhash)
503 "Same as ‘vhash-fold*’, but using ‘hashq’ and ‘eq?’."
504 (%vhash-fold* proc init key vhash eq? hashq))
505
506 (define (vhash-foldv* proc init key vhash)
507 "Same as ‘vhash-fold*’, but using ‘hashv’ and ‘eqv?’."
508 (%vhash-fold* proc init key vhash eqv? hashv))
509
510 (define-inlinable (%vhash-assoc key vhash equal? hash)
511 ;; A specialization of `vhash-fold*' that stops when the first value
512 ;; associated with KEY is found or when the end-of-list is reached. Inline to
513 ;; make sure `vhash-assq' gets to use the `eq?' instruction instead of calling
514 ;; the `eq?' subr.
515 (define (visit-block base max-offset)
516 (let* ((size (block-size base))
517 (content (block-content base))
518 (khash (hash key size)))
519 (let loop ((offset (block-hash-table-ref content size khash)))
520 (if offset
521 (if (and (<= offset max-offset)
522 (equal? key (car (block-ref content offset))))
523 (block-ref content offset)
524 (loop (block-hash-table-next-offset content size offset)))
525 (let ((next-block (block-base base)))
526 (and (> (block-size next-block) 0)
527 (visit-block next-block (block-offset base))))))))
528
529 (assert-vlist vhash)
530 (and (> (block-size (vlist-base vhash)) 0)
531 (visit-block (vlist-base vhash)
532 (vlist-offset vhash))))
533
534 (define* (vhash-assoc key vhash #:optional (equal? equal?) (hash hash))
535 "Return the first key/value pair from VHASH whose key is equal to
536 KEY according to the EQUAL? equality predicate."
537 (%vhash-assoc key vhash equal? hash))
538
539 (define (vhash-assq key vhash)
540 "Return the first key/value pair from VHASH whose key is ‘eq?’ to
541 KEY."
542 (%vhash-assoc key vhash eq? hashq))
543
544 (define (vhash-assv key vhash)
545 "Return the first key/value pair from VHASH whose key is ‘eqv?’ to
546 KEY."
547 (%vhash-assoc key vhash eqv? hashv))
548
549 (define* (vhash-delete key vhash #:optional (equal? equal?) (hash hash))
550 "Remove all associations from VHASH with KEY, comparing keys
551 with EQUAL?."
552 (if (vhash-assoc key vhash equal? hash)
553 (vlist-fold (lambda (k+v result)
554 (let ((k (car k+v))
555 (v (cdr k+v)))
556 (if (equal? k key)
557 result
558 (vhash-cons k v result hash))))
559 vlist-null
560 vhash)
561 vhash))
562
563 (define vhash-delq (cut vhash-delete <> <> eq? hashq))
564 (define vhash-delv (cut vhash-delete <> <> eqv? hashv))
565
566 (define (vhash-fold proc init vhash)
567 "Fold over the key/pair elements of VHASH from left to right, with
568 each call to PROC having the form ‘(PROC key value result)’,
569 where RESULT is the result of the previous call to PROC and
570 INIT the value of RESULT for the first call to PROC."
571 (vlist-fold (lambda (key+value result)
572 (proc (car key+value) (cdr key+value)
573 result))
574 init
575 vhash))
576
577 (define (vhash-fold-right proc init vhash)
578 "Fold over the key/pair elements of VHASH from right to left, with
579 each call to PROC having the form ‘(PROC key value result)’,
580 where RESULT is the result of the previous call to PROC and
581 INIT the value of RESULT for the first call to PROC."
582 (vlist-fold-right (lambda (key+value result)
583 (proc (car key+value) (cdr key+value)
584 result))
585 init
586 vhash))
587
588 (define* (alist->vhash alist #:optional (hash hash))
589 "Return the vhash corresponding to ALIST, an association list."
590 (fold-right (lambda (pair result)
591 (vhash-cons (car pair) (cdr pair) result hash))
592 vlist-null
593 alist))
594
595 ;;; vlist.scm ends here