Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / module / ice-9 / vlist.scm
1 ;;; -*- mode: scheme; coding: utf-8; -*-
2 ;;;
3 ;;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 (define-module (ice-9 vlist)
20 #:use-module (srfi srfi-1)
21 #:use-module (srfi srfi-9)
22 #:use-module (srfi srfi-9 gnu)
23 #:use-module (srfi srfi-26)
24 #:use-module (ice-9 format)
25
26 #:export (vlist? vlist-cons vlist-head vlist-tail vlist-null?
27 vlist-null list->vlist vlist-ref vlist-drop vlist-take
28 vlist-length vlist-fold vlist-fold-right vlist-map
29 vlist-unfold vlist-unfold-right vlist-append
30 vlist-reverse vlist-filter vlist-delete vlist->list
31 vlist-for-each
32 block-growth-factor
33
34 vhash? vhash-cons vhash-consq vhash-consv
35 vhash-assoc vhash-assq vhash-assv
36 vhash-delete vhash-delq vhash-delv
37 vhash-fold vhash-fold-right
38 vhash-fold* vhash-foldq* vhash-foldv*
39 alist->vhash))
40
41 ;;; Author: Ludovic Courtès <ludo@gnu.org>
42 ;;;
43 ;;; Commentary:
44 ;;;
45 ;;; This module provides an implementations of vlists, a functional list-like
46 ;;; data structure described by Phil Bagwell in "Fast Functional Lists,
47 ;;; Hash-Lists, Dequeues and Variable-Length Arrays", EPFL Technical Report,
48 ;;; 2002.
49 ;;;
50 ;;; The idea is to store vlist elements in increasingly large contiguous blocks
51 ;;; (implemented as vectors here). These blocks are linked to one another using
52 ;;; a pointer to the next block (called `block-base' here) and an offset within
53 ;;; that block (`block-offset' here). The size of these blocks form a geometric
54 ;;; series with ratio `block-growth-factor'.
55 ;;;
56 ;;; In the best case (e.g., using a vlist returned by `list->vlist'),
57 ;;; elements from the first half of an N-element vlist are accessed in O(1)
58 ;;; (assuming `block-growth-factor' is 2), and `vlist-length' takes only
59 ;;; O(ln(N)). Furthermore, the data structure improves data locality since
60 ;;; vlist elements are adjacent, which plays well with caches.
61 ;;;
62 ;;; Code:
63
64 \f
65 ;;;
66 ;;; VList Blocks and Block Descriptors.
67 ;;;
68
69 (define block-growth-factor
70 (make-fluid 2))
71
72 (define-inlinable (make-block base offset size hash-tab?)
73 ;; Return a block (and block descriptor) of SIZE elements pointing to
74 ;; BASE at OFFSET. If HASH-TAB? is true, we also reserve space for a
75 ;; "hash table". Note: We use `next-free' instead of `last-used' as
76 ;; suggested by Bagwell.
77 (if hash-tab?
78 (vector (make-vector (* size 3) #f)
79 base offset size 0)
80 (vector (make-vector size)
81 base offset size 0)))
82
83 (define-syntax-rule (define-block-accessor name index)
84 (define-inlinable (name block)
85 (vector-ref block index)))
86
87 (define-block-accessor block-content 0)
88 (define-block-accessor block-base 1)
89 (define-block-accessor block-offset 2)
90 (define-block-accessor block-size 3)
91 (define-block-accessor block-next-free 4)
92
93 (define-inlinable (block-hash-table? block)
94 (< (block-size block) (vector-length (block-content block))))
95
96 (define-inlinable (set-block-next-free! block next-free)
97 (vector-set! block 4 next-free))
98
99 (define-inlinable (block-append! block value offset)
100 ;; This is not thread-safe. To fix it, see Section 2.8 of the paper.
101 (and (< offset (block-size block))
102 (= offset (block-next-free block))
103 (begin
104 (set-block-next-free! block (1+ offset))
105 (vector-set! (block-content block) offset value)
106 #t)))
107
108 ;; Return the item at slot OFFSET.
109 (define-inlinable (block-ref content offset)
110 (vector-ref content offset))
111
112 ;; Return the offset of the next item in the hash bucket, after the one
113 ;; at OFFSET.
114 (define-inlinable (block-hash-table-next-offset content size offset)
115 (vector-ref content (+ size size offset)))
116
117 ;; Save the offset of the next item in the hash bucket, after the one
118 ;; at OFFSET.
119 (define-inlinable (block-hash-table-set-next-offset! content size offset
120 next-offset)
121 (vector-set! content (+ size size offset) next-offset))
122
123 ;; Returns the index of the last entry stored in CONTENT with
124 ;; SIZE-modulo hash value KHASH.
125 (define-inlinable (block-hash-table-ref content size khash)
126 (vector-ref content (+ size khash)))
127
128 (define-inlinable (block-hash-table-set! content size khash offset)
129 (vector-set! content (+ size khash) offset))
130
131 ;; Add hash table information for the item recently added at OFFSET,
132 ;; with SIZE-modulo hash KHASH.
133 (define-inlinable (block-hash-table-add! content size khash offset)
134 (block-hash-table-set-next-offset! content size offset
135 (block-hash-table-ref content size khash))
136 (block-hash-table-set! content size khash offset))
137
138 (define block-null
139 ;; The null block.
140 (make-block #f 0 0 #f))
141
142 \f
143 ;;;
144 ;;; VLists.
145 ;;;
146
147 (define-record-type <vlist>
148 ;; A vlist is just a base+offset pair pointing to a block.
149
150 ;; XXX: Allocating a <vlist> record in addition to the block at each
151 ;; `vlist-cons' call is inefficient. However, Bagwell's hack to avoid it
152 ;; (Section 2.2) would require GC_ALL_INTERIOR_POINTERS, which would be a
153 ;; performance hit for everyone.
154 (make-vlist base offset)
155 vlist?
156 (base vlist-base)
157 (offset vlist-offset))
158
159 (set-record-type-printer! <vlist>
160 (lambda (vl port)
161 (cond ((vlist-null? vl)
162 (format port "#<vlist ()>"))
163 ((vhash? vl)
164 (format port "#<vhash ~x ~a pairs>"
165 (object-address vl)
166 (vlist-length vl)))
167 (else
168 (format port "#<vlist ~a>"
169 (vlist->list vl))))))
170
171
172 (define vlist-null
173 ;; The empty vlist.
174 (make-vlist block-null 0))
175
176 ;; Asserting that something is a vlist is actually a win if your next
177 ;; step is to call record accessors, because that causes CSE to
178 ;; eliminate the type checks in those accessors.
179 ;;
180 (define-inlinable (assert-vlist val)
181 (unless (vlist? val)
182 (throw 'wrong-type-arg
183 #f
184 "Not a vlist: ~S"
185 (list val)
186 (list val))))
187
188 (define-inlinable (block-cons item vlist hash-tab?)
189 (let ((base (vlist-base vlist))
190 (offset (1+ (vlist-offset vlist))))
191 (cond
192 ((block-append! base item offset)
193 ;; Fast path: We added the item directly to the block.
194 (make-vlist base offset))
195 (else
196 ;; Slow path: Allocate a new block.
197 (let* ((size (block-size base))
198 (base (make-block
199 base
200 (1- offset)
201 (cond
202 ((zero? size) 1)
203 ((< offset size) 1) ;; new vlist head
204 (else (* (fluid-ref block-growth-factor) size)))
205 hash-tab?)))
206 (set-block-next-free! base 1)
207 (vector-set! (block-content base) 0 item)
208 (make-vlist base 0))))))
209
210 (define (vlist-cons item vlist)
211 "Return a new vlist with @var{item} as its head and @var{vlist} as its
212 tail."
213 ;; Note: Although the result of `vlist-cons' on a vhash is a valid
214 ;; vlist, it is not a valid vhash. The new item does not get a hash
215 ;; table entry. If we allocate a new block, the new block will not
216 ;; have a hash table. Perhaps we can do something more sensible here,
217 ;; but this is a hot function, so there are performance impacts.
218 (assert-vlist vlist)
219 (block-cons item vlist #f))
220
221 (define (vlist-head vlist)
222 "Return the head of @var{vlist}."
223 (assert-vlist vlist)
224 (let ((base (vlist-base vlist))
225 (offset (vlist-offset vlist)))
226 (block-ref (block-content base) offset)))
227
228 (define (vlist-tail vlist)
229 "Return the tail of @var{vlist}."
230 (assert-vlist vlist)
231 (let ((base (vlist-base vlist))
232 (offset (vlist-offset vlist)))
233 (if (> offset 0)
234 (make-vlist base (- offset 1))
235 (make-vlist (block-base base)
236 (block-offset base)))))
237
238 (define (vlist-null? vlist)
239 "Return true if @var{vlist} is empty."
240 (assert-vlist vlist)
241 (let ((base (vlist-base vlist)))
242 (and (not (block-base base))
243 (= 0 (block-size base)))))
244
245 \f
246 ;;;
247 ;;; VList Utilities.
248 ;;;
249
250 (define (list->vlist lst)
251 "Return a new vlist whose contents correspond to @var{lst}."
252 (vlist-reverse (fold vlist-cons vlist-null lst)))
253
254 (define (vlist-fold proc init vlist)
255 "Fold over @var{vlist}, calling @var{proc} for each element."
256 ;; FIXME: Handle multiple lists.
257 (assert-vlist vlist)
258 (let loop ((base (vlist-base vlist))
259 (offset (vlist-offset vlist))
260 (result init))
261 (if (eq? base block-null)
262 result
263 (let* ((next (- offset 1))
264 (done? (< next 0)))
265 (loop (if done? (block-base base) base)
266 (if done? (block-offset base) next)
267 (proc (block-ref (block-content base) offset) result))))))
268
269 (define (vlist-fold-right proc init vlist)
270 "Fold over @var{vlist}, calling @var{proc} for each element, starting from
271 the last element."
272 (assert-vlist vlist)
273 (let loop ((index (1- (vlist-length vlist)))
274 (result init))
275 (if (< index 0)
276 result
277 (loop (1- index)
278 (proc (vlist-ref vlist index) result)))))
279
280 (define (vlist-reverse vlist)
281 "Return a new @var{vlist} whose content are those of @var{vlist} in reverse
282 order."
283 (vlist-fold vlist-cons vlist-null vlist))
284
285 (define (vlist-map proc vlist)
286 "Map @var{proc} over the elements of @var{vlist} and return a new vlist."
287 (vlist-fold (lambda (item result)
288 (vlist-cons (proc item) result))
289 vlist-null
290 (vlist-reverse vlist)))
291
292 (define (vlist->list vlist)
293 "Return a new list whose contents match those of @var{vlist}."
294 (vlist-fold-right cons '() vlist))
295
296 (define (vlist-ref vlist index)
297 "Return the element at index @var{index} in @var{vlist}."
298 (assert-vlist vlist)
299 (let loop ((index index)
300 (base (vlist-base vlist))
301 (offset (vlist-offset vlist)))
302 (if (<= index offset)
303 (block-ref (block-content base) (- offset index))
304 (loop (- index offset 1)
305 (block-base base)
306 (block-offset base)))))
307
308 (define (vlist-drop vlist count)
309 "Return a new vlist that does not contain the @var{count} first elements of
310 @var{vlist}."
311 (assert-vlist vlist)
312 (let loop ((count count)
313 (base (vlist-base vlist))
314 (offset (vlist-offset vlist)))
315 (if (<= count offset)
316 (make-vlist base (- offset count))
317 (loop (- count offset 1)
318 (block-base base)
319 (block-offset base)))))
320
321 (define (vlist-take vlist count)
322 "Return a new vlist that contains only the @var{count} first elements of
323 @var{vlist}."
324 (let loop ((count count)
325 (vlist vlist)
326 (result vlist-null))
327 (if (= 0 count)
328 (vlist-reverse result)
329 (loop (- count 1)
330 (vlist-tail vlist)
331 (vlist-cons (vlist-head vlist) result)))))
332
333 (define (vlist-filter pred vlist)
334 "Return a new vlist containing all the elements from @var{vlist} that
335 satisfy @var{pred}."
336 (vlist-fold-right (lambda (e v)
337 (if (pred e)
338 (vlist-cons e v)
339 v))
340 vlist-null
341 vlist))
342
343 (define* (vlist-delete x vlist #:optional (equal? equal?))
344 "Return a new vlist corresponding to @var{vlist} without the elements
345 @var{equal?} to @var{x}."
346 (vlist-filter (lambda (e)
347 (not (equal? e x)))
348 vlist))
349
350 (define (vlist-length vlist)
351 "Return the length of @var{vlist}."
352 (assert-vlist vlist)
353 (let loop ((base (vlist-base vlist))
354 (len (vlist-offset vlist)))
355 (if (eq? base block-null)
356 len
357 (loop (block-base base)
358 (+ len 1 (block-offset base))))))
359
360 (define* (vlist-unfold p f g seed
361 #:optional (tail-gen (lambda (x) vlist-null)))
362 "Return a new vlist. See the description of SRFI-1 `unfold' for details."
363 (let uf ((seed seed))
364 (if (p seed)
365 (tail-gen seed)
366 (vlist-cons (f seed)
367 (uf (g seed))))))
368
369 (define* (vlist-unfold-right p f g seed #:optional (tail vlist-null))
370 "Return a new vlist. See the description of SRFI-1 `unfold-right' for
371 details."
372 (let uf ((seed seed) (lis tail))
373 (if (p seed)
374 lis
375 (uf (g seed) (vlist-cons (f seed) lis)))))
376
377 (define (vlist-append . vlists)
378 "Append the given lists."
379 (if (null? vlists)
380 vlist-null
381 (fold-right (lambda (vlist result)
382 (vlist-fold-right (lambda (e v)
383 (vlist-cons e v))
384 result
385 vlist))
386 vlist-null
387 vlists)))
388
389 (define (vlist-for-each proc vlist)
390 "Call @var{proc} on each element of @var{vlist}. The result is unspecified."
391 (vlist-fold (lambda (item x)
392 (proc item))
393 (if #f #f)
394 vlist))
395
396 \f
397 ;;;
398 ;;; Hash Lists, aka. `VHash'.
399 ;;;
400
401 ;; Assume keys K1 and K2, H = hash(K1) = hash(K2), and two values V1 and V2
402 ;; associated with K1 and K2, respectively. The resulting layout is a
403 ;; follows:
404 ;;
405 ;; ,--------------------.
406 ;; 0| ,-> (K1 . V1) | Vlist array
407 ;; 1| | |
408 ;; 2| | (K2 . V2) |
409 ;; 3| | |
410 ;; size +-|------------------+
411 ;; 0| | | Hash table
412 ;; 1| | |
413 ;; 2| +-- O <------------- H
414 ;; 3| | |
415 ;; size * 2 +-|------------------+
416 ;; 0| `-> 2 | Chain links
417 ;; 1| |
418 ;; 2| #f |
419 ;; 3| |
420 ;; size * 3 `--------------------'
421 ;;
422 ;; The backing store for the vhash is partitioned into three areas: the
423 ;; vlist part, the hash table part, and the chain links part. In this
424 ;; example we have a hash H which, when indexed into the hash table
425 ;; part, indicates that a value with this hash can be found at offset 0
426 ;; in the vlist part. The corresponding index (in this case, 0) of the
427 ;; chain links array holds the index of the next element in this block
428 ;; with this hash value, or #f if we reached the end of the chain.
429 ;;
430 ;; This API potentially requires users to repeat which hash function and
431 ;; which equality predicate to use. This can lead to unpredictable
432 ;; results if they are used in consistenly, e.g., between `vhash-cons'
433 ;; and `vhash-assoc', which is undesirable, as argued in
434 ;; http://savannah.gnu.org/bugs/?22159 . OTOH, two arguments can be
435 ;; made in favor of this API:
436 ;;
437 ;; - It's consistent with how alists are handled in SRFI-1.
438 ;;
439 ;; - In practice, users will probably consistenly use either the `q',
440 ;; the `v', or the plain variant (`vlist-cons' and `vlist-assoc'
441 ;; without any optional argument), i.e., they will rarely explicitly
442 ;; pass a hash function or equality predicate.
443
444 (define (vhash? obj)
445 "Return true if @var{obj} is a hash list."
446 (and (vlist? obj)
447 (block-hash-table? (vlist-base obj))))
448
449 (define* (vhash-cons key value vhash #:optional (hash hash))
450 "Return a new hash list based on @var{vhash} where @var{key} is associated
451 with @var{value}. Use @var{hash} to compute @var{key}'s hash."
452 (assert-vlist vhash)
453 ;; We should also assert that it is a hash table. Need to check the
454 ;; performance impacts of that. Also, vlist-null is a valid hash
455 ;; table, which does not pass vhash?. A bug, perhaps.
456 (let* ((vhash (block-cons (cons key value) vhash #t))
457 (base (vlist-base vhash))
458 (offset (vlist-offset vhash))
459 (size (block-size base))
460 (khash (hash key size))
461 (content (block-content base)))
462 (block-hash-table-add! content size khash offset)
463 vhash))
464
465 (define vhash-consq (cut vhash-cons <> <> <> hashq))
466 (define vhash-consv (cut vhash-cons <> <> <> hashv))
467
468 (define-inlinable (%vhash-fold* proc init key vhash equal? hash)
469 ;; Fold over all the values associated with KEY in VHASH.
470 (define (visit-block base max-offset result)
471 (let* ((size (block-size base))
472 (content (block-content base))
473 (khash (hash key size)))
474 (let loop ((offset (block-hash-table-ref content size khash))
475 (result result))
476 (if offset
477 (loop (block-hash-table-next-offset content size offset)
478 (if (and (<= offset max-offset)
479 (equal? key (car (block-ref content offset))))
480 (proc (cdr (block-ref content offset)) result)
481 result))
482 (let ((next-block (block-base base)))
483 (if (> (block-size next-block) 0)
484 (visit-block next-block (block-offset base) result)
485 result))))))
486
487 (assert-vlist vhash)
488 (if (> (block-size (vlist-base vhash)) 0)
489 (visit-block (vlist-base vhash)
490 (vlist-offset vhash)
491 init)
492 init))
493
494 (define* (vhash-fold* proc init key vhash
495 #:optional (equal? equal?) (hash hash))
496 "Fold over all the values associated with @var{key} in @var{vhash}, with each
497 call to @var{proc} having the form @code{(proc value result)}, where
498 @var{result} is the result of the previous call to @var{proc} and @var{init} the
499 value of @var{result} for the first call to @var{proc}."
500 (%vhash-fold* proc init key vhash equal? hash))
501
502 (define (vhash-foldq* proc init key vhash)
503 "Same as @code{vhash-fold*}, but using @code{hashq} and @code{eq?}."
504 (%vhash-fold* proc init key vhash eq? hashq))
505
506 (define (vhash-foldv* proc init key vhash)
507 "Same as @code{vhash-fold*}, but using @code{hashv} and @code{eqv?}."
508 (%vhash-fold* proc init key vhash eqv? hashv))
509
510 (define-inlinable (%vhash-assoc key vhash equal? hash)
511 ;; A specialization of `vhash-fold*' that stops when the first value
512 ;; associated with KEY is found or when the end-of-list is reached. Inline to
513 ;; make sure `vhash-assq' gets to use the `eq?' instruction instead of calling
514 ;; the `eq?' subr.
515 (define (visit-block base max-offset)
516 (let* ((size (block-size base))
517 (content (block-content base))
518 (khash (hash key size)))
519 (let loop ((offset (block-hash-table-ref content size khash)))
520 (if offset
521 (if (and (<= offset max-offset)
522 (equal? key (car (block-ref content offset))))
523 (block-ref content offset)
524 (loop (block-hash-table-next-offset content size offset)))
525 (let ((next-block (block-base base)))
526 (and (> (block-size next-block) 0)
527 (visit-block next-block (block-offset base))))))))
528
529 (assert-vlist vhash)
530 (and (> (block-size (vlist-base vhash)) 0)
531 (visit-block (vlist-base vhash)
532 (vlist-offset vhash))))
533
534 (define* (vhash-assoc key vhash #:optional (equal? equal?) (hash hash))
535 "Return the first key/value pair from @var{vhash} whose key is equal to
536 @var{key} according to the @var{equal?} equality predicate."
537 (%vhash-assoc key vhash equal? hash))
538
539 (define (vhash-assq key vhash)
540 "Return the first key/value pair from @var{vhash} whose key is @code{eq?} to
541 @var{key}."
542 (%vhash-assoc key vhash eq? hashq))
543
544 (define (vhash-assv key vhash)
545 "Return the first key/value pair from @var{vhash} whose key is @code{eqv?} to
546 @var{key}."
547 (%vhash-assoc key vhash eqv? hashv))
548
549 (define* (vhash-delete key vhash #:optional (equal? equal?) (hash hash))
550 "Remove all associations from @var{vhash} with @var{key}, comparing keys
551 with @var{equal?}."
552 (if (vhash-assoc key vhash equal? hash)
553 (vlist-fold (lambda (k+v result)
554 (let ((k (car k+v))
555 (v (cdr k+v)))
556 (if (equal? k key)
557 result
558 (vhash-cons k v result hash))))
559 vlist-null
560 vhash)
561 vhash))
562
563 (define vhash-delq (cut vhash-delete <> <> eq? hashq))
564 (define vhash-delv (cut vhash-delete <> <> eqv? hashv))
565
566 (define (vhash-fold proc init vhash)
567 "Fold over the key/pair elements of @var{vhash} from left to right, with
568 each call to @var{proc} having the form @code{(@var{proc} key value result)},
569 where @var{result} is the result of the previous call to @var{proc} and
570 @var{init} the value of @var{result} for the first call to @var{proc}."
571 (vlist-fold (lambda (key+value result)
572 (proc (car key+value) (cdr key+value)
573 result))
574 init
575 vhash))
576
577 (define (vhash-fold-right proc init vhash)
578 "Fold over the key/pair elements of @var{vhash} from right to left, with
579 each call to @var{proc} having the form @code{(@var{proc} key value result)},
580 where @var{result} is the result of the previous call to @var{proc} and
581 @var{init} the value of @var{result} for the first call to @var{proc}."
582 (vlist-fold-right (lambda (key+value result)
583 (proc (car key+value) (cdr key+value)
584 result))
585 init
586 vhash))
587
588 (define* (alist->vhash alist #:optional (hash hash))
589 "Return the vhash corresponding to @var{alist}, an association list."
590 (fold-right (lambda (pair result)
591 (vhash-cons (car pair) (cdr pair) result hash))
592 vlist-null
593 alist))
594
595 ;;; vlist.scm ends here