/* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */


#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <assert.h>

#include "libguile/_scm.h"
#include "libguile/hash.h"
#include "libguile/eval.h"
#include "libguile/ports.h"
#include "libguile/bdw-gc.h"

#include "libguile/validate.h"
#include "libguile/weak-set.h"

/* Weak Sets

   This file implements weak sets.  One example of a weak set is the
   symbol table, where you want all instances of the `foo' symbol to
   map to one object.  So when you load a file and it wants a symbol
   with the characters "foo", you look one up in the table, using
   custom hash and equality predicates.  Only if one is not found will
   you bother to cons one up and intern it.

   Another use case for weak sets is the set of open ports.  Guile
   needs to be able to flush them all when the process exits, but the
   set shouldn't prevent the GC from collecting the port (and thus
   closing it).

   Weak sets are implemented using an open-addressed hash table.
   Basically this means that there is an array of entries, and the item
   is expected to be found in the slot corresponding to its hash code,
   modulo the length of the array.

   Collisions are handled using linear probing with the Robin Hood
   technique.  See Pedro Celis' paper, "Robin Hood Hashing":

     http://www.cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf

   The vector of entries is allocated as an "atomic" piece of memory,
   so that the GC doesn't trace it.  When an item is added to the set,
   a disappearing link is registered to its location.  If the item is
   collected, then that link will be zeroed out.

   An entry is not just an item, though; the hash code is also stored
   in the entry.  We munge hash codes so that they are never 0.  In
   this way we can detect removed entries (key of zero but nonzero hash
   code), and can then reshuffle elements as needed to maintain the
   Robin Hood ordering.

   Compared to buckets-and-chains hash tables, open addressing has the
   advantage that it is very cache-friendly.  It also uses less
   memory.

   Implementation-wise, there are two things to note.

     1. We assume that hash codes are evenly distributed across the
        range of unsigned longs.  The actual hash code stored in the
        entry is left-shifted by 1 bit (losing 1 bit of hash
        precision), and then or'd with 1.  In this way we ensure that
        the hash field of an occupied entry is nonzero.  To map to an
        index, we right-shift the hash by one, divide by the size, and
        take the remainder.

     2. Since the "keys" (the objects in the set) are stored in an
        atomic region with disappearing links, they need to be accessed
        with the GC alloc lock.  `copy_weak_entry' will do that for
        you.  The hash code itself can be read outside the lock,
        though, since it is not a weak reference.
 */
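
/* To make note 1 above concrete, here is the arithmetic on invented
   example values (not taken from real data): with a raw hash of 42 and
   a table of size 31,

     unsigned long stored = (42UL << 1) | 0x1;    -- 85, never zero
     unsigned long slot   = (stored >> 1) % 31;   -- 42 % 31 == 11

   so a hash field of 0 can always be read as "this slot is free".  */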

typedef struct {
  unsigned long hash;
  scm_t_bits key;
} scm_t_weak_entry;


struct weak_entry_data {
  scm_t_weak_entry *in;
  scm_t_weak_entry *out;
};

/* Copy the weak entry IN to OUT.  Runs with the GC alloc lock held, so
   that the disappearing link in IN->key cannot be zeroed out halfway
   through the copy.  */
static void*
do_copy_weak_entry (void *data)
{
  struct weak_entry_data *e = data;

  e->out->hash = e->in->hash;
  e->out->key = e->in->key;

  return NULL;
}

static void
copy_weak_entry (scm_t_weak_entry *src, scm_t_weak_entry *dst)
{
  struct weak_entry_data data;

  data.in = src;
  data.out = dst;
  GC_call_with_alloc_lock (do_copy_weak_entry, &data);
}

typedef struct {
  scm_t_weak_entry *entries;    /* the data */
  scm_i_pthread_mutex_t lock;   /* the lock */
  unsigned long size;           /* total number of slots. */
  unsigned long n_items;        /* number of items in set */
  unsigned long lower;          /* when to shrink */
  unsigned long upper;          /* when to grow */
  int size_index;               /* index into hashset_size */
  int min_size_index;           /* minimum size_index */
} scm_t_weak_set;


#define SCM_WEAK_SET_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
#define SCM_VALIDATE_WEAK_SET(pos, arg) \
  SCM_MAKE_VALIDATE_MSG (pos, arg, WEAK_SET_P, "weak-set")
#define SCM_WEAK_SET(x) ((scm_t_weak_set *) SCM_CELL_WORD_1 (x))

static unsigned long
hash_to_index (unsigned long hash, unsigned long size)
{
  return (hash >> 1) % size;
}

/* Return the probe distance of the entry with HASH, currently at slot
   K, from the slot that its hash maps to.  */
static unsigned long
entry_distance (unsigned long hash, unsigned long k, unsigned long size)
{
  unsigned long origin = hash_to_index (hash, size);

  if (k >= origin)
    return k - origin;
  else
    /* The other key was displaced and wrapped around.  */
    return size - origin + k;
}
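
/* An illustration with invented numbers: in a table of size 31, an
   entry whose hash maps to slot 29 but which now sits at slot 1 has
   probe distance 31 - 29 + 1 == 3; its probe sequence ran off the end
   of the array and wrapped around.  */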

static void
move_weak_entry (scm_t_weak_entry *from, scm_t_weak_entry *to)
{
  if (from->hash)
    {
      scm_t_weak_entry copy;

      copy_weak_entry (from, &copy);
      to->hash = copy.hash;
      to->key = copy.key;

      if (copy.key && SCM_HEAP_OBJECT_P (SCM_PACK (copy.key)))
        {
#ifdef HAVE_GC_MOVE_DISAPPEARING_LINK
          GC_move_disappearing_link ((void **) &from->key,
                                     (void **) &to->key);
#else
          GC_unregister_disappearing_link ((void **) &from->key);
          SCM_I_REGISTER_DISAPPEARING_LINK ((void **) &to->key,
                                            (void *) to->key);
#endif
        }
    }
  else
    {
      to->hash = 0;
      to->key = 0;
    }
}

static void
rob_from_rich (scm_t_weak_set *set, unsigned long k)
{
  unsigned long empty, size;

  size = set->size;

  /* If we are to free up slot K in the set, we need room to do so.  */
  assert (set->n_items < size);

  empty = k;
  do
    empty = (empty + 1) % size;
  /* Here we access key outside the lock.  Is this a problem?  At first
     glance, I wouldn't think so.  */
  while (set->entries[empty].key);

  do
    {
      unsigned long last = empty ? (empty - 1) : (size - 1);
      move_weak_entry (&set->entries[last], &set->entries[empty]);
      empty = last;
    }
  while (empty != k);

  /* Just for sanity.  */
  set->entries[empty].hash = 0;
  set->entries[empty].key = 0;
}

static void
give_to_poor (scm_t_weak_set *set, unsigned long k)
{
  /* Slot K was just freed up; possibly shuffle others down.  */
  unsigned long size = set->size;

  while (1)
    {
      unsigned long next = (k + 1) % size;
      unsigned long hash;
      scm_t_weak_entry copy;

      hash = set->entries[next].hash;

      if (!hash || hash_to_index (hash, size) == next)
        break;

      copy_weak_entry (&set->entries[next], &copy);

      if (!copy.key)
        /* Lost weak reference.  */
        {
          give_to_poor (set, next);
          set->n_items--;
        }
      else
        move_weak_entry (&set->entries[next], &set->entries[k]);

      k = next;
    }

  /* We have shuffled down any entries that should be shuffled down; now
     free the end.  */
  set->entries[k].hash = 0;
  set->entries[k].key = 0;
}

/* Growing or shrinking is triggered when the load factor
 *
 *   L = N / S    (N: number of items in set, S: bucket vector length)
 *
 * passes an upper limit of 0.9 or a lower limit of 0.2.
 *
 * The implementation stores the upper and lower number of items which
 * trigger a resize in the hashset object.
 *
 * Possible hash set sizes (primes) are stored in the array
 * hashset_size.
 */

static unsigned long hashset_size[] = {
  31, 61, 113, 223, 443, 883, 1759, 3517, 7027, 14051, 28099, 56197, 112363,
  224717, 449419, 898823, 1797641, 3595271, 7190537, 14381041, 28762081,
  57524111, 115048217, 230096423
};

#define HASHSET_SIZE_N (sizeof(hashset_size)/sizeof(unsigned long))
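
/* Working through the threshold arithmetic on one size from the table
   above: at size 113 the code below computes lower = 113 / 5 == 22 and
   upper = 9 * 113 / 10 == 101 items, i.e. load factors of roughly 0.2
   and 0.9.  */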

static int
compute_size_index (scm_t_weak_set *set)
{
  int i = set->size_index;

  if (set->n_items < set->lower)
    {
      /* rehashing is not triggered when i <= min_size */
      do
        --i;
      while (i > set->min_size_index
             && set->n_items < hashset_size[i] / 5);
    }
  else if (set->n_items > set->upper)
    {
      ++i;
      if (i >= HASHSET_SIZE_N)
        /* The biggest size currently is 230096423, which for a 32-bit
           machine will occupy 1.5GB of memory at a load of 80%.  There
           is probably something better to do here, but if you have a
           weak map of that size, you are hosed in any case.  */
        abort ();
    }

  return i;
}

static int
is_acceptable_size_index (scm_t_weak_set *set, int size_index)
{
  int computed = compute_size_index (set);

  if (size_index == computed)
    /* We were going to grow or shrink, and allocating the new vector
       didn't change the target size.  */
    return 1;

  if (size_index == computed + 1)
    {
      /* We were going to enlarge the set, but allocating the new
         vector finalized some objects, making an enlargement
         unnecessary.  It might still be a good idea to use the larger
         set, though.  (This branch also gets hit if, while allocating
         the vector, some other thread was actively removing items from
         the set.  That is less likely, though.)  */
      unsigned long new_lower = hashset_size[size_index] / 5;

      return set->size > new_lower;
    }

  if (size_index == computed - 1)
    /* We were going to shrink the set, but when we dropped the lock
       to allocate the new vector, some other thread added elements to
       the set.  */
    return 0;

  /* The computed size differs from our newly allocated size by more
     than one size index -- recalculate.  */
  return 0;
}

static void
resize_set (scm_t_weak_set *set)
{
  scm_t_weak_entry *old_entries, *new_entries;
  int new_size_index;
  unsigned long old_size, new_size, old_k;

  do
    {
      new_size_index = compute_size_index (set);
      if (new_size_index == set->size_index)
        return;
      new_size = hashset_size[new_size_index];
      scm_i_pthread_mutex_unlock (&set->lock);
      /* Allocating memory might cause finalizers to run, which could
         run anything, so drop our lock to avoid deadlocks.  */
      new_entries = scm_gc_malloc_pointerless (new_size * sizeof(scm_t_weak_entry),
                                               "weak-set");
      scm_i_pthread_mutex_lock (&set->lock);
    }
  while (!is_acceptable_size_index (set, new_size_index));

  old_entries = set->entries;
  old_size = set->size;

  memset (new_entries, 0, new_size * sizeof(scm_t_weak_entry));

  set->size_index = new_size_index;
  set->size = new_size;
  if (new_size_index <= set->min_size_index)
    set->lower = 0;
  else
    set->lower = new_size / 5;
  set->upper = 9 * new_size / 10;
  set->n_items = 0;
  set->entries = new_entries;

  for (old_k = 0; old_k < old_size; old_k++)
    {
      scm_t_weak_entry copy;
      unsigned long new_k, distance;

      if (!old_entries[old_k].hash)
        continue;

      copy_weak_entry (&old_entries[old_k], &copy);

      if (!copy.key)
        continue;

      new_k = hash_to_index (copy.hash, new_size);

      for (distance = 0; ; distance++, new_k = (new_k + 1) % new_size)
        {
          unsigned long other_hash = new_entries[new_k].hash;

          if (!other_hash)
            /* Found an empty entry. */
            break;

          /* Displace the entry if our distance is less, otherwise keep
             looking. */
          if (entry_distance (other_hash, new_k, new_size) < distance)
            {
              rob_from_rich (set, new_k);
              break;
            }
        }

      set->n_items++;
      new_entries[new_k].hash = copy.hash;
      new_entries[new_k].key = copy.key;

      if (SCM_HEAP_OBJECT_P (SCM_PACK (copy.key)))
        SCM_I_REGISTER_DISAPPEARING_LINK ((void **) &new_entries[new_k].key,
                                          (void *) new_entries[new_k].key);
    }
}

/* Run after GC via do_vacuum_weak_set, this function runs over the
   whole table, removing lost weak references, reshuffling the set as it
   goes.  It might resize the set if it reaps enough entries.  */
static void
vacuum_weak_set (scm_t_weak_set *set)
{
  scm_t_weak_entry *entries = set->entries;
  unsigned long size = set->size;
  unsigned long k;

  for (k = 0; k < size; k++)
    {
      unsigned long hash = entries[k].hash;

      if (hash)
        {
          scm_t_weak_entry copy;

          copy_weak_entry (&entries[k], &copy);

          if (!copy.key)
            /* Lost weak reference; reshuffle.  */
            {
              give_to_poor (set, k);
              set->n_items--;
            }
        }
    }

  if (set->n_items < set->lower)
    resize_set (set);
}

static SCM
weak_set_lookup (scm_t_weak_set *set, unsigned long hash,
                 scm_t_set_predicate_fn pred, void *closure,
                 SCM dflt)
{
  unsigned long k, distance, size;
  scm_t_weak_entry *entries;

  size = set->size;
  entries = set->entries;

  hash = (hash << 1) | 0x1;
  k = hash_to_index (hash, size);

  for (distance = 0; distance < size; distance++, k = (k + 1) % size)
    {
      unsigned long other_hash;

    retry:
      other_hash = entries[k].hash;

      if (!other_hash)
        /* Not found. */
        return dflt;

      if (hash == other_hash)
        {
          scm_t_weak_entry copy;

          copy_weak_entry (&entries[k], &copy);

          if (!copy.key)
            {
              /* Lost weak reference; reshuffle.  */
              give_to_poor (set, k);
              set->n_items--;
              goto retry;
            }

          if (pred (SCM_PACK (copy.key), closure))
            /* Found. */
            return SCM_PACK (copy.key);
        }

      /* If the entry's distance is less, our key is not in the set.  */
      if (entry_distance (other_hash, k, size) < distance)
        return dflt;
    }

  /* If we got here, then we were unfortunate enough to loop through the
     whole set.  Shouldn't happen, but hey.  */
  return dflt;
}

static SCM
weak_set_add_x (scm_t_weak_set *set, unsigned long hash,
                scm_t_set_predicate_fn pred, void *closure,
                SCM obj)
{
  unsigned long k, distance, size;
  scm_t_weak_entry *entries;

  size = set->size;
  entries = set->entries;

  hash = (hash << 1) | 0x1;
  k = hash_to_index (hash, size);

  for (distance = 0; ; distance++, k = (k + 1) % size)
    {
      unsigned long other_hash;

    retry:
      other_hash = entries[k].hash;

      if (!other_hash)
        /* Found an empty entry. */
        break;

      if (other_hash == hash)
        {
          scm_t_weak_entry copy;

          copy_weak_entry (&entries[k], &copy);

          if (!copy.key)
            {
              /* Lost weak reference; reshuffle.  */
              give_to_poor (set, k);
              set->n_items--;
              goto retry;
            }

          if (pred (SCM_PACK (copy.key), closure))
            /* Found an entry with this key. */
            return SCM_PACK (copy.key);
        }

      if (set->n_items > set->upper)
        {
          /* Full set, time to resize.  */
          resize_set (set);
          return weak_set_add_x (set, hash >> 1, pred, closure, obj);
        }

      /* Displace the entry if our distance is less, otherwise keep
         looking. */
      if (entry_distance (other_hash, k, size) < distance)
        {
          rob_from_rich (set, k);
          break;
        }
    }

  set->n_items++;
  entries[k].hash = hash;
  entries[k].key = SCM_UNPACK (obj);

  if (SCM_HEAP_OBJECT_P (obj))
    SCM_I_REGISTER_DISAPPEARING_LINK ((void **) &entries[k].key,
                                      (void *) SCM2PTR (obj));

  return obj;
}

static void
weak_set_remove_x (scm_t_weak_set *set, unsigned long hash,
                   scm_t_set_predicate_fn pred, void *closure)
{
  unsigned long k, distance, size;
  scm_t_weak_entry *entries;

  size = set->size;
  entries = set->entries;

  hash = (hash << 1) | 0x1;
  k = hash_to_index (hash, size);

  for (distance = 0; distance < size; distance++, k = (k + 1) % size)
    {
      unsigned long other_hash;

    retry:
      other_hash = entries[k].hash;

      if (!other_hash)
        /* Not found. */
        return;

      if (other_hash == hash)
        {
          scm_t_weak_entry copy;

          copy_weak_entry (&entries[k], &copy);

          if (!copy.key)
            {
              /* Lost weak reference; reshuffle.  */
              give_to_poor (set, k);
              set->n_items--;
              goto retry;
            }

          if (pred (SCM_PACK (copy.key), closure))
            {
              /* Found an entry with this key. */
              entries[k].hash = 0;
              entries[k].key = 0;

              if (SCM_HEAP_OBJECT_P (SCM_PACK (copy.key)))
                GC_unregister_disappearing_link ((void **) &entries[k].key);

              if (--set->n_items < set->lower)
                resize_set (set);
              else
                give_to_poor (set, k);

              return;
            }
        }

      /* If the entry's distance is less, our key is not in the set.  */
      if (entry_distance (other_hash, k, size) < distance)
        return;
    }
}

static SCM
make_weak_set (unsigned long k)
{
  scm_t_weak_set *set;

  int i = 0, n = k ? k : 31;
  while (i + 1 < HASHSET_SIZE_N && n > hashset_size[i])
    ++i;
  n = hashset_size[i];

  set = scm_gc_malloc (sizeof (*set), "weak-set");
  set->entries = scm_gc_malloc_pointerless (n * sizeof(scm_t_weak_entry),
                                            "weak-set");
  memset (set->entries, 0, n * sizeof(scm_t_weak_entry));
  set->n_items = 0;
  set->size = n;
  set->lower = 0;
  set->upper = 9 * n / 10;
  set->size_index = i;
  set->min_size_index = i;
  scm_i_pthread_mutex_init (&set->lock, NULL);

  return scm_cell (scm_tc7_weak_set, (scm_t_bits)set);
}

void
scm_i_weak_set_print (SCM exp, SCM port, scm_print_state *pstate)
{
  scm_puts_unlocked ("#<", port);
  scm_puts_unlocked ("weak-set ", port);
  scm_uintprint (SCM_WEAK_SET (exp)->n_items, 10, port);
  scm_putc_unlocked ('/', port);
  scm_uintprint (SCM_WEAK_SET (exp)->size, 10, port);
  scm_puts_unlocked (">", port);
}

static void
do_vacuum_weak_set (SCM set)
{
  scm_t_weak_set *s;

  s = SCM_WEAK_SET (set);

  /* If some other thread holds the lock, punt; we'll get another
     chance after the next GC.  */
  if (scm_i_pthread_mutex_trylock (&s->lock) == 0)
    {
      vacuum_weak_set (s);
      scm_i_pthread_mutex_unlock (&s->lock);
    }
}

/* The before-gc C hook only runs if GC_set_start_callback is available,
   so if not, fall back on a finalizer-based implementation.  */
static int
weak_gc_callback (void **weak)
{
  void *val = weak[0];
  void (*callback) (SCM) = weak[1];

  if (!val)
    return 0;

  callback (SCM_PACK_POINTER (val));

  return 1;
}

#ifdef HAVE_GC_SET_START_CALLBACK
static void*
weak_gc_hook (void *hook_data, void *fn_data, void *data)
{
  if (!weak_gc_callback (fn_data))
    scm_c_hook_remove (&scm_before_gc_c_hook, weak_gc_hook, fn_data);

  return NULL;
}
#else
static void
weak_gc_finalizer (void *ptr, void *data)
{
  if (weak_gc_callback (ptr))
    scm_i_set_finalizer (ptr, weak_gc_finalizer, data);
}
#endif

static void
scm_c_register_weak_gc_callback (SCM obj, void (*callback) (SCM))
{
  void **weak = GC_MALLOC_ATOMIC (sizeof (void*) * 2);

  weak[0] = SCM_UNPACK_POINTER (obj);
  weak[1] = (void*)callback;
  GC_GENERAL_REGISTER_DISAPPEARING_LINK (weak, SCM2PTR (obj));

#ifdef HAVE_GC_SET_START_CALLBACK
  scm_c_hook_add (&scm_before_gc_c_hook, weak_gc_hook, weak, 0);
#else
  scm_i_set_finalizer (weak, weak_gc_finalizer, NULL);
#endif
}

SCM
scm_c_make_weak_set (unsigned long k)
{
  SCM ret;

  ret = make_weak_set (k);

  scm_c_register_weak_gc_callback (ret, do_vacuum_weak_set);

  return ret;
}

SCM
scm_weak_set_p (SCM obj)
{
  return scm_from_bool (SCM_WEAK_SET_P (obj));
}

SCM
scm_weak_set_clear_x (SCM set)
{
  scm_t_weak_set *s = SCM_WEAK_SET (set);

  scm_i_pthread_mutex_lock (&s->lock);

  memset (s->entries, 0, sizeof (scm_t_weak_entry) * s->size);
  s->n_items = 0;

  scm_i_pthread_mutex_unlock (&s->lock);

  return SCM_UNSPECIFIED;
}

SCM
scm_c_weak_set_lookup (SCM set, unsigned long raw_hash,
                       scm_t_set_predicate_fn pred,
                       void *closure, SCM dflt)
{
  SCM ret;
  scm_t_weak_set *s = SCM_WEAK_SET (set);

  scm_i_pthread_mutex_lock (&s->lock);

  ret = weak_set_lookup (s, raw_hash, pred, closure, dflt);

  scm_i_pthread_mutex_unlock (&s->lock);

  return ret;
}

SCM
scm_c_weak_set_add_x (SCM set, unsigned long raw_hash,
                      scm_t_set_predicate_fn pred,
                      void *closure, SCM obj)
{
  SCM ret;
  scm_t_weak_set *s = SCM_WEAK_SET (set);

  scm_i_pthread_mutex_lock (&s->lock);

  ret = weak_set_add_x (s, raw_hash, pred, closure, obj);

  scm_i_pthread_mutex_unlock (&s->lock);

  return ret;
}

void
scm_c_weak_set_remove_x (SCM set, unsigned long raw_hash,
                         scm_t_set_predicate_fn pred,
                         void *closure)
{
  scm_t_weak_set *s = SCM_WEAK_SET (set);

  scm_i_pthread_mutex_lock (&s->lock);

  weak_set_remove_x (s, raw_hash, pred, closure);

  scm_i_pthread_mutex_unlock (&s->lock);
}

static int
eq_predicate (SCM x, void *closure)
{
  return scm_is_eq (x, SCM_PACK_POINTER (closure));
}

SCM
scm_weak_set_add_x (SCM set, SCM obj)
{
  return scm_c_weak_set_add_x (set, scm_ihashq (obj, -1),
                               eq_predicate, SCM_UNPACK_POINTER (obj), obj);
}

SCM
scm_weak_set_remove_x (SCM set, SCM obj)
{
  scm_c_weak_set_remove_x (set, scm_ihashq (obj, -1),
                           eq_predicate, SCM_UNPACK_POINTER (obj));

  return SCM_UNSPECIFIED;
}
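
/* A minimal usage sketch of the API above (illustrative only; `my_obj'
   is a stand-in for any heap object):

     SCM set = scm_c_make_weak_set (0);
     scm_weak_set_add_x (set, my_obj);
     ...
     scm_weak_set_remove_x (set, my_obj);

   The set holds its elements weakly: if `my_obj' becomes otherwise
   unreachable, the GC zeroes its entry, and the vacuum callback
   registered by scm_c_make_weak_set reshuffles the table after GC.  */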

SCM
scm_c_weak_set_fold (scm_t_set_fold_fn proc, void *closure,
                     SCM init, SCM set)
{
  scm_t_weak_set *s;
  scm_t_weak_entry *entries;
  unsigned long k, size;

  s = SCM_WEAK_SET (set);

  scm_i_pthread_mutex_lock (&s->lock);

  size = s->size;
  entries = s->entries;

  for (k = 0; k < size; k++)
    {
      if (entries[k].hash)
        {
          scm_t_weak_entry copy;

          copy_weak_entry (&entries[k], &copy);

          if (copy.key)
            {
              /* Release set lock while we call the function.  */
              scm_i_pthread_mutex_unlock (&s->lock);
              init = proc (closure, SCM_PACK (copy.key), init);
              scm_i_pthread_mutex_lock (&s->lock);
            }
        }
    }

  scm_i_pthread_mutex_unlock (&s->lock);

  return init;
}

static SCM
fold_trampoline (void *closure, SCM item, SCM init)
{
  return scm_call_2 (SCM_PACK_POINTER (closure), item, init);
}

SCM
scm_weak_set_fold (SCM proc, SCM init, SCM set)
{
  return scm_c_weak_set_fold (fold_trampoline, SCM_UNPACK_POINTER (proc),
                              init, set);
}

static SCM
for_each_trampoline (void *closure, SCM item, SCM seed)
{
  scm_call_1 (SCM_PACK_POINTER (closure), item);
  return seed;
}

SCM
scm_weak_set_for_each (SCM proc, SCM set)
{
  scm_c_weak_set_fold (for_each_trampoline, SCM_UNPACK_POINTER (proc),
                       SCM_BOOL_F, set);

  return SCM_UNSPECIFIED;
}

static SCM
map_trampoline (void *closure, SCM item, SCM seed)
{
  return scm_cons (scm_call_1 (SCM_PACK_POINTER (closure), item), seed);
}

SCM
scm_weak_set_map_to_list (SCM proc, SCM set)
{
  return scm_c_weak_set_fold (map_trampoline, SCM_UNPACK_POINTER (proc),
                              SCM_EOL, set);
}

void
scm_init_weak_set ()
{
#include "libguile/weak-set.x"
}