1 /* Copyright (C) 2011 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include "libguile/_scm.h"
28 #include "libguile/hash.h"
29 #include "libguile/eval.h"
30 #include "libguile/ports.h"
31 #include "libguile/bdw-gc.h"
33 #include "libguile/validate.h"
34 #include "libguile/weak-set.h"
39 This file implements weak sets. One example of a weak set is the
40 symbol table, where you want all instances of the `foo' symbol to map
41 to one object. So when you load a file and it wants a symbol with
the characters "foo", you look one up in the table, using custom hash and
43 equality predicates. Only if one is not found will you bother to
44 cons one up and intern it.
46 Another use case for weak sets is the set of open ports. Guile needs
47 to be able to flush them all when the process exits, but the set
48 shouldn't prevent the GC from collecting the port (and thus closing
51 Weak sets are implemented using an open-addressed hash table.
52 Basically this means that there is an array of entries, and the item
53 is expected to be found the slot corresponding to its hash code,
54 modulo the length of the array.
56 Collisions are handled using linear probing with the Robin Hood
57 technique. See Pedro Celis' paper, "Robin Hood Hashing":
59 http://www.cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
61 The vector of entries is allocated as an "atomic" piece of memory, so
62 that the GC doesn't trace it. When an item is added to the set, a
63 disappearing link is registered to its location. If the item is
64 collected, then that link will be zeroed out.
66 An entry is not just an item, though; the hash code is also stored in
67 the entry. We munge hash codes so that they are never 0. In this
68 way we can detect removed entries (key of zero but nonzero hash
69 code), and can then reshuffle elements as needed to maintain the
72 Compared to buckets-and-chains hash tables, open addressing has the
73 advantage that it is very cache-friendly. It also uses less memory.
75 Implementation-wise, there are two things to note.
77 1. We assume that hash codes are evenly distributed across the
78 range of unsigned longs. The actual hash code stored in the
79 entry is left-shifted by 1 bit (losing 1 bit of hash precision),
80 and then or'd with 1. In this way we ensure that the hash field
81 of an occupied entry is nonzero. To map to an index, we
82 right-shift the hash by one, divide by the size, and take the
85 2. Since the "keys" (the objects in the set) are stored in an
86 atomic region with disappearing links, they need to be accessed
87 with the GC alloc lock. `copy_weak_entry' will do that for
88 you. The hash code itself can be read outside the lock,
99 struct weak_entry_data
{
100 scm_t_weak_entry
*in
;
101 scm_t_weak_entry
*out
;
105 do_copy_weak_entry (void *data
)
107 struct weak_entry_data
*e
= data
;
109 e
->out
->hash
= e
->in
->hash
;
110 e
->out
->key
= e
->in
->key
;
116 copy_weak_entry (scm_t_weak_entry
*src
, scm_t_weak_entry
*dst
)
118 struct weak_entry_data data
;
123 GC_call_with_alloc_lock (do_copy_weak_entry
, &data
);
128 scm_t_weak_entry
*entries
; /* the data */
129 scm_i_pthread_mutex_t lock
; /* the lock */
130 unsigned long size
; /* total number of slots. */
131 unsigned long n_items
; /* number of items in set */
132 unsigned long lower
; /* when to shrink */
133 unsigned long upper
; /* when to grow */
134 int size_index
; /* index into hashset_size */
135 int min_size_index
; /* minimum size_index */
/* A weak set is a tc7 cell whose second word points at the
   scm_t_weak_set.  */
#define SCM_WEAK_SET_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
#define SCM_VALIDATE_WEAK_SET(pos, arg) \
  SCM_MAKE_VALIDATE_MSG (pos, arg, WEAK_SET_P, "weak-set")
#define SCM_WEAK_SET(x) ((scm_t_weak_set *) SCM_CELL_WORD_1 (x))
/* Map the munged (always-odd) HASH to a slot index in a table of SIZE
   slots.  The low bit is the occupancy marker, so shift it off before
   taking the modulo.  */
static unsigned long
hash_to_index (unsigned long hash, unsigned long size)
{
  return (hash >> 1) % size;
}
/* Return how far the entry with hash HASH, currently stored at index
   K, is displaced from its preferred slot in a table of SIZE slots.
   Robin Hood probing compares these distances to decide which entry to
   displace.  */
static unsigned long
entry_distance (unsigned long hash, unsigned long k, unsigned long size)
{
  unsigned long origin = hash_to_index (hash, size);

  if (k >= origin)
    return k - origin;
  else
    /* The other key was displaced and wrapped around.  */
    return size - origin + k;
}
164 move_weak_entry (scm_t_weak_entry
*from
, scm_t_weak_entry
*to
)
168 scm_t_weak_entry copy
;
170 copy_weak_entry (from
, ©
);
171 to
->hash
= copy
.hash
;
174 if (copy
.key
&& SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
176 #ifdef HAVE_GC_MOVE_DISAPPEARING_LINK
177 GC_move_disappearing_link ((GC_PTR
) &from
->key
, (GC_PTR
) &to
->key
);
179 GC_unregister_disappearing_link ((GC_PTR
) &from
->key
);
180 SCM_I_REGISTER_DISAPPEARING_LINK ((GC_PTR
) &to
->key
,
193 rob_from_rich (scm_t_weak_set
*set
, unsigned long k
)
195 unsigned long empty
, size
;
199 /* If we are to free up slot K in the set, we need room to do so. */
200 assert (set
->n_items
< size
);
204 empty
= (empty
+ 1) % size
;
205 /* Here we access key outside the lock. Is this a problem? At first
206 glance, I wouldn't think so. */
207 while (set
->entries
[empty
].key
);
211 unsigned long last
= empty
? (empty
- 1) : (size
- 1);
212 move_weak_entry (&set
->entries
[last
], &set
->entries
[empty
]);
217 /* Just for sanity. */
218 set
->entries
[empty
].hash
= 0;
219 set
->entries
[empty
].key
= 0;
223 give_to_poor (scm_t_weak_set
*set
, unsigned long k
)
225 /* Slot K was just freed up; possibly shuffle others down. */
226 unsigned long size
= set
->size
;
230 unsigned long next
= (k
+ 1) % size
;
232 scm_t_weak_entry copy
;
234 hash
= set
->entries
[next
].hash
;
236 if (!hash
|| hash_to_index (hash
, size
) == next
)
239 copy_weak_entry (&set
->entries
[next
], ©
);
242 /* Lost weak reference. */
244 give_to_poor (set
, next
);
249 move_weak_entry (&set
->entries
[next
], &set
->entries
[k
]);
254 /* We have shuffled down any entries that should be shuffled down; now
256 set
->entries
[k
].hash
= 0;
257 set
->entries
[k
].key
= 0;
/* Growing or shrinking is triggered when the load factor
 *
 *   L = N / S    (N: number of items in set, S: bucket vector length)
 *
 * passes an upper limit of 0.9 or a lower limit of 0.2.
 *
 * The implementation stores the upper and lower number of items which
 * trigger a resize in the hashset object.
 *
 * Possible hash set sizes (primes) are stored in the array
 * hashset_size.
 */

static unsigned long hashset_size[] = {
  31, 61, 113, 223, 443, 883, 1759, 3517, 7027, 14051, 28099, 56197, 112363,
  224717, 449419, 898823, 1797641, 3595271, 7190537, 14381041, 28762081,
  57524111, 115048217, 230096423
};

#define HASHSET_SIZE_N (sizeof(hashset_size)/sizeof(unsigned long))
285 resize_set (scm_t_weak_set
*set
)
287 scm_t_weak_entry
*old_entries
, *new_entries
;
289 unsigned long old_size
, new_size
, old_k
;
291 old_entries
= set
->entries
;
292 old_size
= set
->size
;
294 if (set
->n_items
< set
->lower
)
296 /* rehashing is not triggered when i <= min_size */
300 while (i
> set
->min_size_index
301 && set
->n_items
< hashset_size
[i
] / 4);
305 i
= set
->size_index
+ 1;
306 if (i
>= HASHSET_SIZE_N
)
307 /* The biggest size currently is 230096423, which for a 32-bit
308 machine will occupy 1.5GB of memory at a load of 80%. There
309 is probably something better to do here, but if you have a
310 weak map of that size, you are hosed in any case. */
314 new_size
= hashset_size
[i
];
315 new_entries
= scm_gc_malloc_pointerless (new_size
* sizeof(scm_t_weak_entry
),
317 memset (new_entries
, 0, new_size
* sizeof(scm_t_weak_entry
));
320 set
->size
= new_size
;
321 if (i
<= set
->min_size_index
)
324 set
->lower
= new_size
/ 5;
325 set
->upper
= 9 * new_size
/ 10;
327 set
->entries
= new_entries
;
329 for (old_k
= 0; old_k
< old_size
; old_k
++)
331 scm_t_weak_entry copy
;
332 unsigned long new_k
, distance
;
334 if (!old_entries
[old_k
].hash
)
337 copy_weak_entry (&old_entries
[old_k
], ©
);
342 new_k
= hash_to_index (copy
.hash
, new_size
);
344 for (distance
= 0; ; distance
++, new_k
= (new_k
+ 1) % new_size
)
346 unsigned long other_hash
= new_entries
[new_k
].hash
;
349 /* Found an empty entry. */
352 /* Displace the entry if our distance is less, otherwise keep
354 if (entry_distance (other_hash
, new_k
, new_size
) < distance
)
356 rob_from_rich (set
, new_k
);
362 new_entries
[new_k
].hash
= copy
.hash
;
363 new_entries
[new_k
].key
= copy
.key
;
365 if (SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
366 SCM_I_REGISTER_DISAPPEARING_LINK ((GC_PTR
) &new_entries
[new_k
].key
,
367 (GC_PTR
) new_entries
[new_k
].key
);
371 /* Run after GC via do_vacuum_weak_set, this function runs over the
372 whole table, removing lost weak references, reshuffling the set as it
373 goes. It might resize the set if it reaps enough entries. */
375 vacuum_weak_set (scm_t_weak_set
*set
)
377 scm_t_weak_entry
*entries
= set
->entries
;
378 unsigned long size
= set
->size
;
381 for (k
= 0; k
< size
; k
++)
383 unsigned long hash
= entries
[k
].hash
;
387 scm_t_weak_entry copy
;
389 copy_weak_entry (&entries
[k
], ©
);
392 /* Lost weak reference; reshuffle. */
394 give_to_poor (set
, k
);
400 if (set
->n_items
< set
->lower
)
408 weak_set_lookup (scm_t_weak_set
*set
, unsigned long hash
,
409 scm_t_set_predicate_fn pred
, void *closure
,
412 unsigned long k
, distance
, size
;
413 scm_t_weak_entry
*entries
;
416 entries
= set
->entries
;
418 hash
= (hash
<< 1) | 0x1;
419 k
= hash_to_index (hash
, size
);
421 for (distance
= 0; distance
< size
; distance
++, k
= (k
+ 1) % size
)
423 unsigned long other_hash
;
426 other_hash
= entries
[k
].hash
;
432 if (hash
== other_hash
)
434 scm_t_weak_entry copy
;
436 copy_weak_entry (&entries
[k
], ©
);
439 /* Lost weak reference; reshuffle. */
441 give_to_poor (set
, k
);
446 if (pred (SCM_PACK (copy
.key
), closure
))
448 return SCM_PACK (copy
.key
);
451 /* If the entry's distance is less, our key is not in the set. */
452 if (entry_distance (other_hash
, k
, size
) < distance
)
456 /* If we got here, then we were unfortunate enough to loop through the
457 whole set. Shouldn't happen, but hey. */
463 weak_set_add_x (scm_t_weak_set
*set
, unsigned long hash
,
464 scm_t_set_predicate_fn pred
, void *closure
,
467 unsigned long k
, distance
, size
;
468 scm_t_weak_entry
*entries
;
471 entries
= set
->entries
;
473 hash
= (hash
<< 1) | 0x1;
474 k
= hash_to_index (hash
, size
);
476 for (distance
= 0; ; distance
++, k
= (k
+ 1) % size
)
478 unsigned long other_hash
;
481 other_hash
= entries
[k
].hash
;
484 /* Found an empty entry. */
487 if (other_hash
== hash
)
489 scm_t_weak_entry copy
;
491 copy_weak_entry (&entries
[k
], ©
);
494 /* Lost weak reference; reshuffle. */
496 give_to_poor (set
, k
);
501 if (pred (SCM_PACK (copy
.key
), closure
))
502 /* Found an entry with this key. */
503 return SCM_PACK (copy
.key
);
506 if (set
->n_items
> set
->upper
)
507 /* Full set, time to resize. */
510 return weak_set_add_x (set
, hash
>> 1, pred
, closure
, obj
);
513 /* Displace the entry if our distance is less, otherwise keep
515 if (entry_distance (other_hash
, k
, size
) < distance
)
517 rob_from_rich (set
, k
);
523 entries
[k
].hash
= hash
;
524 entries
[k
].key
= SCM_UNPACK (obj
);
526 if (SCM_HEAP_OBJECT_P (obj
))
527 SCM_I_REGISTER_DISAPPEARING_LINK ((GC_PTR
) &entries
[k
].key
,
528 (GC_PTR
) SCM_HEAP_OBJECT_BASE (obj
));
535 weak_set_remove_x (scm_t_weak_set
*set
, unsigned long hash
,
536 scm_t_set_predicate_fn pred
, void *closure
)
538 unsigned long k
, distance
, size
;
539 scm_t_weak_entry
*entries
;
542 entries
= set
->entries
;
544 hash
= (hash
<< 1) | 0x1;
545 k
= hash_to_index (hash
, size
);
547 for (distance
= 0; distance
< size
; distance
++, k
= (k
+ 1) % size
)
549 unsigned long other_hash
;
552 other_hash
= entries
[k
].hash
;
558 if (other_hash
== hash
)
560 scm_t_weak_entry copy
;
562 copy_weak_entry (&entries
[k
], ©
);
565 /* Lost weak reference; reshuffle. */
567 give_to_poor (set
, k
);
572 if (pred (SCM_PACK (copy
.key
), closure
))
573 /* Found an entry with this key. */
578 if (SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
579 GC_unregister_disappearing_link ((GC_PTR
) &entries
[k
].key
);
581 if (--set
->n_items
< set
->lower
)
584 give_to_poor (set
, k
);
590 /* If the entry's distance is less, our key is not in the set. */
591 if (entry_distance (other_hash
, k
, size
) < distance
)
599 make_weak_set (unsigned long k
)
603 int i
= 0, n
= k
? k
: 31;
604 while (i
+ 1 < HASHSET_SIZE_N
&& n
> hashset_size
[i
])
608 set
= scm_gc_malloc (sizeof (*set
), "weak-set");
609 set
->entries
= scm_gc_malloc_pointerless (n
* sizeof(scm_t_weak_entry
),
611 memset (set
->entries
, 0, n
* sizeof(scm_t_weak_entry
));
615 set
->upper
= 9 * n
/ 10;
617 set
->min_size_index
= i
;
618 scm_i_pthread_mutex_init (&set
->lock
, NULL
);
620 return scm_cell (scm_tc7_weak_set
, (scm_t_bits
)set
);
624 scm_i_weak_set_print (SCM exp
, SCM port
, scm_print_state
*pstate
)
626 scm_puts_unlocked ("#<", port
);
627 scm_puts_unlocked ("weak-set ", port
);
628 scm_uintprint (SCM_WEAK_SET (exp
)->n_items
, 10, port
);
629 scm_putc_unlocked ('/', port
);
630 scm_uintprint (SCM_WEAK_SET (exp
)->size
, 10, port
);
631 scm_puts_unlocked (">", port
);
635 do_vacuum_weak_set (SCM set
)
639 s
= SCM_WEAK_SET (set
);
641 if (scm_i_pthread_mutex_trylock (&s
->lock
) == 0)
644 scm_i_pthread_mutex_unlock (&s
->lock
);
/* The before-gc C hook only runs if GC_set_start_callback is available,
   so if not, fall back on a finalizer-based implementation.  */

/* Invoke the callback stored in the weak cell WEAK, whose first word
   weakly holds the subject object.  Return 0 if the object has been
   collected (the caller should then deregister), 1 otherwise.  */
static int
weak_gc_callback (void **weak)
{
  void *val = weak[0];
  void (*callback) (SCM) = weak[1];

  if (!val)
    /* Weakly-held object was collected; time to deregister.  */
    return 0;

  callback (SCM_PACK_POINTER (val));

  return 1;
}
666 #ifdef HAVE_GC_SET_START_CALLBACK
668 weak_gc_hook (void *hook_data
, void *fn_data
, void *data
)
670 if (!weak_gc_callback (fn_data
))
671 scm_c_hook_remove (&scm_before_gc_c_hook
, weak_gc_hook
, fn_data
);
677 weak_gc_finalizer (void *ptr
, void *data
)
679 if (weak_gc_callback (ptr
))
680 GC_REGISTER_FINALIZER_NO_ORDER (ptr
, weak_gc_finalizer
, data
, NULL
, NULL
);
685 scm_c_register_weak_gc_callback (SCM obj
, void (*callback
) (SCM
))
687 void **weak
= GC_MALLOC_ATOMIC (sizeof (void*) * 2);
689 weak
[0] = SCM_UNPACK_POINTER (obj
);
690 weak
[1] = (void*)callback
;
691 GC_GENERAL_REGISTER_DISAPPEARING_LINK (weak
, SCM_HEAP_OBJECT_BASE (obj
));
693 #ifdef HAVE_GC_SET_START_CALLBACK
694 scm_c_hook_add (&scm_after_gc_c_hook
, weak_gc_hook
, weak
, 0);
696 GC_REGISTER_FINALIZER_NO_ORDER (weak
, weak_gc_finalizer
, NULL
, NULL
, NULL
);
701 scm_c_make_weak_set (unsigned long k
)
705 ret
= make_weak_set (k
);
707 scm_c_register_weak_gc_callback (ret
, do_vacuum_weak_set
);
713 scm_weak_set_p (SCM obj
)
715 return scm_from_bool (SCM_WEAK_SET_P (obj
));
719 scm_weak_set_clear_x (SCM set
)
721 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
723 scm_i_pthread_mutex_lock (&s
->lock
);
725 memset (s
->entries
, 0, sizeof (scm_t_weak_entry
) * s
->size
);
728 scm_i_pthread_mutex_unlock (&s
->lock
);
730 return SCM_UNSPECIFIED
;
734 scm_c_weak_set_lookup (SCM set
, unsigned long raw_hash
,
735 scm_t_set_predicate_fn pred
,
736 void *closure
, SCM dflt
)
739 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
741 scm_i_pthread_mutex_lock (&s
->lock
);
743 ret
= weak_set_lookup (s
, raw_hash
, pred
, closure
, dflt
);
745 scm_i_pthread_mutex_unlock (&s
->lock
);
751 scm_c_weak_set_add_x (SCM set
, unsigned long raw_hash
,
752 scm_t_set_predicate_fn pred
,
753 void *closure
, SCM obj
)
756 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
758 scm_i_pthread_mutex_lock (&s
->lock
);
760 ret
= weak_set_add_x (s
, raw_hash
, pred
, closure
, obj
);
762 scm_i_pthread_mutex_unlock (&s
->lock
);
768 scm_c_weak_set_remove_x (SCM set
, unsigned long raw_hash
,
769 scm_t_set_predicate_fn pred
,
772 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
774 scm_i_pthread_mutex_lock (&s
->lock
);
776 weak_set_remove_x (s
, raw_hash
, pred
, closure
);
778 scm_i_pthread_mutex_unlock (&s
->lock
);
782 eq_predicate (SCM x
, void *closure
)
784 return scm_is_eq (x
, SCM_PACK_POINTER (closure
));
788 scm_weak_set_add_x (SCM set
, SCM obj
)
790 return scm_c_weak_set_add_x (set
, scm_ihashq (obj
, -1),
791 eq_predicate
, SCM_UNPACK_POINTER (obj
), obj
);
795 scm_weak_set_remove_x (SCM set
, SCM obj
)
797 scm_c_weak_set_remove_x (set
, scm_ihashq (obj
, -1),
798 eq_predicate
, SCM_UNPACK_POINTER (obj
));
800 return SCM_UNSPECIFIED
;
804 scm_c_weak_set_fold (scm_t_set_fold_fn proc
, void *closure
,
808 scm_t_weak_entry
*entries
;
809 unsigned long k
, size
;
811 s
= SCM_WEAK_SET (set
);
813 scm_i_pthread_mutex_lock (&s
->lock
);
816 entries
= s
->entries
;
818 for (k
= 0; k
< size
; k
++)
822 scm_t_weak_entry copy
;
824 copy_weak_entry (&entries
[k
], ©
);
828 /* Release set lock while we call the function. */
829 scm_i_pthread_mutex_unlock (&s
->lock
);
830 init
= proc (closure
, SCM_PACK (copy
.key
), init
);
831 scm_i_pthread_mutex_lock (&s
->lock
);
836 scm_i_pthread_mutex_unlock (&s
->lock
);
842 fold_trampoline (void *closure
, SCM item
, SCM init
)
844 return scm_call_2 (SCM_PACK_POINTER (closure
), item
, init
);
848 scm_weak_set_fold (SCM proc
, SCM init
, SCM set
)
850 return scm_c_weak_set_fold (fold_trampoline
, SCM_UNPACK_POINTER (proc
), init
, set
);
854 for_each_trampoline (void *closure
, SCM item
, SCM seed
)
856 scm_call_1 (SCM_PACK_POINTER (closure
), item
);
861 scm_weak_set_for_each (SCM proc
, SCM set
)
863 scm_c_weak_set_fold (for_each_trampoline
, SCM_UNPACK_POINTER (proc
), SCM_BOOL_F
, set
);
865 return SCM_UNSPECIFIED
;
869 map_trampoline (void *closure
, SCM item
, SCM seed
)
871 return scm_cons (scm_call_1 (SCM_PACK_POINTER (closure
), item
), seed
);
875 scm_weak_set_map_to_list (SCM proc
, SCM set
)
877 return scm_c_weak_set_fold (map_trampoline
, SCM_UNPACK_POINTER (proc
), SCM_EOL
, set
);
884 #include "libguile/weak-set.x"