1 /* Copyright (C) 2011 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include "libguile/_scm.h"
28 #include "libguile/hash.h"
29 #include "libguile/eval.h"
30 #include "libguile/ports.h"
31 #include "libguile/bdw-gc.h"
33 #include "libguile/validate.h"
34 #include "libguile/weak-set.h"
39 This file implements weak sets. One example of a weak set is the
40 symbol table, where you want all instances of the `foo' symbol to map
41 to one object. So when you load a file and it wants a symbol with
the characters "foo", you look one up in the table, using custom hash
and equality predicates.  Only if one is not found will you bother to
cons one up and intern it.
46 Another use case for weak sets is the set of open ports. Guile needs
47 to be able to flush them all when the process exits, but the set
48 shouldn't prevent the GC from collecting the port (and thus closing
51 Weak sets are implemented using an open-addressed hash table.
52 Basically this means that there is an array of entries, and the item
53 is expected to be found the slot corresponding to its hash code,
54 modulo the length of the array.
56 Collisions are handled using linear probing with the Robin Hood
57 technique. See Pedro Celis' paper, "Robin Hood Hashing":
59 http://www.cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
61 The vector of entries is allocated as an "atomic" piece of memory, so
62 that the GC doesn't trace it. When an item is added to the set, a
63 disappearing link is registered to its location. If the item is
64 collected, then that link will be zeroed out.
66 An entry is not just an item, though; the hash code is also stored in
67 the entry. We munge hash codes so that they are never 0. In this
68 way we can detect removed entries (key of zero but nonzero hash
69 code), and can then reshuffle elements as needed to maintain the
72 Compared to buckets-and-chains hash tables, open addressing has the
73 advantage that it is very cache-friendly. It also uses less memory.
75 Implementation-wise, there are two things to note.
77 1. We assume that hash codes are evenly distributed across the
78 range of unsigned longs. The actual hash code stored in the
79 entry is left-shifted by 1 bit (losing 1 bit of hash precision),
80 and then or'd with 1. In this way we ensure that the hash field
81 of an occupied entry is nonzero. To map to an index, we
82 right-shift the hash by one, divide by the size, and take the
85 2. Since the "keys" (the objects in the set) are stored in an
86 atomic region with disappearing links, they need to be accessed
87 with the GC alloc lock. `copy_weak_entry' will do that for
88 you. The hash code itself can be read outside the lock,
99 struct weak_entry_data
{
100 scm_t_weak_entry
*in
;
101 scm_t_weak_entry
*out
;
105 do_copy_weak_entry (void *data
)
107 struct weak_entry_data
*e
= data
;
109 e
->out
->hash
= e
->in
->hash
;
110 e
->out
->key
= e
->in
->key
;
116 copy_weak_entry (scm_t_weak_entry
*src
, scm_t_weak_entry
*dst
)
118 struct weak_entry_data data
;
123 GC_call_with_alloc_lock (do_copy_weak_entry
, &data
);
128 scm_t_weak_entry
*entries
; /* the data */
129 scm_i_pthread_mutex_t lock
; /* the lock */
130 unsigned long size
; /* total number of slots. */
131 unsigned long n_items
; /* number of items in set */
132 unsigned long lower
; /* when to shrink */
133 unsigned long upper
; /* when to grow */
134 int size_index
; /* index into hashset_size */
135 int min_size_index
; /* minimum size_index */
/* Type predicate, argument validator, and payload accessor for weak-set
   cells: word 0 carries the scm_tc7_weak_set type tag, word 1 holds the
   pointer to the scm_t_weak_set.  */
#define SCM_WEAK_SET_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
#define SCM_VALIDATE_WEAK_SET(pos, arg) \
  SCM_MAKE_VALIDATE_MSG (pos, arg, WEAK_SET_P, "weak-set")
#define SCM_WEAK_SET(x) ((scm_t_weak_set *) SCM_CELL_WORD_1 (x))
/* Map a munged HASH (low bit always 1; see the file comment) to a slot
   index.  The tag bit is shifted away first so it does not perturb the
   distribution.  */
static unsigned long
hash_to_index (unsigned long hash, unsigned long size)
{
  unsigned long raw = hash >> 1;
  return raw % size;
}
/* Robin Hood probe distance: how far slot K is from the slot that HASH
   would ideally occupy, accounting for wrap-around at the end of the
   array.  */
static unsigned long
entry_distance (unsigned long hash, unsigned long k, unsigned long size)
{
  unsigned long origin = hash_to_index (hash, size);

  if (k >= origin)
    return k - origin;
  else
    /* The other key was displaced and wrapped around.  */
    return size - origin + k;
}
164 move_weak_entry (scm_t_weak_entry
*from
, scm_t_weak_entry
*to
)
168 scm_t_weak_entry copy
;
170 copy_weak_entry (from
, ©
);
171 to
->hash
= copy
.hash
;
174 if (copy
.key
&& SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
176 GC_unregister_disappearing_link ((GC_PTR
) &from
->key
);
177 SCM_I_REGISTER_DISAPPEARING_LINK ((GC_PTR
) &to
->key
,
189 rob_from_rich (scm_t_weak_set
*set
, unsigned long k
)
191 unsigned long empty
, size
;
195 /* If we are to free up slot K in the set, we need room to do so. */
196 assert (set
->n_items
< size
);
200 empty
= (empty
+ 1) % size
;
201 /* Here we access key outside the lock. Is this a problem? At first
202 glance, I wouldn't think so. */
203 while (set
->entries
[empty
].key
);
207 unsigned long last
= empty
? (empty
- 1) : (size
- 1);
208 move_weak_entry (&set
->entries
[last
], &set
->entries
[empty
]);
213 /* Just for sanity. */
214 set
->entries
[empty
].hash
= 0;
215 set
->entries
[empty
].key
= 0;
219 give_to_poor (scm_t_weak_set
*set
, unsigned long k
)
221 /* Slot K was just freed up; possibly shuffle others down. */
222 unsigned long size
= set
->size
;
226 unsigned long next
= (k
+ 1) % size
;
228 scm_t_weak_entry copy
;
230 hash
= set
->entries
[next
].hash
;
232 if (!hash
|| hash_to_index (hash
, size
) == next
)
235 copy_weak_entry (&set
->entries
[next
], ©
);
238 /* Lost weak reference. */
240 give_to_poor (set
, next
);
245 move_weak_entry (&set
->entries
[next
], &set
->entries
[k
]);
250 /* We have shuffled down any entries that should be shuffled down; now
252 set
->entries
[k
].hash
= 0;
253 set
->entries
[k
].key
= 0;
259 /* Growing or shrinking is triggered when the load factor
261 * L = N / S (N: number of items in set, S: bucket vector length)
263 * passes an upper limit of 0.9 or a lower limit of 0.2.
265 * The implementation stores the upper and lower number of items which
266 * trigger a resize in the hashset object.
268 * Possible hash set sizes (primes) are stored in the array
/* The allowed set sizes: a fixed ladder of primes, each roughly double
   the previous.  */
static unsigned long hashset_size[] = {
  31, 61, 113, 223, 443, 883, 1759, 3517, 7027, 14051, 28099, 56197, 112363,
  224717, 449419, 898823, 1797641, 3595271, 7190537, 14381041, 28762081,
  57524111, 115048217, 230096423
};

#define HASHSET_SIZE_N (sizeof(hashset_size)/sizeof(unsigned long))
281 resize_set (scm_t_weak_set
*set
)
283 scm_t_weak_entry
*old_entries
, *new_entries
;
285 unsigned long old_size
, new_size
, old_k
;
287 old_entries
= set
->entries
;
288 old_size
= set
->size
;
290 if (set
->n_items
< set
->lower
)
292 /* rehashing is not triggered when i <= min_size */
296 while (i
> set
->min_size_index
297 && set
->n_items
< hashset_size
[i
] / 4);
301 i
= set
->size_index
+ 1;
302 if (i
>= HASHSET_SIZE_N
)
303 /* The biggest size currently is 230096423, which for a 32-bit
304 machine will occupy 1.5GB of memory at a load of 80%. There
305 is probably something better to do here, but if you have a
306 weak map of that size, you are hosed in any case. */
310 new_size
= hashset_size
[i
];
311 new_entries
= scm_gc_malloc_pointerless (new_size
* sizeof(scm_t_weak_entry
),
313 memset (new_entries
, 0, new_size
* sizeof(scm_t_weak_entry
));
316 set
->size
= new_size
;
317 if (i
<= set
->min_size_index
)
320 set
->lower
= new_size
/ 5;
321 set
->upper
= 9 * new_size
/ 10;
323 set
->entries
= new_entries
;
325 for (old_k
= 0; old_k
< old_size
; old_k
++)
327 scm_t_weak_entry copy
;
328 unsigned long new_k
, distance
;
330 if (!old_entries
[old_k
].hash
)
333 copy_weak_entry (&old_entries
[old_k
], ©
);
338 new_k
= hash_to_index (copy
.hash
, new_size
);
340 for (distance
= 0; ; distance
++, new_k
= (new_k
+ 1) % new_size
)
342 unsigned long other_hash
= new_entries
[new_k
].hash
;
345 /* Found an empty entry. */
348 /* Displace the entry if our distance is less, otherwise keep
350 if (entry_distance (other_hash
, new_k
, new_size
) < distance
)
352 rob_from_rich (set
, new_k
);
358 new_entries
[new_k
].hash
= copy
.hash
;
359 new_entries
[new_k
].key
= copy
.key
;
361 if (SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
362 SCM_I_REGISTER_DISAPPEARING_LINK ((GC_PTR
) &new_entries
[new_k
].key
,
363 (GC_PTR
) new_entries
[new_k
].key
);
367 /* Run after GC via do_vacuum_weak_set, this function runs over the
368 whole table, removing lost weak references, reshuffling the set as it
369 goes. It might resize the set if it reaps enough entries. */
371 vacuum_weak_set (scm_t_weak_set
*set
)
373 scm_t_weak_entry
*entries
= set
->entries
;
374 unsigned long size
= set
->size
;
377 for (k
= 0; k
< size
; k
++)
379 unsigned long hash
= entries
[k
].hash
;
383 scm_t_weak_entry copy
;
385 copy_weak_entry (&entries
[k
], ©
);
388 /* Lost weak reference; reshuffle. */
390 give_to_poor (set
, k
);
396 if (set
->n_items
< set
->lower
)
404 weak_set_lookup (scm_t_weak_set
*set
, unsigned long hash
,
405 scm_t_set_predicate_fn pred
, void *closure
,
408 unsigned long k
, distance
, size
;
409 scm_t_weak_entry
*entries
;
412 entries
= set
->entries
;
414 hash
= (hash
<< 1) | 0x1;
415 k
= hash_to_index (hash
, size
);
417 for (distance
= 0; distance
< size
; distance
++, k
= (k
+ 1) % size
)
419 unsigned long other_hash
;
422 other_hash
= entries
[k
].hash
;
428 if (hash
== other_hash
)
430 scm_t_weak_entry copy
;
432 copy_weak_entry (&entries
[k
], ©
);
435 /* Lost weak reference; reshuffle. */
437 give_to_poor (set
, k
);
442 if (pred (SCM_PACK (copy
.key
), closure
))
444 return SCM_PACK (copy
.key
);
447 /* If the entry's distance is less, our key is not in the set. */
448 if (entry_distance (other_hash
, k
, size
) < distance
)
452 /* If we got here, then we were unfortunate enough to loop through the
453 whole set. Shouldn't happen, but hey. */
459 weak_set_add_x (scm_t_weak_set
*set
, unsigned long hash
,
460 scm_t_set_predicate_fn pred
, void *closure
,
463 unsigned long k
, distance
, size
;
464 scm_t_weak_entry
*entries
;
467 entries
= set
->entries
;
469 hash
= (hash
<< 1) | 0x1;
470 k
= hash_to_index (hash
, size
);
472 for (distance
= 0; ; distance
++, k
= (k
+ 1) % size
)
474 unsigned long other_hash
;
477 other_hash
= entries
[k
].hash
;
480 /* Found an empty entry. */
483 if (other_hash
== hash
)
485 scm_t_weak_entry copy
;
487 copy_weak_entry (&entries
[k
], ©
);
490 /* Lost weak reference; reshuffle. */
492 give_to_poor (set
, k
);
497 if (pred (SCM_PACK (copy
.key
), closure
))
498 /* Found an entry with this key. */
499 return SCM_PACK (copy
.key
);
502 if (set
->n_items
> set
->upper
)
503 /* Full set, time to resize. */
506 return weak_set_add_x (set
, hash
>> 1, pred
, closure
, obj
);
509 /* Displace the entry if our distance is less, otherwise keep
511 if (entry_distance (other_hash
, k
, size
) < distance
)
513 rob_from_rich (set
, k
);
519 entries
[k
].hash
= hash
;
520 entries
[k
].key
= SCM_UNPACK (obj
);
522 if (SCM_HEAP_OBJECT_P (obj
))
523 SCM_I_REGISTER_DISAPPEARING_LINK ((GC_PTR
) &entries
[k
].key
,
524 (GC_PTR
) SCM_HEAP_OBJECT_BASE (obj
));
531 weak_set_remove_x (scm_t_weak_set
*set
, unsigned long hash
,
532 scm_t_set_predicate_fn pred
, void *closure
)
534 unsigned long k
, distance
, size
;
535 scm_t_weak_entry
*entries
;
538 entries
= set
->entries
;
540 hash
= (hash
<< 1) | 0x1;
541 k
= hash_to_index (hash
, size
);
543 for (distance
= 0; distance
< size
; distance
++, k
= (k
+ 1) % size
)
545 unsigned long other_hash
;
548 other_hash
= entries
[k
].hash
;
554 if (other_hash
== hash
)
556 scm_t_weak_entry copy
;
558 copy_weak_entry (&entries
[k
], ©
);
561 /* Lost weak reference; reshuffle. */
563 give_to_poor (set
, k
);
568 if (pred (SCM_PACK (copy
.key
), closure
))
569 /* Found an entry with this key. */
574 if (SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
575 GC_unregister_disappearing_link ((GC_PTR
) &entries
[k
].key
);
577 if (--set
->n_items
< set
->lower
)
580 give_to_poor (set
, k
);
586 /* If the entry's distance is less, our key is not in the set. */
587 if (entry_distance (other_hash
, k
, size
) < distance
)
595 make_weak_set (unsigned long k
)
599 int i
= 0, n
= k
? k
: 31;
600 while (i
+ 1 < HASHSET_SIZE_N
&& n
> hashset_size
[i
])
604 set
= scm_gc_malloc (sizeof (*set
), "weak-set");
605 set
->entries
= scm_gc_malloc_pointerless (n
* sizeof(scm_t_weak_entry
),
607 memset (set
->entries
, 0, n
* sizeof(scm_t_weak_entry
));
611 set
->upper
= 9 * n
/ 10;
613 set
->min_size_index
= i
;
614 scm_i_pthread_mutex_init (&set
->lock
, NULL
);
616 return scm_cell (scm_tc7_weak_set
, (scm_t_bits
)set
);
620 scm_i_weak_set_print (SCM exp
, SCM port
, scm_print_state
*pstate
)
622 scm_puts_unlocked ("#<", port
);
623 scm_puts_unlocked ("weak-set ", port
);
624 scm_uintprint (SCM_WEAK_SET (exp
)->n_items
, 10, port
);
625 scm_putc_unlocked ('/', port
);
626 scm_uintprint (SCM_WEAK_SET (exp
)->size
, 10, port
);
627 scm_puts_unlocked (">", port
);
631 do_vacuum_weak_set (SCM set
)
635 s
= SCM_WEAK_SET (set
);
637 if (scm_i_pthread_mutex_trylock (&s
->lock
) == 0)
640 scm_i_pthread_mutex_unlock (&s
->lock
);
/* The before-gc C hook only runs if GC_set_start_callback is available,
   so if not, fall back on a finalizer-based implementation.  */

/* WEAK is a two-word cell: [0] a disappearing-link'd object pointer,
   [1] the callback.  Invoke the callback on the object if it is still
   live; return nonzero while the registration should stay active.  */
static int
weak_gc_callback (void **weak)
{
  void *val = weak[0];
  void (*callback) (SCM) = weak[1];

  if (!val)
    /* Object died; tell the caller to drop the registration.  */
    return 0;

  callback (SCM_PACK_POINTER (val));

  return 1;
}
662 #ifdef HAVE_GC_SET_START_CALLBACK
664 weak_gc_hook (void *hook_data
, void *fn_data
, void *data
)
666 if (!weak_gc_callback (fn_data
))
667 scm_c_hook_remove (&scm_before_gc_c_hook
, weak_gc_hook
, fn_data
);
673 weak_gc_finalizer (void *ptr
, void *data
)
675 if (weak_gc_callback (ptr
))
676 GC_REGISTER_FINALIZER_NO_ORDER (ptr
, weak_gc_finalizer
, data
, NULL
, NULL
);
681 scm_c_register_weak_gc_callback (SCM obj
, void (*callback
) (SCM
))
683 void **weak
= GC_MALLOC_ATOMIC (sizeof (void*) * 2);
685 weak
[0] = SCM_UNPACK_POINTER (obj
);
686 weak
[1] = (void*)callback
;
687 GC_GENERAL_REGISTER_DISAPPEARING_LINK (weak
, SCM_HEAP_OBJECT_BASE (obj
));
689 #ifdef HAVE_GC_SET_START_CALLBACK
690 scm_c_hook_add (&scm_after_gc_c_hook
, weak_gc_hook
, weak
, 0);
692 GC_REGISTER_FINALIZER_NO_ORDER (weak
, weak_gc_finalizer
, NULL
, NULL
, NULL
);
697 scm_c_make_weak_set (unsigned long k
)
701 ret
= make_weak_set (k
);
703 scm_c_register_weak_gc_callback (ret
, do_vacuum_weak_set
);
709 scm_weak_set_p (SCM obj
)
711 return scm_from_bool (SCM_WEAK_SET_P (obj
));
715 scm_weak_set_clear_x (SCM set
)
717 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
719 scm_i_pthread_mutex_lock (&s
->lock
);
721 memset (s
->entries
, 0, sizeof (scm_t_weak_entry
) * s
->size
);
724 scm_i_pthread_mutex_unlock (&s
->lock
);
726 return SCM_UNSPECIFIED
;
730 scm_c_weak_set_lookup (SCM set
, unsigned long raw_hash
,
731 scm_t_set_predicate_fn pred
,
732 void *closure
, SCM dflt
)
735 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
737 scm_i_pthread_mutex_lock (&s
->lock
);
739 ret
= weak_set_lookup (s
, raw_hash
, pred
, closure
, dflt
);
741 scm_i_pthread_mutex_unlock (&s
->lock
);
747 scm_c_weak_set_add_x (SCM set
, unsigned long raw_hash
,
748 scm_t_set_predicate_fn pred
,
749 void *closure
, SCM obj
)
752 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
754 scm_i_pthread_mutex_lock (&s
->lock
);
756 ret
= weak_set_add_x (s
, raw_hash
, pred
, closure
, obj
);
758 scm_i_pthread_mutex_unlock (&s
->lock
);
764 scm_c_weak_set_remove_x (SCM set
, unsigned long raw_hash
,
765 scm_t_set_predicate_fn pred
,
768 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
770 scm_i_pthread_mutex_lock (&s
->lock
);
772 weak_set_remove_x (s
, raw_hash
, pred
, closure
);
774 scm_i_pthread_mutex_unlock (&s
->lock
);
778 eq_predicate (SCM x
, void *closure
)
780 return scm_is_eq (x
, SCM_PACK_POINTER (closure
));
784 scm_weak_set_add_x (SCM set
, SCM obj
)
786 return scm_c_weak_set_add_x (set
, scm_ihashq (obj
, -1),
787 eq_predicate
, SCM_UNPACK_POINTER (obj
), obj
);
791 scm_weak_set_remove_x (SCM set
, SCM obj
)
793 scm_c_weak_set_remove_x (set
, scm_ihashq (obj
, -1),
794 eq_predicate
, SCM_UNPACK_POINTER (obj
));
796 return SCM_UNSPECIFIED
;
800 scm_c_weak_set_fold (scm_t_set_fold_fn proc
, void *closure
,
804 scm_t_weak_entry
*entries
;
805 unsigned long k
, size
;
807 s
= SCM_WEAK_SET (set
);
809 scm_i_pthread_mutex_lock (&s
->lock
);
812 entries
= s
->entries
;
814 for (k
= 0; k
< size
; k
++)
818 scm_t_weak_entry copy
;
820 copy_weak_entry (&entries
[k
], ©
);
824 /* Release set lock while we call the function. */
825 scm_i_pthread_mutex_unlock (&s
->lock
);
826 init
= proc (closure
, SCM_PACK (copy
.key
), init
);
827 scm_i_pthread_mutex_lock (&s
->lock
);
832 scm_i_pthread_mutex_unlock (&s
->lock
);
838 fold_trampoline (void *closure
, SCM item
, SCM init
)
840 return scm_call_2 (SCM_PACK_POINTER (closure
), item
, init
);
844 scm_weak_set_fold (SCM proc
, SCM init
, SCM set
)
846 return scm_c_weak_set_fold (fold_trampoline
, SCM_UNPACK_POINTER (proc
), init
, set
);
850 for_each_trampoline (void *closure
, SCM item
, SCM seed
)
852 scm_call_1 (SCM_PACK_POINTER (closure
), item
);
857 scm_weak_set_for_each (SCM proc
, SCM set
)
859 scm_c_weak_set_fold (for_each_trampoline
, SCM_UNPACK_POINTER (proc
), SCM_BOOL_F
, set
);
861 return SCM_UNSPECIFIED
;
865 map_trampoline (void *closure
, SCM item
, SCM seed
)
867 return scm_cons (scm_call_1 (SCM_PACK_POINTER (closure
), item
), seed
);
871 scm_weak_set_map_to_list (SCM proc
, SCM set
)
873 return scm_c_weak_set_fold (map_trampoline
, SCM_UNPACK_POINTER (proc
), SCM_EOL
, set
);
880 #include "libguile/weak-set.x"