1 /* Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include "libguile/_scm.h"
28 #include "libguile/hash.h"
29 #include "libguile/eval.h"
30 #include "libguile/ports.h"
31 #include "libguile/bdw-gc.h"
33 #include "libguile/validate.h"
34 #include "libguile/weak-set.h"
39 This file implements weak sets. One example of a weak set is the
40 symbol table, where you want all instances of the `foo' symbol to map
41 to one object. So when you load a file and it wants a symbol with
42 the characters "foo", you look it up in the table, using custom hash and
43 equality predicates. Only if one is not found will you bother to
44 cons one up and intern it.
46 Another use case for weak sets is the set of open ports. Guile needs
47 to be able to flush them all when the process exits, but the set
48 shouldn't prevent the GC from collecting the port (and thus closing
51 Weak sets are implemented using an open-addressed hash table.
52 Basically this means that there is an array of entries, and the item
53 is expected to be found in the slot corresponding to its hash code,
54 modulo the length of the array.
56 Collisions are handled using linear probing with the Robin Hood
57 technique. See Pedro Celis' paper, "Robin Hood Hashing":
59 http://www.cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
61 The vector of entries is allocated as an "atomic" piece of memory, so
62 that the GC doesn't trace it. When an item is added to the set, a
63 disappearing link is registered to its location. If the item is
64 collected, then that link will be zeroed out.
66 An entry is not just an item, though; the hash code is also stored in
67 the entry. We munge hash codes so that they are never 0. In this
68 way we can detect removed entries (key of zero but nonzero hash
69 code), and can then reshuffle elements as needed to maintain the
72 Compared to buckets-and-chains hash tables, open addressing has the
73 advantage that it is very cache-friendly. It also uses less memory.
75 Implementation-wise, there are two things to note.
77 1. We assume that hash codes are evenly distributed across the
78 range of unsigned longs. The actual hash code stored in the
79 entry is left-shifted by 1 bit (losing 1 bit of hash precision),
80 and then or'd with 1. In this way we ensure that the hash field
81 of an occupied entry is nonzero. To map to an index, we
82 right-shift the hash by one, divide by the size, and take the
85 2. Since the "keys" (the objects in the set) are stored in an
86 atomic region with disappearing links, they need to be accessed
87 with the GC alloc lock. `copy_weak_entry' will do that for
88 you. The hash code itself can be read outside the lock,
99 struct weak_entry_data
{
100 scm_t_weak_entry
*in
;
101 scm_t_weak_entry
*out
;
105 do_copy_weak_entry (void *data
)
107 struct weak_entry_data
*e
= data
;
109 e
->out
->hash
= e
->in
->hash
;
110 e
->out
->key
= e
->in
->key
;
116 copy_weak_entry (scm_t_weak_entry
*src
, scm_t_weak_entry
*dst
)
118 struct weak_entry_data data
;
123 GC_call_with_alloc_lock (do_copy_weak_entry
, &data
);
128 scm_t_weak_entry
*entries
; /* the data */
129 scm_i_pthread_mutex_t lock
; /* the lock */
130 unsigned long size
; /* total number of slots. */
131 unsigned long n_items
; /* number of items in set */
132 unsigned long lower
; /* when to shrink */
133 unsigned long upper
; /* when to grow */
134 int size_index
; /* index into hashset_size */
135 int min_size_index
; /* minimum size_index */
139 #define SCM_WEAK_SET_P(x) (SCM_HAS_TYP7 (x, scm_tc7_weak_set))
140 #define SCM_VALIDATE_WEAK_SET(pos, arg) \
141 SCM_MAKE_VALIDATE_MSG (pos, arg, WEAK_SET_P, "weak-set")
142 #define SCM_WEAK_SET(x) ((scm_t_weak_set *) SCM_CELL_WORD_1 (x))
146 hash_to_index (unsigned long hash
, unsigned long size
)
148 return (hash
>> 1) % size
;
152 entry_distance (unsigned long hash
, unsigned long k
, unsigned long size
)
154 unsigned long origin
= hash_to_index (hash
, size
);
159 /* The other key was displaced and wrapped around. */
160 return size
- origin
+ k
;
163 #ifndef HAVE_GC_MOVE_DISAPPEARING_LINK
165 GC_move_disappearing_link (void **from
, void **to
)
167 GC_unregister_disappearing_link (from
);
168 SCM_I_REGISTER_DISAPPEARING_LINK (to
, *to
);
173 move_weak_entry (scm_t_weak_entry
*from
, scm_t_weak_entry
*to
)
177 scm_t_weak_entry copy
;
179 copy_weak_entry (from
, ©
);
180 to
->hash
= copy
.hash
;
183 if (copy
.key
&& SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
184 GC_move_disappearing_link ((void **) &from
->key
, (void **) &to
->key
);
194 rob_from_rich (scm_t_weak_set
*set
, unsigned long k
)
196 unsigned long empty
, size
;
200 /* If we are to free up slot K in the set, we need room to do so. */
201 assert (set
->n_items
< size
);
205 empty
= (empty
+ 1) % size
;
206 /* Here we access key outside the lock. Is this a problem? At first
207 glance, I wouldn't think so. */
208 while (set
->entries
[empty
].key
);
212 unsigned long last
= empty
? (empty
- 1) : (size
- 1);
213 move_weak_entry (&set
->entries
[last
], &set
->entries
[empty
]);
218 /* Just for sanity. */
219 set
->entries
[empty
].hash
= 0;
220 set
->entries
[empty
].key
= 0;
224 give_to_poor (scm_t_weak_set
*set
, unsigned long k
)
226 /* Slot K was just freed up; possibly shuffle others down. */
227 unsigned long size
= set
->size
;
231 unsigned long next
= (k
+ 1) % size
;
233 scm_t_weak_entry copy
;
235 hash
= set
->entries
[next
].hash
;
237 if (!hash
|| hash_to_index (hash
, size
) == next
)
240 copy_weak_entry (&set
->entries
[next
], ©
);
243 /* Lost weak reference. */
245 give_to_poor (set
, next
);
250 move_weak_entry (&set
->entries
[next
], &set
->entries
[k
]);
255 /* We have shuffled down any entries that should be shuffled down; now
257 set
->entries
[k
].hash
= 0;
258 set
->entries
[k
].key
= 0;
264 /* Growing or shrinking is triggered when the load factor
266 * L = N / S (N: number of items in set, S: bucket vector length)
268 * passes an upper limit of 0.9 or a lower limit of 0.2.
270 * The implementation stores the upper and lower number of items which
271 * trigger a resize in the hashset object.
273 * Possible hash set sizes (primes) are stored in the array
277 static unsigned long hashset_size
[] = {
278 31, 61, 113, 223, 443, 883, 1759, 3517, 7027, 14051, 28099, 56197, 112363,
279 224717, 449419, 898823, 1797641, 3595271, 7190537, 14381041, 28762081,
280 57524111, 115048217, 230096423
283 #define HASHSET_SIZE_N (sizeof(hashset_size)/sizeof(unsigned long))
286 compute_size_index (scm_t_weak_set
*set
)
288 int i
= set
->size_index
;
290 if (set
->n_items
< set
->lower
)
292 /* rehashing is not triggered when i <= min_size */
295 while (i
> set
->min_size_index
296 && set
->n_items
< hashset_size
[i
] / 5);
298 else if (set
->n_items
> set
->upper
)
301 if (i
>= HASHSET_SIZE_N
)
302 /* The biggest size currently is 230096423, which for a 32-bit
303 machine will occupy 1.5GB of memory at a load of 80%. There
304 is probably something better to do here, but if you have a
305 weak map of that size, you are hosed in any case. */
313 is_acceptable_size_index (scm_t_weak_set
*set
, int size_index
)
315 int computed
= compute_size_index (set
);
317 if (size_index
== computed
)
318 /* We were going to grow or shrink, and allocating the new vector
319 didn't change the target size. */
322 if (size_index
== computed
+ 1)
324 /* We were going to enlarge the set, but allocating the new
325 vector finalized some objects, making an enlargement
326 unnecessary. It might still be a good idea to use the larger
327 set, though. (This branch also gets hit if, while allocating
328 the vector, some other thread was actively removing items from
329 the set. That is less likely, though.) */
330 unsigned long new_lower
= hashset_size
[size_index
] / 5;
332 return set
->size
> new_lower
;
335 if (size_index
== computed
- 1)
337 /* We were going to shrink the set, but when we dropped the lock
338 to allocate the new vector, some other thread added elements to
343 /* The computed size differs from our newly allocated size by more
344 than one size index -- recalculate. */
349 resize_set (scm_t_weak_set
*set
)
351 scm_t_weak_entry
*old_entries
, *new_entries
;
353 unsigned long old_size
, new_size
, old_k
;
357 new_size_index
= compute_size_index (set
);
358 if (new_size_index
== set
->size_index
)
360 new_size
= hashset_size
[new_size_index
];
361 new_entries
= scm_gc_malloc_pointerless (new_size
* sizeof(scm_t_weak_entry
),
364 while (!is_acceptable_size_index (set
, new_size_index
));
366 old_entries
= set
->entries
;
367 old_size
= set
->size
;
369 memset (new_entries
, 0, new_size
* sizeof(scm_t_weak_entry
));
371 set
->size_index
= new_size_index
;
372 set
->size
= new_size
;
373 if (new_size_index
<= set
->min_size_index
)
376 set
->lower
= new_size
/ 5;
377 set
->upper
= 9 * new_size
/ 10;
379 set
->entries
= new_entries
;
381 for (old_k
= 0; old_k
< old_size
; old_k
++)
383 scm_t_weak_entry copy
;
384 unsigned long new_k
, distance
;
386 if (!old_entries
[old_k
].hash
)
389 copy_weak_entry (&old_entries
[old_k
], ©
);
394 new_k
= hash_to_index (copy
.hash
, new_size
);
396 for (distance
= 0; ; distance
++, new_k
= (new_k
+ 1) % new_size
)
398 unsigned long other_hash
= new_entries
[new_k
].hash
;
401 /* Found an empty entry. */
404 /* Displace the entry if our distance is less, otherwise keep
406 if (entry_distance (other_hash
, new_k
, new_size
) < distance
)
408 rob_from_rich (set
, new_k
);
414 new_entries
[new_k
].hash
= copy
.hash
;
415 new_entries
[new_k
].key
= copy
.key
;
417 if (SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
418 SCM_I_REGISTER_DISAPPEARING_LINK ((void **) &new_entries
[new_k
].key
,
419 (void *) new_entries
[new_k
].key
);
423 /* Run from a finalizer via do_vacuum_weak_set, this function runs over
424 the whole table, removing lost weak references, reshuffling the set
425 as it goes. It might resize the set if it reaps enough entries. */
427 vacuum_weak_set (scm_t_weak_set
*set
)
429 scm_t_weak_entry
*entries
= set
->entries
;
430 unsigned long size
= set
->size
;
433 for (k
= 0; k
< size
; k
++)
435 unsigned long hash
= entries
[k
].hash
;
439 scm_t_weak_entry copy
;
441 copy_weak_entry (&entries
[k
], ©
);
444 /* Lost weak reference; reshuffle. */
446 give_to_poor (set
, k
);
452 if (set
->n_items
< set
->lower
)
460 weak_set_lookup (scm_t_weak_set
*set
, unsigned long hash
,
461 scm_t_set_predicate_fn pred
, void *closure
,
464 unsigned long k
, distance
, size
;
465 scm_t_weak_entry
*entries
;
468 entries
= set
->entries
;
470 hash
= (hash
<< 1) | 0x1;
471 k
= hash_to_index (hash
, size
);
473 for (distance
= 0; distance
< size
; distance
++, k
= (k
+ 1) % size
)
475 unsigned long other_hash
;
478 other_hash
= entries
[k
].hash
;
484 if (hash
== other_hash
)
486 scm_t_weak_entry copy
;
488 copy_weak_entry (&entries
[k
], ©
);
491 /* Lost weak reference; reshuffle. */
493 give_to_poor (set
, k
);
498 if (pred (SCM_PACK (copy
.key
), closure
))
500 return SCM_PACK (copy
.key
);
503 /* If the entry's distance is less, our key is not in the set. */
504 if (entry_distance (other_hash
, k
, size
) < distance
)
508 /* If we got here, then we were unfortunate enough to loop through the
509 whole set. Shouldn't happen, but hey. */
515 weak_set_add_x (scm_t_weak_set
*set
, unsigned long hash
,
516 scm_t_set_predicate_fn pred
, void *closure
,
519 unsigned long k
, distance
, size
;
520 scm_t_weak_entry
*entries
;
523 entries
= set
->entries
;
525 hash
= (hash
<< 1) | 0x1;
526 k
= hash_to_index (hash
, size
);
528 for (distance
= 0; ; distance
++, k
= (k
+ 1) % size
)
530 unsigned long other_hash
;
533 other_hash
= entries
[k
].hash
;
536 /* Found an empty entry. */
539 if (other_hash
== hash
)
541 scm_t_weak_entry copy
;
543 copy_weak_entry (&entries
[k
], ©
);
546 /* Lost weak reference; reshuffle. */
548 give_to_poor (set
, k
);
553 if (pred (SCM_PACK (copy
.key
), closure
))
554 /* Found an entry with this key. */
555 return SCM_PACK (copy
.key
);
558 if (set
->n_items
> set
->upper
)
559 /* Full set, time to resize. */
562 return weak_set_add_x (set
, hash
>> 1, pred
, closure
, obj
);
565 /* Displace the entry if our distance is less, otherwise keep
567 if (entry_distance (other_hash
, k
, size
) < distance
)
569 rob_from_rich (set
, k
);
575 entries
[k
].hash
= hash
;
576 entries
[k
].key
= SCM_UNPACK (obj
);
578 if (SCM_HEAP_OBJECT_P (obj
))
579 SCM_I_REGISTER_DISAPPEARING_LINK ((void **) &entries
[k
].key
,
580 (void *) SCM2PTR (obj
));
587 weak_set_remove_x (scm_t_weak_set
*set
, unsigned long hash
,
588 scm_t_set_predicate_fn pred
, void *closure
)
590 unsigned long k
, distance
, size
;
591 scm_t_weak_entry
*entries
;
594 entries
= set
->entries
;
596 hash
= (hash
<< 1) | 0x1;
597 k
= hash_to_index (hash
, size
);
599 for (distance
= 0; distance
< size
; distance
++, k
= (k
+ 1) % size
)
601 unsigned long other_hash
;
604 other_hash
= entries
[k
].hash
;
610 if (other_hash
== hash
)
612 scm_t_weak_entry copy
;
614 copy_weak_entry (&entries
[k
], ©
);
617 /* Lost weak reference; reshuffle. */
619 give_to_poor (set
, k
);
624 if (pred (SCM_PACK (copy
.key
), closure
))
625 /* Found an entry with this key. */
630 if (SCM_HEAP_OBJECT_P (SCM_PACK (copy
.key
)))
631 GC_unregister_disappearing_link ((void **) &entries
[k
].key
);
633 if (--set
->n_items
< set
->lower
)
636 give_to_poor (set
, k
);
642 /* If the entry's distance is less, our key is not in the set. */
643 if (entry_distance (other_hash
, k
, size
) < distance
)
651 make_weak_set (unsigned long k
)
655 int i
= 0, n
= k
? k
: 31;
656 while (i
+ 1 < HASHSET_SIZE_N
&& n
> hashset_size
[i
])
660 set
= scm_gc_malloc (sizeof (*set
), "weak-set");
661 set
->entries
= scm_gc_malloc_pointerless (n
* sizeof(scm_t_weak_entry
),
663 memset (set
->entries
, 0, n
* sizeof(scm_t_weak_entry
));
667 set
->upper
= 9 * n
/ 10;
669 set
->min_size_index
= i
;
670 scm_i_pthread_mutex_init (&set
->lock
, NULL
);
672 return scm_cell (scm_tc7_weak_set
, (scm_t_bits
)set
);
676 scm_i_weak_set_print (SCM exp
, SCM port
, scm_print_state
*pstate
)
678 scm_puts_unlocked ("#<", port
);
679 scm_puts_unlocked ("weak-set ", port
);
680 scm_uintprint (SCM_WEAK_SET (exp
)->n_items
, 10, port
);
681 scm_putc_unlocked ('/', port
);
682 scm_uintprint (SCM_WEAK_SET (exp
)->size
, 10, port
);
683 scm_puts_unlocked (">", port
);
687 do_vacuum_weak_set (SCM set
)
691 s
= SCM_WEAK_SET (set
);
693 /* We should always be able to grab this lock, because we are run from
694 a finalizer, which runs in another thread (or an async, which is
695 mostly equivalent). */
696 scm_i_pthread_mutex_lock (&s
->lock
);
698 scm_i_pthread_mutex_unlock (&s
->lock
);
702 scm_c_make_weak_set (unsigned long k
)
706 ret
= make_weak_set (k
);
708 scm_i_register_weak_gc_callback (ret
, do_vacuum_weak_set
);
714 scm_weak_set_p (SCM obj
)
716 return scm_from_bool (SCM_WEAK_SET_P (obj
));
720 scm_weak_set_clear_x (SCM set
)
722 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
724 scm_i_pthread_mutex_lock (&s
->lock
);
726 memset (s
->entries
, 0, sizeof (scm_t_weak_entry
) * s
->size
);
729 scm_i_pthread_mutex_unlock (&s
->lock
);
731 return SCM_UNSPECIFIED
;
735 scm_c_weak_set_lookup (SCM set
, unsigned long raw_hash
,
736 scm_t_set_predicate_fn pred
,
737 void *closure
, SCM dflt
)
740 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
742 scm_i_pthread_mutex_lock (&s
->lock
);
744 ret
= weak_set_lookup (s
, raw_hash
, pred
, closure
, dflt
);
746 scm_i_pthread_mutex_unlock (&s
->lock
);
752 scm_c_weak_set_add_x (SCM set
, unsigned long raw_hash
,
753 scm_t_set_predicate_fn pred
,
754 void *closure
, SCM obj
)
757 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
759 scm_i_pthread_mutex_lock (&s
->lock
);
761 ret
= weak_set_add_x (s
, raw_hash
, pred
, closure
, obj
);
763 scm_i_pthread_mutex_unlock (&s
->lock
);
769 scm_c_weak_set_remove_x (SCM set
, unsigned long raw_hash
,
770 scm_t_set_predicate_fn pred
,
773 scm_t_weak_set
*s
= SCM_WEAK_SET (set
);
775 scm_i_pthread_mutex_lock (&s
->lock
);
777 weak_set_remove_x (s
, raw_hash
, pred
, closure
);
779 scm_i_pthread_mutex_unlock (&s
->lock
);
783 eq_predicate (SCM x
, void *closure
)
785 return scm_is_eq (x
, SCM_PACK_POINTER (closure
));
789 scm_weak_set_add_x (SCM set
, SCM obj
)
791 return scm_c_weak_set_add_x (set
, scm_ihashq (obj
, -1),
792 eq_predicate
, SCM_UNPACK_POINTER (obj
), obj
);
796 scm_weak_set_remove_x (SCM set
, SCM obj
)
798 scm_c_weak_set_remove_x (set
, scm_ihashq (obj
, -1),
799 eq_predicate
, SCM_UNPACK_POINTER (obj
));
801 return SCM_UNSPECIFIED
;
805 scm_c_weak_set_fold (scm_t_set_fold_fn proc
, void *closure
,
809 scm_t_weak_entry
*entries
;
810 unsigned long k
, size
;
812 s
= SCM_WEAK_SET (set
);
814 scm_i_pthread_mutex_lock (&s
->lock
);
817 entries
= s
->entries
;
819 for (k
= 0; k
< size
; k
++)
823 scm_t_weak_entry copy
;
825 copy_weak_entry (&entries
[k
], ©
);
829 /* Release set lock while we call the function. */
830 scm_i_pthread_mutex_unlock (&s
->lock
);
831 init
= proc (closure
, SCM_PACK (copy
.key
), init
);
832 scm_i_pthread_mutex_lock (&s
->lock
);
837 scm_i_pthread_mutex_unlock (&s
->lock
);
843 fold_trampoline (void *closure
, SCM item
, SCM init
)
845 return scm_call_2 (SCM_PACK_POINTER (closure
), item
, init
);
849 scm_weak_set_fold (SCM proc
, SCM init
, SCM set
)
851 return scm_c_weak_set_fold (fold_trampoline
, SCM_UNPACK_POINTER (proc
), init
, set
);
855 for_each_trampoline (void *closure
, SCM item
, SCM seed
)
857 scm_call_1 (SCM_PACK_POINTER (closure
), item
);
862 scm_weak_set_for_each (SCM proc
, SCM set
)
864 scm_c_weak_set_fold (for_each_trampoline
, SCM_UNPACK_POINTER (proc
), SCM_BOOL_F
, set
);
866 return SCM_UNSPECIFIED
;
870 map_trampoline (void *closure
, SCM item
, SCM seed
)
872 return scm_cons (scm_call_1 (SCM_PACK_POINTER (closure
), item
), seed
);
876 scm_weak_set_map_to_list (SCM proc
, SCM set
)
878 return scm_c_weak_set_fold (map_trampoline
, SCM_UNPACK_POINTER (proc
), SCM_EOL
, set
);
885 #include "libguile/weak-set.x"