/*
 * Copyright 2000, International Business Machines Corporation and others.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
10 #include <afsconfig.h>
11 #include <afs/param.h>
14 #include "afs_atomlist.h"
15 #include "afs_lhash.h"
17 /* for now, only turn on assertions in user-space code */
19 #define CHECK_INVARIANTS
/*
 * Maximum hash table load factor (data records per logical bucket).
 * When ndata exceeds LOAD_FACTOR * ltable the table is expanded by
 * splitting one bucket chain.  An enum is used rather than #define so
 * the constant is typed, scoped, and debugger-visible.
 */
enum { LOAD_FACTOR = 5 };
/*
 * NOTE(review): fields of the afs_lhash control structure.  The
 * enclosing "struct afs_lhash {" opener is not visible in this
 * excerpt, and the text below is line-mangled by extraction; it is
 * left byte-identical.  equal/allocate/deallocate are the
 * caller-supplied callbacks installed by afs_lhash_create().
 */
36 int (*equal
) (const void *a
, const void *b
);
38 void *(*allocate
) (size_t n
);
40 void (*deallocate
) (void *p
, size_t n
);
/* linear-hashing split state, consulted by afs_lhash_address() */
42 size_t p
; /* index of next bucket to be split */
43 size_t maxp
; /* upper bound on p during this expansion */
45 size_t ndata
; /* count of data records in hash */
47 size_t ltable
; /* logical table size */
49 size_t ntable
; /* physical table size */
50 struct bucket
**table
;
52 afs_atomlist
*bucket_list
; /* hash bucket allocator */
/* statistics reported by afs_lhash_stat() */
54 size_t search_calls
; /* cumulative afs_lhash_search() call count */
55 size_t search_tests
; /* cumulative afs_lhash_search() comparison count */
56 size_t remove_calls
; /* cumulative afs_lhash_remove() call count */
57 size_t remove_tests
; /* cumulative afs_lhash_remove() comparison count */
61 * make sure the given hash table can accomodate the given index
62 * value; expand the bucket table if necessary
64 * returns 0 on success, <0 on failure
68 afs_lhash_accomodate(afs_lhash
* lh
, size_t max_index
)
71 * The usual approach to growing ntable to accomodate max_index
72 * would be to double ntable enough times. This would give us
73 * an exponential backoff in how many times we need to grow
74 * ntable. However, there is a space tradeoff.
76 * Since afs_lhash may be used in an environment where memory is
77 * constrained, we choose instead to grow ntable in fixed
78 * increments. This may have a larger total cost, but it is
79 * amortized in smaller increments.
81 * If even this cost is too great, we can consider adopting the
82 * two-level array approach mentioned in the paper. This could
83 * keep the sizes of the allocations more consistent, and also
84 * reduce the amount of data copying. However, we won't do that
85 * until we see real results that show that the benefit of the
86 * additional complexity is worth it.
88 enum { ntable_inc
= 1024 / sizeof *lh
->table
};
91 struct bucket
**new_table
;
94 /* if the given index fits, we're done */
96 if (max_index
< lh
->ntable
)
99 /* otherwise, determine new_ntable and allocate new_table */
101 if (lh
->ntable
== (size_t) 0) {
102 new_ntable
= ntable_inc
;
104 new_ntable
= lh
->ntable
;
107 for (; !(max_index
< new_ntable
); new_ntable
+= ntable_inc
)
110 new_table
= lh
->allocate(new_ntable
* sizeof *lh
->table
);
115 /* initialize new_table from the old table */
117 for (i
= 0; i
< lh
->ntable
; i
++) {
118 new_table
[i
] = lh
->table
[i
];
120 for (i
= lh
->ntable
; i
< new_ntable
; i
++) {
125 * free the old table, if any, and switch to the new table
127 * (when called from afs_lhash_create(), the table is empty)
130 if (lh
->table
&& lh
->ntable
) {
131 lh
->deallocate(lh
->table
, lh
->ntable
* sizeof *lh
->table
);
133 lh
->ntable
= new_ntable
;
134 lh
->table
= new_table
;
140 * given a hash table and a key value, returns the index corresponding
145 afs_lhash_address(const afs_lhash
* lh
, unsigned key
)
147 enum { prime
= 1048583 };
151 h
= key
% prime
; /* 0 <= h < prime */
153 address
= h
% lh
->maxp
;
154 if (address
< lh
->p
) {
155 address
= h
% (2 * lh
->maxp
);
162 * if possible, expand the logical size of the given hash table
165 afs_lhash_expand(afs_lhash
* lh
)
167 size_t old_address
; /* index of bucket to split */
168 size_t new_address
; /* index of new bucket */
170 struct bucket
*current_b
; /* for scanning down old chain */
171 struct bucket
*previous
;
173 struct bucket
*last_of_new
; /* last element in new chain */
175 /* save address to split */
179 /* determine new address, grow table if necessary */
181 new_address
= lh
->maxp
+ lh
->p
;
183 if (afs_lhash_accomodate(lh
, new_address
) < 0) {
187 /* adjust the state variables */
189 /* this makes afs_lhash_address() work with respect
190 * to the expanded table */
193 if (lh
->p
== lh
->maxp
) {
200 #ifdef CHECK_INVARIANTS
201 assert(lh
->ltable
- 1 == new_address
);
202 assert(lh
->ltable
<= lh
->ntable
);
203 #endif /* CHECK_INVARIANTS */
205 /* relocate records to the new bucket */
207 current_b
= lh
->table
[old_address
];
210 lh
->table
[new_address
] = 0;
214 addr
= afs_lhash_address(lh
, current_b
->key
);
215 if (addr
== new_address
) {
216 /* attach it to the end of the new chain */
218 last_of_new
->next
= current_b
;
220 lh
->table
[new_address
] = current_b
;
223 previous
->next
= current_b
->next
;
225 lh
->table
[old_address
] = current_b
->next
;
227 last_of_new
= current_b
;
228 current_b
= current_b
->next
;
229 last_of_new
->next
= 0;
231 #ifdef CHECK_INVARIANTS
232 assert(addr
== old_address
);
233 #endif /* CHECK_INVARIANTS */
234 /* leave it on the old chain */
235 previous
= current_b
;
236 current_b
= current_b
->next
;
242 afs_lhash_create(int (*equal
) (const void *a
, const void *b
),
243 /* returns true if the elements pointed to by
244 * a and b are the same, false otherwise */
245 void *(*allocate
) (size_t n
), void (*deallocate
) (void *p
,
251 lh
= allocate(sizeof *lh
);
253 return (afs_lhash
*) 0;
256 lh
->allocate
= allocate
;
257 lh
->deallocate
= deallocate
;
262 lh
->ltable
= lh
->maxp
;
269 if (afs_lhash_accomodate(lh
, lh
->ltable
- 1) < 0) {
270 lh
->deallocate(lh
, sizeof *lh
);
271 return (afs_lhash
*) 0;
273 #ifdef CHECK_INVARIANTS
274 assert(lh
->ltable
<= lh
->ntable
);
275 #endif /* CHECK_INVARIANTS */
277 /* maybe the chunk size should be passed in for hashes, so we
278 * can pass it down here */
281 afs_atomlist_create(sizeof(struct bucket
), sizeof(long) * 1024,
282 allocate
, deallocate
);
283 #ifdef CHECK_INVARIANTS
284 assert(lh
->bucket_list
);
285 #endif /* CHECK_INVARIANTS */
287 lh
->search_calls
= 0;
288 lh
->search_tests
= 0;
289 lh
->remove_calls
= 0;
290 lh
->remove_tests
= 0;
296 afs_lhash_destroy(afs_lhash
* lh
)
298 #ifdef CHECK_INVARIANTS
299 assert(lh
->ltable
<= lh
->ntable
);
300 #endif /* CHECK_INVARIANTS */
303 * first, free the buckets
305 * afs_atomlist_destroy() implicitly frees all the memory allocated
306 * from the given afs_atomlist, so there is no need to go through
307 * the hash table to explicitly free each bucket.
310 afs_atomlist_destroy(lh
->bucket_list
);
312 /* then, free the table and the afs_lhash */
314 lh
->deallocate(lh
->table
, lh
->ntable
* sizeof *lh
->table
);
315 lh
->deallocate(lh
, sizeof *lh
);
319 afs_lhash_iter(afs_lhash
* lh
,
320 void (*f
) (size_t index
, unsigned key
, void *data
)
325 #ifdef CHECK_INVARIANTS
326 assert(lh
->ltable
<= lh
->ntable
);
327 #endif /* CHECK_INVARIANTS */
329 for (i
= 0; i
< lh
->ltable
; i
++) {
330 struct bucket
*current_b
;
332 for (current_b
= lh
->table
[i
]; current_b
; current_b
= current_b
->next
) {
333 f(i
, current_b
->key
, current_b
->data
);
339 afs_lhash_search(afs_lhash
* lh
, unsigned key
, const void *data
)
342 struct bucket
*previous
;
343 struct bucket
*current_b
;
347 k
= afs_lhash_address(lh
, key
);
348 for (previous
= 0, current_b
= lh
->table
[k
]; current_b
;
349 previous
= current_b
, current_b
= current_b
->next
) {
351 if (lh
->equal(data
, current_b
->data
)) {
354 * Since we found what we were looking for, move
355 * the bucket to the head of the chain. The
356 * theory is that unused hash table entries will
357 * be left at the end of the chain, where they
358 * will not cause search times to increase.
360 * This optimization is both easy to understand
361 * and cheap to execute, so we go ahead and do
367 previous
->next
= current_b
->next
;
368 current_b
->next
= lh
->table
[k
];
369 lh
->table
[k
] = current_b
;
372 return current_b
->data
;
380 afs_lhash_rosearch(const afs_lhash
* lh
, unsigned key
, const void *data
)
383 struct bucket
*current_b
;
385 k
= afs_lhash_address(lh
, key
);
386 for (current_b
= lh
->table
[k
]; current_b
; current_b
= current_b
->next
) {
387 if (lh
->equal(data
, current_b
->data
)) {
388 return current_b
->data
;
396 afs_lhash_remove(afs_lhash
* lh
, unsigned key
, const void *data
)
399 struct bucket
**pprev
;
404 k
= afs_lhash_address(lh
, key
);
405 for (pprev
= 0, cur
= lh
->table
[k
]; cur
;
406 pprev
= &cur
->next
, cur
= cur
->next
) {
408 if (lh
->equal(data
, cur
->data
)) {
409 void *ptr
= cur
->data
;
414 lh
->table
[k
] = cur
->next
;
417 afs_atomlist_put(lh
->bucket_list
, cur
);
429 afs_lhash_enter(afs_lhash
* lh
, unsigned key
, void *data
)
434 buck
= afs_atomlist_get(lh
->bucket_list
);
442 k
= afs_lhash_address(lh
, key
);
444 buck
->next
= lh
->table
[k
];
450 * The load factor is the number of data items in the hash
451 * table divided by the logical table length. We expand the
452 * table when the load factor exceeds a predetermined limit
453 * (LOAD_FACTOR). To avoid floating point, we multiply both
454 * sides of the inequality by the logical table length...
456 if (lh
->ndata
> LOAD_FACTOR
* lh
->ltable
) {
457 afs_lhash_expand(lh
);
459 printf("lh->p = %d; lh->maxp = %d\n", lh
->p
, lh
->maxp
);
462 #ifdef CHECK_INVARIANTS
463 assert(lh
->ndata
<= LOAD_FACTOR
* lh
->ltable
);
464 #endif /* CHECK_INVARIANTS */
470 afs_lhash_stat(afs_lhash
* lh
, struct afs_lhash_stat
*sb
)
474 int min_chain_length
= -1;
475 int max_chain_length
= -1;
476 size_t buckets
= lh
->ltable
;
482 #ifdef CHECK_INVARIANTS
483 assert(lh
->ltable
<= lh
->ntable
);
484 #endif /* CHECK_INVARIANTS */
486 for (k
= 0; k
< lh
->ltable
; k
++) {
488 int chain_length
= 0;
490 for (buck
= lh
->table
[k
]; buck
; buck
= buck
->next
) {
495 if (min_chain_length
== -1) {
496 min_chain_length
= chain_length
;
499 if (max_chain_length
== -1) {
500 max_chain_length
= chain_length
;
503 if (chain_length
< min_chain_length
) {
504 min_chain_length
= chain_length
;
507 if (max_chain_length
< chain_length
) {
508 max_chain_length
= chain_length
;
512 sb
->min_chain_length
= min_chain_length
;
513 sb
->max_chain_length
= max_chain_length
;
514 sb
->buckets
= buckets
;
515 sb
->records
= records
;
517 #ifdef CHECK_INVARIANTS
518 assert(lh
->ndata
== records
);
519 #endif /* CHECK_INVARIANTS */
521 sb
->search_calls
= lh
->search_calls
;
522 sb
->search_tests
= lh
->search_tests
;
523 sb
->remove_calls
= lh
->remove_calls
;
524 sb
->remove_tests
= lh
->remove_tests
;