backport to buster
[hcoop/debian/openafs.git] / src / util / afs_lhash.c
CommitLineData
805e021f
CE
1/*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
9
10#include <afsconfig.h>
11#include <afs/param.h>
12
13
14#include "afs_atomlist.h"
15#include "afs_lhash.h"
16#ifndef KERNEL
17/* for now, only turn on assertions in user-space code */
18#include <assert.h>
19#define CHECK_INVARIANTS
20#endif /* !KERNEL */
21
22#ifndef NULL
23#define NULL 0
24#endif
25
/* max hash table load factor: expand when records > LOAD_FACTOR * ltable */
enum { LOAD_FACTOR = 5 };

/* one chain node: holds a caller-supplied record and its hash key */
struct bucket {
    struct bucket *next;	/* next node in this chain; NULL terminates */
    void *data;			/* caller-supplied record (not owned) */
    unsigned key;		/* key the record was entered under */
};
34
/*
 * Handle for one linear hash table.  All memory management goes
 * through the caller-supplied allocate/deallocate callbacks so the
 * table can be used in both kernel and user space.
 */
struct afs_lhash {
    int (*equal) (const void *a, const void *b);	/* record equality callback */

    void *(*allocate) (size_t n);	/* allocator supplied at create time */

    void (*deallocate) (void *p, size_t n);	/* matching release routine */

    size_t p;			/* index of next bucket to be split */
    size_t maxp;		/* upper bound on p during this expansion */

    size_t ndata;		/* count of data records in hash */

    size_t ltable;		/* logical table size (buckets in use) */

    size_t ntable;		/* physical table size (slots allocated) */
    struct bucket **table;

    afs_atomlist *bucket_list;	/* hash bucket allocator */

    size_t search_calls;	/* cumulative afs_lhash_search() call count */
    size_t search_tests;	/* cumulative afs_lhash_search() comparison count */
    size_t remove_calls;	/* cumulative afs_lhash_remove() call count */
    size_t remove_tests;	/* cumulative afs_lhash_remove() comparison count */
};
59
60/*
61 * make sure the given hash table can accomodate the given index
62 * value; expand the bucket table if necessary
63 *
64 * returns 0 on success, <0 on failure
65 */
66
67static int
68afs_lhash_accomodate(afs_lhash * lh, size_t max_index)
69{
70 /*
71 * The usual approach to growing ntable to accomodate max_index
72 * would be to double ntable enough times. This would give us
73 * an exponential backoff in how many times we need to grow
74 * ntable. However, there is a space tradeoff.
75 *
76 * Since afs_lhash may be used in an environment where memory is
77 * constrained, we choose instead to grow ntable in fixed
78 * increments. This may have a larger total cost, but it is
79 * amortized in smaller increments.
80 *
81 * If even this cost is too great, we can consider adopting the
82 * two-level array approach mentioned in the paper. This could
83 * keep the sizes of the allocations more consistent, and also
84 * reduce the amount of data copying. However, we won't do that
85 * until we see real results that show that the benefit of the
86 * additional complexity is worth it.
87 */
88 enum { ntable_inc = 1024 / sizeof *lh->table };
89
90 size_t new_ntable;
91 struct bucket **new_table;
92 size_t i;
93
94 /* if the given index fits, we're done */
95
96 if (max_index < lh->ntable)
97 return 0;
98
99 /* otherwise, determine new_ntable and allocate new_table */
100
101 if (lh->ntable == (size_t) 0) {
102 new_ntable = ntable_inc;
103 } else {
104 new_ntable = lh->ntable;
105 }
106
107 for (; !(max_index < new_ntable); new_ntable += ntable_inc)
108 continue;
109
110 new_table = lh->allocate(new_ntable * sizeof *lh->table);
111 if (!new_table) {
112 return -1;
113 }
114
115 /* initialize new_table from the old table */
116
117 for (i = 0; i < lh->ntable; i++) {
118 new_table[i] = lh->table[i];
119 }
120 for (i = lh->ntable; i < new_ntable; i++) {
121 new_table[i] = 0;
122 }
123
124 /*
125 * free the old table, if any, and switch to the new table
126 *
127 * (when called from afs_lhash_create(), the table is empty)
128 */
129
130 if (lh->table && lh->ntable) {
131 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
132 }
133 lh->ntable = new_ntable;
134 lh->table = new_table;
135
136 return 0;
137}
138
139/*
140 * given a hash table and a key value, returns the index corresponding
141 * to the key value
142 */
143
144static size_t
145afs_lhash_address(const afs_lhash * lh, unsigned key)
146{
147 enum { prime = 1048583 };
148 size_t h;
149 size_t address;
150
151 h = key % prime; /* 0 <= h < prime */
152
153 address = h % lh->maxp;
154 if (address < lh->p) {
155 address = h % (2 * lh->maxp);
156 }
157
158 return address;
159}
160
/*
 * If possible, expand the logical size of the given hash table by one
 * bucket: split the chain at index p, moving the records that now hash
 * to the new slot (maxp + p) onto a fresh chain there.  If the bucket
 * array cannot be grown, the table is simply left at its current size.
 */
static void
afs_lhash_expand(afs_lhash * lh)
{
    size_t old_address;		/* index of bucket to split */
    size_t new_address;		/* index of new bucket */

    struct bucket *current_b;	/* for scanning down old chain */
    struct bucket *previous;

    struct bucket *last_of_new;	/* last element in new chain */

    /* save address to split */

    old_address = lh->p;

    /* determine new address, grow table if necessary */

    new_address = lh->maxp + lh->p;

    if (afs_lhash_accomodate(lh, new_address) < 0) {
	return;
    }

    /* adjust the state variables */

    /* this makes afs_lhash_address() work with respect
     * to the expanded table */

    lh->p++;
    if (lh->p == lh->maxp) {
	/* this expansion round is complete: double the round bound
	 * and start splitting again from slot 0 */
	lh->maxp *= 2;
	lh->p = 0;
    }

    lh->ltable++;

#ifdef CHECK_INVARIANTS
    assert(lh->ltable - 1 == new_address);
    assert(lh->ltable <= lh->ntable);
#endif /* CHECK_INVARIANTS */

    /* relocate records to the new bucket; each record on the old
     * chain rehashes either to old_address or to new_address */

    current_b = lh->table[old_address];
    previous = 0;
    last_of_new = 0;
    lh->table[new_address] = 0;

    while (current_b) {
	size_t addr;
	addr = afs_lhash_address(lh, current_b->key);
	if (addr == new_address) {
	    /* unlink from the old chain and append to the end of the
	     * new chain, preserving relative record order */
	    if (last_of_new) {
		last_of_new->next = current_b;
	    } else {
		lh->table[new_address] = current_b;
	    }
	    if (previous) {
		previous->next = current_b->next;
	    } else {
		lh->table[old_address] = current_b->next;
	    }
	    last_of_new = current_b;
	    current_b = current_b->next;
	    last_of_new->next = 0;
	} else {
#ifdef CHECK_INVARIANTS
	    assert(addr == old_address);
#endif /* CHECK_INVARIANTS */
	    /* leave it on the old chain */
	    previous = current_b;
	    current_b = current_b->next;
	}
    }
}
240
241afs_lhash *
242afs_lhash_create(int (*equal) (const void *a, const void *b),
243 /* returns true if the elements pointed to by
244 * a and b are the same, false otherwise */
245 void *(*allocate) (size_t n), void (*deallocate) (void *p,
246 size_t n)
247 )
248{
249 afs_lhash *lh;
250
251 lh = allocate(sizeof *lh);
252 if (!lh)
253 return (afs_lhash *) 0;
254
255 lh->equal = equal;
256 lh->allocate = allocate;
257 lh->deallocate = deallocate;
258
259 lh->p = 0;
260 lh->maxp = 16;
261
262 lh->ltable = lh->maxp;
263
264 lh->ndata = 0;
265
266 lh->ntable = 0;
267 lh->table = NULL;
268
269 if (afs_lhash_accomodate(lh, lh->ltable - 1) < 0) {
270 lh->deallocate(lh, sizeof *lh);
271 return (afs_lhash *) 0;
272 }
273#ifdef CHECK_INVARIANTS
274 assert(lh->ltable <= lh->ntable);
275#endif /* CHECK_INVARIANTS */
276
277 /* maybe the chunk size should be passed in for hashes, so we
278 * can pass it down here */
279
280 lh->bucket_list =
281 afs_atomlist_create(sizeof(struct bucket), sizeof(long) * 1024,
282 allocate, deallocate);
283#ifdef CHECK_INVARIANTS
284 assert(lh->bucket_list);
285#endif /* CHECK_INVARIANTS */
286
287 lh->search_calls = 0;
288 lh->search_tests = 0;
289 lh->remove_calls = 0;
290 lh->remove_tests = 0;
291
292 return lh;
293}
294
295void
296afs_lhash_destroy(afs_lhash * lh)
297{
298#ifdef CHECK_INVARIANTS
299 assert(lh->ltable <= lh->ntable);
300#endif /* CHECK_INVARIANTS */
301
302 /*
303 * first, free the buckets
304 *
305 * afs_atomlist_destroy() implicitly frees all the memory allocated
306 * from the given afs_atomlist, so there is no need to go through
307 * the hash table to explicitly free each bucket.
308 */
309
310 afs_atomlist_destroy(lh->bucket_list);
311
312 /* then, free the table and the afs_lhash */
313
314 lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
315 lh->deallocate(lh, sizeof *lh);
316}
317
318void
319afs_lhash_iter(afs_lhash * lh,
320 void (*f) (size_t index, unsigned key, void *data)
321 )
322{
323 size_t i;
324
325#ifdef CHECK_INVARIANTS
326 assert(lh->ltable <= lh->ntable);
327#endif /* CHECK_INVARIANTS */
328
329 for (i = 0; i < lh->ltable; i++) {
330 struct bucket *current_b;
331
332 for (current_b = lh->table[i]; current_b; current_b = current_b->next) {
333 f(i, current_b->key, current_b->data);
334 }
335 }
336}
337
338void *
339afs_lhash_search(afs_lhash * lh, unsigned key, const void *data)
340{
341 size_t k;
342 struct bucket *previous;
343 struct bucket *current_b;
344
345 lh->search_calls++;
346
347 k = afs_lhash_address(lh, key);
348 for (previous = 0, current_b = lh->table[k]; current_b;
349 previous = current_b, current_b = current_b->next) {
350 lh->search_tests++;
351 if (lh->equal(data, current_b->data)) {
352
353 /*
354 * Since we found what we were looking for, move
355 * the bucket to the head of the chain. The
356 * theory is that unused hash table entries will
357 * be left at the end of the chain, where they
358 * will not cause search times to increase.
359 *
360 * This optimization is both easy to understand
361 * and cheap to execute, so we go ahead and do
362 * it.
363 *
364 */
365
366 if (previous) {
367 previous->next = current_b->next;
368 current_b->next = lh->table[k];
369 lh->table[k] = current_b;
370 }
371
372 return current_b->data;
373 }
374 }
375
376 return 0;
377}
378
379void *
380afs_lhash_rosearch(const afs_lhash * lh, unsigned key, const void *data)
381{
382 size_t k;
383 struct bucket *current_b;
384
385 k = afs_lhash_address(lh, key);
386 for (current_b = lh->table[k]; current_b; current_b = current_b->next) {
387 if (lh->equal(data, current_b->data)) {
388 return current_b->data;
389 }
390 }
391
392 return 0;
393}
394
395void *
396afs_lhash_remove(afs_lhash * lh, unsigned key, const void *data)
397{
398 size_t k;
399 struct bucket **pprev;
400 struct bucket *cur;
401
402 lh->remove_calls++;
403
404 k = afs_lhash_address(lh, key);
405 for (pprev = 0, cur = lh->table[k]; cur;
406 pprev = &cur->next, cur = cur->next) {
407 lh->remove_tests++;
408 if (lh->equal(data, cur->data)) {
409 void *ptr = cur->data;
410
411 if (pprev) {
412 *pprev = cur->next;
413 } else {
414 lh->table[k] = cur->next;
415 }
416
417 afs_atomlist_put(lh->bucket_list, cur);
418
419 lh->ndata--;
420
421 return ptr;
422 }
423 }
424
425 return 0;
426}
427
428int
429afs_lhash_enter(afs_lhash * lh, unsigned key, void *data)
430{
431 size_t k;
432 struct bucket *buck;
433
434 buck = afs_atomlist_get(lh->bucket_list);
435 if (!buck) {
436 return -1;
437 }
438
439 buck->key = key;
440 buck->data = data;
441
442 k = afs_lhash_address(lh, key);
443
444 buck->next = lh->table[k];
445 lh->table[k] = buck;
446
447 lh->ndata++;
448
449 /*
450 * The load factor is the number of data items in the hash
451 * table divided by the logical table length. We expand the
452 * table when the load factor exceeds a predetermined limit
453 * (LOAD_FACTOR). To avoid floating point, we multiply both
454 * sides of the inequality by the logical table length...
455 */
456 if (lh->ndata > LOAD_FACTOR * lh->ltable) {
457 afs_lhash_expand(lh);
458#if 0
459 printf("lh->p = %d; lh->maxp = %d\n", lh->p, lh->maxp);
460#endif
461 }
462#ifdef CHECK_INVARIANTS
463 assert(lh->ndata <= LOAD_FACTOR * lh->ltable);
464#endif /* CHECK_INVARIANTS */
465
466 return 0;
467}
468
469int
470afs_lhash_stat(afs_lhash * lh, struct afs_lhash_stat *sb)
471{
472 size_t k;
473
474 int min_chain_length = -1;
475 int max_chain_length = -1;
476 size_t buckets = lh->ltable;
477 size_t records = 0;
478
479 if (!sb) {
480 return -1;
481 }
482#ifdef CHECK_INVARIANTS
483 assert(lh->ltable <= lh->ntable);
484#endif /* CHECK_INVARIANTS */
485
486 for (k = 0; k < lh->ltable; k++) {
487 struct bucket *buck;
488 int chain_length = 0;
489
490 for (buck = lh->table[k]; buck; buck = buck->next) {
491 chain_length++;
492 records++;
493 }
494
495 if (min_chain_length == -1) {
496 min_chain_length = chain_length;
497 }
498
499 if (max_chain_length == -1) {
500 max_chain_length = chain_length;
501 }
502
503 if (chain_length < min_chain_length) {
504 min_chain_length = chain_length;
505 }
506
507 if (max_chain_length < chain_length) {
508 max_chain_length = chain_length;
509 }
510 }
511
512 sb->min_chain_length = min_chain_length;
513 sb->max_chain_length = max_chain_length;
514 sb->buckets = buckets;
515 sb->records = records;
516
517#ifdef CHECK_INVARIANTS
518 assert(lh->ndata == records);
519#endif /* CHECK_INVARIANTS */
520
521 sb->search_calls = lh->search_calls;
522 sb->search_tests = lh->search_tests;
523 sb->remove_calls = lh->remove_calls;
524 sb->remove_tests = lh->remove_tests;
525
526 return 0;
527}