Commit | Line | Data |
---|---|---|
805e021f CE |
1 | /* |
2 | * Copyright 2000, International Business Machines Corporation and others. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * This software has been released under the terms of the IBM Public | |
6 | * License. For details, see the LICENSE file in the top-level source | |
7 | * directory or online at http://www.openafs.org/dl/license10.html | |
8 | */ | |
9 | ||
10 | #include <afsconfig.h> | |
11 | #include <afs/param.h> | |
12 | ||
13 | ||
14 | #include "afs_atomlist.h" | |
15 | #include "afs_lhash.h" | |
16 | #ifndef KERNEL | |
17 | /* for now, only turn on assertions in user-space code */ | |
18 | #include <assert.h> | |
19 | #define CHECK_INVARIANTS | |
20 | #endif /* !KERNEL */ | |
21 | ||
22 | #ifndef NULL | |
23 | #define NULL 0 | |
24 | #endif | |
25 | ||
/* max hash table load factor */
enum { LOAD_FACTOR = 5 };

/* one (key, data) record in a bucket's collision chain */
struct bucket {
    struct bucket *next;	/* next record in the same chain, or 0 */
    void *data;			/* caller-owned record pointer */
    unsigned key;		/* hash key supplied by the caller */
};
34 | ||
/*
 * Linear hash table state (Litwin-style linear hashing: the table
 * grows one bucket at a time by splitting bucket p each round).
 */
struct afs_lhash {
    /* caller-supplied comparison: nonzero if *a and *b match */
    int (*equal) (const void *a, const void *b);

    /* caller-supplied memory hooks, used for the table and the hash itself */
    void *(*allocate) (size_t n);

    void (*deallocate) (void *p, size_t n);

    size_t p;			/* index of next bucket to be split */
    size_t maxp;		/* upper bound on p during this expansion */

    size_t ndata;		/* count of data records in hash */

    size_t ltable;		/* logical table size (buckets in use) */

    size_t ntable;		/* physical table size (slots allocated) */
    struct bucket **table;

    afs_atomlist *bucket_list;	/* hash bucket allocator */

    /* cumulative statistics, reported by afs_lhash_stat() */
    size_t search_calls;	/* cumulative afs_lhash_search() call count */
    size_t search_tests;	/* cumulative afs_lhash_search() comparison count */
    size_t remove_calls;	/* cumulative afs_lhash_remove() call count */
    size_t remove_tests;	/* cumulative afs_lhash_remove() comparison count */
};
59 | ||
60 | /* | |
61 | * make sure the given hash table can accomodate the given index | |
62 | * value; expand the bucket table if necessary | |
63 | * | |
64 | * returns 0 on success, <0 on failure | |
65 | */ | |
66 | ||
67 | static int | |
68 | afs_lhash_accomodate(afs_lhash * lh, size_t max_index) | |
69 | { | |
70 | /* | |
71 | * The usual approach to growing ntable to accomodate max_index | |
72 | * would be to double ntable enough times. This would give us | |
73 | * an exponential backoff in how many times we need to grow | |
74 | * ntable. However, there is a space tradeoff. | |
75 | * | |
76 | * Since afs_lhash may be used in an environment where memory is | |
77 | * constrained, we choose instead to grow ntable in fixed | |
78 | * increments. This may have a larger total cost, but it is | |
79 | * amortized in smaller increments. | |
80 | * | |
81 | * If even this cost is too great, we can consider adopting the | |
82 | * two-level array approach mentioned in the paper. This could | |
83 | * keep the sizes of the allocations more consistent, and also | |
84 | * reduce the amount of data copying. However, we won't do that | |
85 | * until we see real results that show that the benefit of the | |
86 | * additional complexity is worth it. | |
87 | */ | |
88 | enum { ntable_inc = 1024 / sizeof *lh->table }; | |
89 | ||
90 | size_t new_ntable; | |
91 | struct bucket **new_table; | |
92 | size_t i; | |
93 | ||
94 | /* if the given index fits, we're done */ | |
95 | ||
96 | if (max_index < lh->ntable) | |
97 | return 0; | |
98 | ||
99 | /* otherwise, determine new_ntable and allocate new_table */ | |
100 | ||
101 | if (lh->ntable == (size_t) 0) { | |
102 | new_ntable = ntable_inc; | |
103 | } else { | |
104 | new_ntable = lh->ntable; | |
105 | } | |
106 | ||
107 | for (; !(max_index < new_ntable); new_ntable += ntable_inc) | |
108 | continue; | |
109 | ||
110 | new_table = lh->allocate(new_ntable * sizeof *lh->table); | |
111 | if (!new_table) { | |
112 | return -1; | |
113 | } | |
114 | ||
115 | /* initialize new_table from the old table */ | |
116 | ||
117 | for (i = 0; i < lh->ntable; i++) { | |
118 | new_table[i] = lh->table[i]; | |
119 | } | |
120 | for (i = lh->ntable; i < new_ntable; i++) { | |
121 | new_table[i] = 0; | |
122 | } | |
123 | ||
124 | /* | |
125 | * free the old table, if any, and switch to the new table | |
126 | * | |
127 | * (when called from afs_lhash_create(), the table is empty) | |
128 | */ | |
129 | ||
130 | if (lh->table && lh->ntable) { | |
131 | lh->deallocate(lh->table, lh->ntable * sizeof *lh->table); | |
132 | } | |
133 | lh->ntable = new_ntable; | |
134 | lh->table = new_table; | |
135 | ||
136 | return 0; | |
137 | } | |
138 | ||
139 | /* | |
140 | * given a hash table and a key value, returns the index corresponding | |
141 | * to the key value | |
142 | */ | |
143 | ||
144 | static size_t | |
145 | afs_lhash_address(const afs_lhash * lh, unsigned key) | |
146 | { | |
147 | enum { prime = 1048583 }; | |
148 | size_t h; | |
149 | size_t address; | |
150 | ||
151 | h = key % prime; /* 0 <= h < prime */ | |
152 | ||
153 | address = h % lh->maxp; | |
154 | if (address < lh->p) { | |
155 | address = h % (2 * lh->maxp); | |
156 | } | |
157 | ||
158 | return address; | |
159 | } | |
160 | ||
/*
 * if possible, expand the logical size of the given hash table
 *
 * Splits bucket lh->p: records in chain lh->p whose addresses now map
 * to the new bucket (lh->maxp + lh->p) are relocated there; the rest
 * stay put.  If the physical table cannot be grown, the expansion is
 * silently skipped (the table still works, just more loaded).
 */
static void
afs_lhash_expand(afs_lhash * lh)
{
    size_t old_address;		/* index of bucket to split */
    size_t new_address;		/* index of new bucket */

    struct bucket *current_b;	/* for scanning down old chain */
    struct bucket *previous;

    struct bucket *last_of_new;	/* last element in new chain */

    /* save address to split */

    old_address = lh->p;

    /* determine new address, grow table if necessary */

    new_address = lh->maxp + lh->p;

    if (afs_lhash_accomodate(lh, new_address) < 0) {
        return;
    }

    /* adjust the state variables */

    /* this makes afs_lhash_address() work with respect
     * to the expanded table */

    lh->p++;
    if (lh->p == lh->maxp) {
        /* this expansion round is complete; start the next one */
        lh->maxp *= 2;
        lh->p = 0;
    }

    lh->ltable++;

#ifdef CHECK_INVARIANTS
    assert(lh->ltable - 1 == new_address);
    assert(lh->ltable <= lh->ntable);
#endif /* CHECK_INVARIANTS */

    /* relocate records to the new bucket */

    current_b = lh->table[old_address];
    previous = 0;
    last_of_new = 0;
    lh->table[new_address] = 0;

    while (current_b) {
        size_t addr;
        addr = afs_lhash_address(lh, current_b->key);
        if (addr == new_address) {
            /* attach it to the end of the new chain */
            if (last_of_new) {
                last_of_new->next = current_b;
            } else {
                lh->table[new_address] = current_b;
            }
            /* unlink it from the old chain */
            if (previous) {
                previous->next = current_b->next;
            } else {
                lh->table[old_address] = current_b->next;
            }
            /* advance, then terminate the new chain */
            last_of_new = current_b;
            current_b = current_b->next;
            last_of_new->next = 0;
        } else {
#ifdef CHECK_INVARIANTS
            /* a record can only map to the split bucket or the new one */
            assert(addr == old_address);
#endif /* CHECK_INVARIANTS */
            /* leave it on the old chain */
            previous = current_b;
            current_b = current_b->next;
        }
    }
}
240 | ||
241 | afs_lhash * | |
242 | afs_lhash_create(int (*equal) (const void *a, const void *b), | |
243 | /* returns true if the elements pointed to by | |
244 | * a and b are the same, false otherwise */ | |
245 | void *(*allocate) (size_t n), void (*deallocate) (void *p, | |
246 | size_t n) | |
247 | ) | |
248 | { | |
249 | afs_lhash *lh; | |
250 | ||
251 | lh = allocate(sizeof *lh); | |
252 | if (!lh) | |
253 | return (afs_lhash *) 0; | |
254 | ||
255 | lh->equal = equal; | |
256 | lh->allocate = allocate; | |
257 | lh->deallocate = deallocate; | |
258 | ||
259 | lh->p = 0; | |
260 | lh->maxp = 16; | |
261 | ||
262 | lh->ltable = lh->maxp; | |
263 | ||
264 | lh->ndata = 0; | |
265 | ||
266 | lh->ntable = 0; | |
267 | lh->table = NULL; | |
268 | ||
269 | if (afs_lhash_accomodate(lh, lh->ltable - 1) < 0) { | |
270 | lh->deallocate(lh, sizeof *lh); | |
271 | return (afs_lhash *) 0; | |
272 | } | |
273 | #ifdef CHECK_INVARIANTS | |
274 | assert(lh->ltable <= lh->ntable); | |
275 | #endif /* CHECK_INVARIANTS */ | |
276 | ||
277 | /* maybe the chunk size should be passed in for hashes, so we | |
278 | * can pass it down here */ | |
279 | ||
280 | lh->bucket_list = | |
281 | afs_atomlist_create(sizeof(struct bucket), sizeof(long) * 1024, | |
282 | allocate, deallocate); | |
283 | #ifdef CHECK_INVARIANTS | |
284 | assert(lh->bucket_list); | |
285 | #endif /* CHECK_INVARIANTS */ | |
286 | ||
287 | lh->search_calls = 0; | |
288 | lh->search_tests = 0; | |
289 | lh->remove_calls = 0; | |
290 | lh->remove_tests = 0; | |
291 | ||
292 | return lh; | |
293 | } | |
294 | ||
/*
 * Release a hash table created by afs_lhash_create(), including all
 * buckets, the physical table, and the afs_lhash itself.  Stored data
 * pointers are caller-owned and are NOT freed here.
 */
void
afs_lhash_destroy(afs_lhash * lh)
{
#ifdef CHECK_INVARIANTS
    assert(lh->ltable <= lh->ntable);
#endif /* CHECK_INVARIANTS */

    /*
     * first, free the buckets
     *
     * afs_atomlist_destroy() implicitly frees all the memory allocated
     * from the given afs_atomlist, so there is no need to go through
     * the hash table to explicitly free each bucket.
     */

    afs_atomlist_destroy(lh->bucket_list);

    /* then, free the table and the afs_lhash */

    lh->deallocate(lh->table, lh->ntable * sizeof *lh->table);
    lh->deallocate(lh, sizeof *lh);
}
317 | ||
318 | void | |
319 | afs_lhash_iter(afs_lhash * lh, | |
320 | void (*f) (size_t index, unsigned key, void *data) | |
321 | ) | |
322 | { | |
323 | size_t i; | |
324 | ||
325 | #ifdef CHECK_INVARIANTS | |
326 | assert(lh->ltable <= lh->ntable); | |
327 | #endif /* CHECK_INVARIANTS */ | |
328 | ||
329 | for (i = 0; i < lh->ltable; i++) { | |
330 | struct bucket *current_b; | |
331 | ||
332 | for (current_b = lh->table[i]; current_b; current_b = current_b->next) { | |
333 | f(i, current_b->key, current_b->data); | |
334 | } | |
335 | } | |
336 | } | |
337 | ||
338 | void * | |
339 | afs_lhash_search(afs_lhash * lh, unsigned key, const void *data) | |
340 | { | |
341 | size_t k; | |
342 | struct bucket *previous; | |
343 | struct bucket *current_b; | |
344 | ||
345 | lh->search_calls++; | |
346 | ||
347 | k = afs_lhash_address(lh, key); | |
348 | for (previous = 0, current_b = lh->table[k]; current_b; | |
349 | previous = current_b, current_b = current_b->next) { | |
350 | lh->search_tests++; | |
351 | if (lh->equal(data, current_b->data)) { | |
352 | ||
353 | /* | |
354 | * Since we found what we were looking for, move | |
355 | * the bucket to the head of the chain. The | |
356 | * theory is that unused hash table entries will | |
357 | * be left at the end of the chain, where they | |
358 | * will not cause search times to increase. | |
359 | * | |
360 | * This optimization is both easy to understand | |
361 | * and cheap to execute, so we go ahead and do | |
362 | * it. | |
363 | * | |
364 | */ | |
365 | ||
366 | if (previous) { | |
367 | previous->next = current_b->next; | |
368 | current_b->next = lh->table[k]; | |
369 | lh->table[k] = current_b; | |
370 | } | |
371 | ||
372 | return current_b->data; | |
373 | } | |
374 | } | |
375 | ||
376 | return 0; | |
377 | } | |
378 | ||
379 | void * | |
380 | afs_lhash_rosearch(const afs_lhash * lh, unsigned key, const void *data) | |
381 | { | |
382 | size_t k; | |
383 | struct bucket *current_b; | |
384 | ||
385 | k = afs_lhash_address(lh, key); | |
386 | for (current_b = lh->table[k]; current_b; current_b = current_b->next) { | |
387 | if (lh->equal(data, current_b->data)) { | |
388 | return current_b->data; | |
389 | } | |
390 | } | |
391 | ||
392 | return 0; | |
393 | } | |
394 | ||
395 | void * | |
396 | afs_lhash_remove(afs_lhash * lh, unsigned key, const void *data) | |
397 | { | |
398 | size_t k; | |
399 | struct bucket **pprev; | |
400 | struct bucket *cur; | |
401 | ||
402 | lh->remove_calls++; | |
403 | ||
404 | k = afs_lhash_address(lh, key); | |
405 | for (pprev = 0, cur = lh->table[k]; cur; | |
406 | pprev = &cur->next, cur = cur->next) { | |
407 | lh->remove_tests++; | |
408 | if (lh->equal(data, cur->data)) { | |
409 | void *ptr = cur->data; | |
410 | ||
411 | if (pprev) { | |
412 | *pprev = cur->next; | |
413 | } else { | |
414 | lh->table[k] = cur->next; | |
415 | } | |
416 | ||
417 | afs_atomlist_put(lh->bucket_list, cur); | |
418 | ||
419 | lh->ndata--; | |
420 | ||
421 | return ptr; | |
422 | } | |
423 | } | |
424 | ||
425 | return 0; | |
426 | } | |
427 | ||
428 | int | |
429 | afs_lhash_enter(afs_lhash * lh, unsigned key, void *data) | |
430 | { | |
431 | size_t k; | |
432 | struct bucket *buck; | |
433 | ||
434 | buck = afs_atomlist_get(lh->bucket_list); | |
435 | if (!buck) { | |
436 | return -1; | |
437 | } | |
438 | ||
439 | buck->key = key; | |
440 | buck->data = data; | |
441 | ||
442 | k = afs_lhash_address(lh, key); | |
443 | ||
444 | buck->next = lh->table[k]; | |
445 | lh->table[k] = buck; | |
446 | ||
447 | lh->ndata++; | |
448 | ||
449 | /* | |
450 | * The load factor is the number of data items in the hash | |
451 | * table divided by the logical table length. We expand the | |
452 | * table when the load factor exceeds a predetermined limit | |
453 | * (LOAD_FACTOR). To avoid floating point, we multiply both | |
454 | * sides of the inequality by the logical table length... | |
455 | */ | |
456 | if (lh->ndata > LOAD_FACTOR * lh->ltable) { | |
457 | afs_lhash_expand(lh); | |
458 | #if 0 | |
459 | printf("lh->p = %d; lh->maxp = %d\n", lh->p, lh->maxp); | |
460 | #endif | |
461 | } | |
462 | #ifdef CHECK_INVARIANTS | |
463 | assert(lh->ndata <= LOAD_FACTOR * lh->ltable); | |
464 | #endif /* CHECK_INVARIANTS */ | |
465 | ||
466 | return 0; | |
467 | } | |
468 | ||
469 | int | |
470 | afs_lhash_stat(afs_lhash * lh, struct afs_lhash_stat *sb) | |
471 | { | |
472 | size_t k; | |
473 | ||
474 | int min_chain_length = -1; | |
475 | int max_chain_length = -1; | |
476 | size_t buckets = lh->ltable; | |
477 | size_t records = 0; | |
478 | ||
479 | if (!sb) { | |
480 | return -1; | |
481 | } | |
482 | #ifdef CHECK_INVARIANTS | |
483 | assert(lh->ltable <= lh->ntable); | |
484 | #endif /* CHECK_INVARIANTS */ | |
485 | ||
486 | for (k = 0; k < lh->ltable; k++) { | |
487 | struct bucket *buck; | |
488 | int chain_length = 0; | |
489 | ||
490 | for (buck = lh->table[k]; buck; buck = buck->next) { | |
491 | chain_length++; | |
492 | records++; | |
493 | } | |
494 | ||
495 | if (min_chain_length == -1) { | |
496 | min_chain_length = chain_length; | |
497 | } | |
498 | ||
499 | if (max_chain_length == -1) { | |
500 | max_chain_length = chain_length; | |
501 | } | |
502 | ||
503 | if (chain_length < min_chain_length) { | |
504 | min_chain_length = chain_length; | |
505 | } | |
506 | ||
507 | if (max_chain_length < chain_length) { | |
508 | max_chain_length = chain_length; | |
509 | } | |
510 | } | |
511 | ||
512 | sb->min_chain_length = min_chain_length; | |
513 | sb->max_chain_length = max_chain_length; | |
514 | sb->buckets = buckets; | |
515 | sb->records = records; | |
516 | ||
517 | #ifdef CHECK_INVARIANTS | |
518 | assert(lh->ndata == records); | |
519 | #endif /* CHECK_INVARIANTS */ | |
520 | ||
521 | sb->search_calls = lh->search_calls; | |
522 | sb->search_tests = lh->search_tests; | |
523 | sb->remove_calls = lh->remove_calls; | |
524 | sb->remove_tests = lh->remove_tests; | |
525 | ||
526 | return 0; | |
527 | } |