/*
 * Copyright 2000, International Business Machines Corporation and others.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
10 #include <afsconfig.h>
11 #include <afs/param.h>
14 #include "afs_atomlist.h"
15 #include "afs_lhash.h"
17 /* for now, only turn on assertions in user-space code */
19 #define CHECK_INVARIANTS
/*
 * Maximum hash table load factor (data records per logical bucket).
 * When ndata exceeds LOAD_FACTOR * ltable the table is expanded by
 * splitting one bucket chain.  An enum is used rather than #define so
 * the constant is typed, scoped, and debugger-visible.
 */
enum { LOAD_FACTOR = 5 };
/*
 * NOTE(review): fields of the afs_lhash control structure.  The
 * enclosing "struct afs_lhash {" opener is not visible in this
 * excerpt, and the text below is line-mangled by extraction; it is
 * left byte-identical.  equal/allocate/deallocate are the
 * caller-supplied callbacks installed by afs_lhash_create().
 */
36 int (*equal
) (const void *a
, const void *b
);
38 void *(*allocate
) (size_t n
);
40 void (*deallocate
) (void *p
, size_t n
);
/* linear-hashing split state, consulted by afs_lhash_address() */
42 size_t p
; /* index of next bucket to be split */
43 size_t maxp
; /* upper bound on p during this expansion */
45 size_t ndata
; /* count of data records in hash */
47 size_t ltable
; /* logical table size */
49 size_t ntable
; /* physical table size */
50 struct bucket
**table
;
52 afs_atomlist
*bucket_list
; /* hash bucket allocator */
/* statistics reported by afs_lhash_stat() */
54 size_t search_calls
; /* cumulative afs_lhash_search() call count */
55 size_t search_tests
; /* cumulative afs_lhash_search() comparison count */
56 size_t remove_calls
; /* cumulative afs_lhash_remove() call count */
57 size_t remove_tests
; /* cumulative afs_lhash_remove() comparison count */
61 * make sure the given hash table can accomodate the given index
62 * value; expand the bucket table if necessary
64 * returns 0 on success, <0 on failure
68 afs_lhash_accomodate(afs_lhash
* lh
, size_t max_index
)
71 * The usual approach to growing ntable to accomodate max_index
72 * would be to double ntable enough times. This would give us
73 * an exponential backoff in how many times we need to grow
74 * ntable. However, there is a space tradeoff.
76 * Since afs_lhash may be used in an environment where memory is
77 * constrained, we choose instead to grow ntable in fixed
78 * increments. This may have a larger total cost, but it is
79 * amortized in smaller increments.
81 * If even this cost is too great, we can consider adopting the
82 * two-level array approach mentioned in the paper. This could
83 * keep the sizes of the allocations more consistent, and also
84 * reduce the amount of data copying. However, we won't do that
85 * until we see real results that show that the benefit of the
86 * additional complexity is worth it.
88 enum { ntable_inc
= 1024 / sizeof *lh
->table
};
91 struct bucket
**new_table
;
94 /* if the given index fits, we're done */
96 if (max_index
< lh
->ntable
)
99 /* otherwise, determine new_ntable and allocate new_table */
101 if (lh
->ntable
== (size_t) 0) {
102 new_ntable
= ntable_inc
;
104 new_ntable
= lh
->ntable
;
107 for (; !(max_index
< new_ntable
); new_ntable
+= ntable_inc
)
110 new_table
= lh
->allocate(new_ntable
* sizeof *lh
->table
);
115 /* initialize new_table from the old table */
117 for (i
= 0; i
< lh
->ntable
; i
++) {
118 new_table
[i
] = lh
->table
[i
];
120 for (i
= lh
->ntable
; i
< new_ntable
; i
++) {
125 * free the old table, if any, and switch to the new table
127 * (when called from afs_lhash_create(), the table is empty)
130 if (lh
->table
&& lh
->ntable
) {
131 lh
->deallocate(lh
->table
, lh
->ntable
* sizeof *lh
->table
);
133 lh
->ntable
= new_ntable
;
134 lh
->table
= new_table
;
140 * given a hash table and a key value, returns the index corresponding
145 afs_lhash_address(const afs_lhash
* lh
, unsigned key
)
147 enum { prime
= 1048583 };
151 h
= key
% prime
; /* 0 <= h < prime */
153 address
= h
% lh
->maxp
;
154 if (address
< lh
->p
) {
155 address
= h
% (2 * lh
->maxp
);
162 * if possible, expand the logical size of the given hash table
165 afs_lhash_expand(afs_lhash
* lh
)
167 size_t old_address
; /* index of bucket to split */
168 size_t new_address
; /* index of new bucket */
170 struct bucket
*current_b
; /* for scanning down old chain */
171 struct bucket
*previous
;
173 struct bucket
*last_of_new
; /* last element in new chain */
175 /* save address to split */
179 /* determine new address, grow table if necessary */
181 new_address
= lh
->maxp
+ lh
->p
;
183 if (afs_lhash_accomodate(lh
, new_address
) < 0) {
187 /* adjust the state variables */
189 /* this makes afs_lhash_address() work with respect
190 * to the expanded table */
193 if (lh
->p
== lh
->maxp
) {
200 #ifdef CHECK_INVARIANTS
201 assert(lh
->ltable
- 1 == new_address
);
202 assert(lh
->ltable
<= lh
->ntable
);
203 #endif /* CHECK_INVARIANTS */
205 /* relocate records to the new bucket */
207 current_b
= lh
->table
[old_address
];
210 lh
->table
[new_address
] = 0;
214 addr
= afs_lhash_address(lh
, current_b
->key
);
215 if (addr
== new_address
) {
216 /* attach it to the end of the new chain */
218 last_of_new
->next
= current_b
;
220 lh
->table
[new_address
] = current_b
;
223 previous
->next
= current_b
->next
;
225 lh
->table
[old_address
] = current_b
->next
;
227 last_of_new
= current_b
;
228 current_b
= current_b
->next
;
229 last_of_new
->next
= 0;
231 #ifdef CHECK_INVARIANTS
232 assert(addr
== old_address
);
233 #endif /* CHECK_INVARIANTS */
234 /* leave it on the old chain */
235 previous
= current_b
;
236 current_b
= current_b
->next
;
242 afs_lhash_create(int (*equal
) (const void *a
, const void *b
),
243 /* returns true if the elements pointed to by
244 * a and b are the same, false otherwise */
245 void *(*allocate
) (size_t n
), void (*deallocate
) (void *p
,
251 lh
= allocate(sizeof *lh
);
253 return (afs_lhash
*) 0;
256 lh
->allocate
= allocate
;
257 lh
->deallocate
= deallocate
;
262 lh
->ltable
= lh
->maxp
;
269 if (afs_lhash_accomodate(lh
, lh
->ltable
- 1) < 0) {
270 lh
->deallocate(lh
, sizeof *lh
);
271 return (afs_lhash
*) 0;
273 #ifdef CHECK_INVARIANTS
274 assert(lh
->ltable
<= lh
->ntable
);
275 #endif /* CHECK_INVARIANTS */
277 /* maybe the chunk size should be passed in for hashes, so we
278 * can pass it down here */
281 afs_atomlist_create(sizeof(struct bucket
), sizeof(long) * 1024,
282 allocate
, deallocate
);
283 #ifdef CHECK_INVARIANTS
284 assert(lh
->bucket_list
);
285 #endif /* CHECK_INVARIANTS */
287 lh
->search_calls
= 0;
288 lh
->search_tests
= 0;
289 lh
->remove_calls
= 0;
290 lh
->remove_tests
= 0;
296 afs_lhash_destroy(afs_lhash
* lh
)
298 #ifdef CHECK_INVARIANTS
299 assert(lh
->ltable
<= lh
->ntable
);
300 #endif /* CHECK_INVARIANTS */
303 * first, free the buckets
305 * afs_atomlist_destroy() implicitly frees all the memory allocated
306 * from the given afs_atomlist, so there is no need to go through
307 * the hash table to explicitly free each bucket.
310 afs_atomlist_destroy(lh
->bucket_list
);
312 /* then, free the table and the afs_lhash */
314 lh
->deallocate(lh
->table
, lh
->ntable
* sizeof *lh
->table
);
315 lh
->deallocate(lh
, sizeof *lh
);
319 afs_lhash_iter(afs_lhash
* lh
,
320 void (*f
) (size_t index
, unsigned key
, void *data
)
325 #ifdef CHECK_INVARIANTS
326 assert(lh
->ltable
<= lh
->ntable
);
327 #endif /* CHECK_INVARIANTS */
329 for (i
= 0; i
< lh
->ltable
; i
++) {
330 struct bucket
*current_b
;
332 for (current_b
= lh
->table
[i
]; current_b
; current_b
= current_b
->next
) {
333 f(i
, current_b
->key
, current_b
->data
);
339 afs_lhash_search(afs_lhash
* lh
, unsigned key
, const void *data
)
342 struct bucket
*previous
;
343 struct bucket
*current_b
;
347 k
= afs_lhash_address(lh
, key
);
348 for (previous
= 0, current_b
= lh
->table
[k
]; current_b
;
349 previous
= current_b
, current_b
= current_b
->next
) {
351 if (lh
->equal(data
, current_b
->data
)) {
354 * Since we found what we were looking for, move
355 * the bucket to the head of the chain. The
356 * theory is that unused hash table entries will
357 * be left at the end of the chain, where they
358 * will not cause search times to increase.
360 * This optimization is both easy to understand
361 * and cheap to execute, so we go ahead and do
367 previous
->next
= current_b
->next
;
368 current_b
->next
= lh
->table
[k
];
369 lh
->table
[k
] = current_b
;
372 return current_b
->data
;
380 afs_lhash_rosearch(const afs_lhash
* lh
, unsigned key
, const void *data
)
383 struct bucket
*current_b
;
385 k
= afs_lhash_address(lh
, key
);
386 for (current_b
= lh
->table
[k
]; current_b
; current_b
= current_b
->next
) {
387 if (lh
->equal(data
, current_b
->data
)) {
388 return current_b
->data
;
396 afs_lhash_remove(afs_lhash
* lh
, unsigned key
, const void *data
)
399 struct bucket
**pprev
;
404 k
= afs_lhash_address(lh
, key
);
405 for (pprev
= 0, cur
= lh
->table
[k
]; cur
;
406 pprev
= &cur
->next
, cur
= cur
->next
) {
408 if (lh
->equal(data
, cur
->data
)) {
409 void *ptr
= cur
->data
;
414 lh
->table
[k
] = cur
->next
;
417 afs_atomlist_put(lh
->bucket_list
, cur
);
429 afs_lhash_enter(afs_lhash
* lh
, unsigned key
, void *data
)
434 buck
= afs_atomlist_get(lh
->bucket_list
);
442 k
= afs_lhash_address(lh
, key
);
444 buck
->next
= lh
->table
[k
];
450 * The load factor is the number of data items in the hash
451 * table divided by the logical table length. We expand the
452 * table when the load factor exceeds a predetermined limit
453 * (LOAD_FACTOR). To avoid floating point, we multiply both
454 * sides of the inequality by the logical table length...
456 if (lh
->ndata
> LOAD_FACTOR
* lh
->ltable
) {
457 afs_lhash_expand(lh
);
459 printf("lh->p = %d; lh->maxp = %d\n", lh
->p
, lh
->maxp
);
462 #ifdef CHECK_INVARIANTS
463 assert(lh
->ndata
<= LOAD_FACTOR
* lh
->ltable
);
464 #endif /* CHECK_INVARIANTS */
470 afs_lhash_stat(afs_lhash
* lh
, struct afs_lhash_stat
*sb
)
474 int min_chain_length
= -1;
475 int max_chain_length
= -1;
476 size_t buckets
= lh
->ltable
;
482 #ifdef CHECK_INVARIANTS
483 assert(lh
->ltable
<= lh
->ntable
);
484 #endif /* CHECK_INVARIANTS */
486 for (k
= 0; k
< lh
->ltable
; k
++) {
488 int chain_length
= 0;
490 for (buck
= lh
->table
[k
]; buck
; buck
= buck
->next
) {
495 if (min_chain_length
== -1) {
496 min_chain_length
= chain_length
;
499 if (max_chain_length
== -1) {
500 max_chain_length
= chain_length
;
503 if (chain_length
< min_chain_length
) {
504 min_chain_length
= chain_length
;
507 if (max_chain_length
< chain_length
) {
508 max_chain_length
= chain_length
;
512 sb
->min_chain_length
= min_chain_length
;
513 sb
->max_chain_length
= max_chain_length
;
514 sb
->buckets
= buckets
;
515 sb
->records
= records
;
517 #ifdef CHECK_INVARIANTS
518 assert(lh
->ndata
== records
);
519 #endif /* CHECK_INVARIANTS */
521 sb
->search_calls
= lh
->search_calls
;
522 sb
->search_tests
= lh
->search_tests
;
523 sb
->remove_calls
= lh
->remove_calls
;
524 sb
->remove_tests
= lh
->remove_tests
;