/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U
#define BUCKET_LOCKS_PER_CPU	32UL

union nested_table {
	union nested_table __rcu *table;
	struct rhash_head __rcu *bucket;
};

static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	spinlock_t *lock = rht_bucket_lock(tbl, hash);

	return (debug_locks) ? lockdep_is_held(lock) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif


static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
			      gfp_t gfp)
{
	unsigned int i, size;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif

	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);

	/* Never allocate more than 0.5 locks per bucket */
	size = min_t(unsigned int, size, tbl->size >> 1);

	if (tbl->nest)
		size = min(size, 1U << tbl->nest);

	if (sizeof(spinlock_t) != 0) {
		tbl->locks = NULL;
#ifdef CONFIG_NUMA
		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
		    gfp == GFP_KERNEL)
			tbl->locks = vmalloc(size * sizeof(spinlock_t));
#endif
		if (gfp != GFP_KERNEL)
			gfp |= __GFP_NOWARN | __GFP_NORETRY;

		if (!tbl->locks)
			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
						   gfp);
		if (!tbl->locks)
			return -ENOMEM;
		for (i = 0; i < size; i++)
			spin_lock_init(&tbl->locks[i]);
	}
	tbl->locks_mask = size - 1;

	return 0;
}

static void nested_table_free(union nested_table *ntbl, unsigned int size)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	const unsigned int len = 1 << shift;
	unsigned int i;

	ntbl = rcu_dereference_raw(ntbl->table);
	if (!ntbl)
		return;

	if (size > len) {
		size >>= shift;
		for (i = 0; i < len; i++)
			nested_table_free(ntbl + i, size);
	}

	kfree(ntbl);
}

static void nested_bucket_table_free(const struct bucket_table *tbl)
{
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int len = 1 << tbl->nest;
	union nested_table *ntbl;
	unsigned int i;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);

	for (i = 0; i < len; i++)
		nested_table_free(ntbl + i, size);

	kfree(ntbl);
}

static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl->nest)
		nested_bucket_table_free(tbl);

	kvfree(tbl->locks);
	kvfree(tbl);
}

static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

static union nested_table *nested_table_alloc(struct rhashtable *ht,
					      union nested_table __rcu **prev,
					      unsigned int shifted,
					      unsigned int nhash)
{
	union nested_table *ntbl;
	int i;

	ntbl = rcu_dereference(*prev);
	if (ntbl)
		return ntbl;

	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);

	if (ntbl && shifted) {
		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
					    (i << shifted) | nhash);
	}

	rcu_assign_pointer(*prev, ntbl);

	return ntbl;
}

static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
						      size_t nbuckets,
						      gfp_t gfp)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	struct bucket_table *tbl;
	size_t size;

	if (nbuckets < (1 << (shift + 1)))
		return NULL;

	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);

	tbl = kzalloc(size, gfp);
	if (!tbl)
		return NULL;

	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
				0, 0)) {
		kfree(tbl);
		return NULL;
	}

	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;

	return tbl;
}

static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;

	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
	    gfp != GFP_KERNEL)
		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (tbl == NULL && gfp == GFP_KERNEL)
		tbl = vzalloc(size);

	size = nbuckets;

	if (tbl == NULL && gfp != GFP_KERNEL) {
		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
		nbuckets = 0;
	}
	if (tbl == NULL)
		return NULL;

	tbl->size = size;

	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
		bucket_table_free(tbl);
		return NULL;
	}

	INIT_LIST_HEAD(&tbl->walkers);

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);

	return tbl;
}

static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
						  struct bucket_table *tbl)
{
	struct bucket_table *new_tbl;

	do {
		new_tbl = tbl;
		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	} while (tbl);

	return new_tbl;
}

static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht,
		rht_dereference_rcu(old_tbl->future_tbl, ht));
	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
	int err = -EAGAIN;
	struct rhash_head *head, *next, *entry;
	spinlock_t *new_bucket_lock;
	unsigned int new_hash;

	if (new_tbl->nest)
		goto out;

	err = -ENOENT;

	rht_for_each(entry, old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);

	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
				      new_tbl, new_hash);

	RCU_INIT_POINTER(entry->next, head);

	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
	spin_unlock(new_bucket_lock);

	rcu_assign_pointer(*pprev, next);

out:
	return err;
}

static int rhashtable_rehash_chain(struct rhashtable *ht,
				   unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	spinlock_t *old_bucket_lock;
	int err;

	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);

	spin_lock_bh(old_bucket_lock);
	while (!(err = rhashtable_rehash_one(ht, old_hash)))
		;

	if (err == -ENOENT) {
		old_tbl->rehash++;
		err = 0;
	}
	spin_unlock_bh(old_bucket_lock);

	return err;
}

static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Protect future_tbl using the first bucket lock. */
	spin_lock_bh(old_tbl->locks);

	/* Did somebody beat us to it? */
	if (rcu_access_pointer(old_tbl->future_tbl)) {
		spin_unlock_bh(old_tbl->locks);
		return -EEXIST;
	}

	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 */
	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);

	spin_unlock_bh(old_tbl->locks);

	return 0;
}

static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;
	int err;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
		err = rhashtable_rehash_chain(ht, old_hash);
		if (err)
			return err;
	}

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;
	spin_unlock(&ht->lock);

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}

static int rhashtable_rehash_alloc(struct rhashtable *ht,
				   struct bucket_table *old_tbl,
				   unsigned int size)
{
	struct bucket_table *new_tbl;
	int err;

	ASSERT_RHT_MUTEX(ht);

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}
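
/* Illustrative note on the resize thresholds used below (the numbers are a
 * worked example, not quoted from elsewhere in this file): with a table of
 * 1024 buckets, rht_grow_above_75() triggers a doubling to 2048 once more
 * than 768 entries are present, while rht_shrink_below_30() makes the
 * deferred worker try rhashtable_shrink() once the count drops below
 * roughly 307.  The shrink target is roundup_pow_of_two(nelems * 3 / 2),
 * clamped to p.min_size, so e.g. 300 remaining entries shrink the table
 * to 512 buckets.
 */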

/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht: the hash table to shrink
 *
 * This function shrinks the hash table to fit, i.e., to the smallest
 * size that would not cause it to expand right away automatically.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * The caller must ensure that no concurrent table mutations take place.
 * It is however valid to have concurrent lookups if they are RCU protected.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int nelems = atomic_read(&ht->nelems);
	unsigned int size = 0;

	if (nelems)
		size = roundup_pow_of_two(nelems * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	return rhashtable_rehash_alloc(ht, old_tbl, size);
}

static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;
	int err = 0;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);

	tbl = rht_dereference(ht->tbl, ht);
	tbl = rhashtable_last_table(ht, tbl);

	if (rht_grow_above_75(ht, tbl))
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
		err = rhashtable_shrink(ht);
	else if (tbl->nest)
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);

	if (!err)
		err = rhashtable_rehash_table(ht);

	mutex_unlock(&ht->mutex);

	if (err)
		schedule_work(&ht->run_work);
}

static int rhashtable_insert_rehash(struct rhashtable *ht,
				    struct bucket_table *tbl)
{
	struct bucket_table *old_tbl;
	struct bucket_table *new_tbl;
	unsigned int size;
	int err;

	old_tbl = rht_dereference_rcu(ht->tbl, ht);

	size = tbl->size;

	err = -EBUSY;

	if (rht_grow_above_75(ht, tbl))
		size *= 2;
	/* Do not schedule more than one rehash */
	else if (old_tbl != tbl)
		goto fail;

	err = -ENOMEM;

	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
	if (new_tbl == NULL)
		goto fail;

	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
	if (err) {
		bucket_table_free(new_tbl);
		if (err == -EEXIST)
			err = 0;
	} else
		schedule_work(&ht->run_work);

	return err;

fail:
	/* Do not fail the insert if someone else did a rehash. */
	if (likely(rcu_dereference_raw(tbl->future_tbl)))
		return 0;

	/* Schedule async rehash to retry allocation in process context. */
	if (err == -ENOMEM)
		schedule_work(&ht->run_work);

	return err;
}

static void *rhashtable_lookup_one(struct rhashtable *ht,
				   struct bucket_table *tbl, unsigned int hash,
				   const void *key, struct rhash_head *obj)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_head __rcu **pprev;
	struct rhash_head *head;
	int elasticity;

	elasticity = ht->elasticity;
	pprev = rht_bucket_var(tbl, hash);
	rht_for_each_continue(head, *pprev, tbl, hash) {
		struct rhlist_head *list;
		struct rhlist_head *plist;

		elasticity--;
		if (!key ||
		    (ht->p.obj_cmpfn ?
		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
		     rhashtable_compare(&arg, rht_obj(ht, head))))
			continue;

		if (!ht->rhlist)
			return rht_obj(ht, head);

		list = container_of(obj, struct rhlist_head, rhead);
		plist = container_of(head, struct rhlist_head, rhead);

		RCU_INIT_POINTER(list->next, plist);
		head = rht_dereference_bucket(head->next, tbl, hash);
		RCU_INIT_POINTER(list->rhead.next, head);
		rcu_assign_pointer(*pprev, obj);

		return NULL;
	}

	if (elasticity <= 0)
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-ENOENT);
}

static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
						  struct bucket_table *tbl,
						  unsigned int hash,
						  struct rhash_head *obj,
						  void *data)
{
	struct rhash_head __rcu **pprev;
	struct bucket_table *new_tbl;
	struct rhash_head *head;

	if (!IS_ERR_OR_NULL(data))
		return ERR_PTR(-EEXIST);

	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	new_tbl = rcu_dereference(tbl->future_tbl);
	if (new_tbl)
		return new_tbl;

	if (PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	if (unlikely(rht_grow_above_max(ht, tbl)))
		return ERR_PTR(-E2BIG);

	if (unlikely(rht_grow_above_100(ht, tbl)))
		return ERR_PTR(-EAGAIN);

	pprev = rht_bucket_insert(ht, tbl, hash);
	if (!pprev)
		return ERR_PTR(-ENOMEM);

	head = rht_dereference_bucket(*pprev, tbl, hash);

	RCU_INIT_POINTER(obj->next, head);
	if (ht->rhlist) {
		struct rhlist_head *list;

		list = container_of(obj, struct rhlist_head, rhead);
		RCU_INIT_POINTER(list->next, NULL);
	}

	rcu_assign_pointer(*pprev, obj);

	atomic_inc(&ht->nelems);
	if (rht_grow_above_75(ht, tbl))
		schedule_work(&ht->run_work);

	return NULL;
}

static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
				   struct rhash_head *obj)
{
	struct bucket_table *new_tbl;
	struct bucket_table *tbl;
	unsigned int hash;
	spinlock_t *lock;
	void *data;

	tbl = rcu_dereference(ht->tbl);

	/* All insertions must grab the oldest table containing
	 * the hashed bucket that is yet to be rehashed.
	 */
	for (;;) {
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		lock = rht_bucket_lock(tbl, hash);
		spin_lock_bh(lock);

		if (tbl->rehash <= hash)
			break;

		spin_unlock_bh(lock);
		tbl = rcu_dereference(tbl->future_tbl);
	}

	data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
	new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
	if (PTR_ERR(new_tbl) != -EEXIST)
		data = ERR_CAST(new_tbl);

	while (!IS_ERR_OR_NULL(new_tbl)) {
		tbl = new_tbl;
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		spin_lock_nested(rht_bucket_lock(tbl, hash),
				 SINGLE_DEPTH_NESTING);

		data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
		new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
		if (PTR_ERR(new_tbl) != -EEXIST)
			data = ERR_CAST(new_tbl);

		spin_unlock(rht_bucket_lock(tbl, hash));
	}

	spin_unlock_bh(lock);

	if (PTR_ERR(data) == -EAGAIN)
		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
			       -EAGAIN);

	return data;
}

void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
			     struct rhash_head *obj)
{
	void *data;

	do {
		rcu_read_lock();
		data = rhashtable_try_insert(ht, key, obj);
		rcu_read_unlock();
	} while (PTR_ERR(data) == -EAGAIN);

	return data;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);

/**
 * rhashtable_walk_enter - Initialise an iterator
 * @ht: Table to walk over
 * @iter: Hash table Iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice. Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may sleep so you must not call it from interrupt
 * context or with spin locks held.
 *
 * You must call rhashtable_walk_exit after this function returns.
 */
void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;

	spin_lock(&ht->lock);
	iter->walker.tbl =
		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
	spin_unlock(&ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_enter);

/**
 * rhashtable_walk_exit - Free an iterator
 * @iter: Hash table Iterator
 *
 * This function frees resources allocated by rhashtable_walk_init.
 */
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
	spin_lock(&iter->ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&iter->ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
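
/* Illustrative usage sketch for the walker API above and below; the object
 * type "struct test_obj" and its "key" member are hypothetical and error
 * handling is kept minimal:
 *
 *	struct rhashtable_iter iter;
 *	struct test_obj *obj;
 *
 *	rhashtable_walk_enter(ht, &iter);
 *	do {
 *		rhashtable_walk_start(&iter);
 *
 *		while ((obj = rhashtable_walk_next(&iter)) && !IS_ERR(obj))
 *			pr_info("visited key %d\n", obj->key);
 *
 *		rhashtable_walk_stop(&iter);
 *	} while (obj == ERR_PTR(-EAGAIN));
 *	rhashtable_walk_exit(&iter);
 *
 * A -EAGAIN from rhashtable_walk_start or rhashtable_walk_next only means
 * the table was resized and the iterator rewound, so the walk is simply
 * restarted; objects may then be seen twice, as noted above.
 */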

/**
 * rhashtable_walk_start - Start a hash table walk
 * @iter: Hash table iterator
 *
 * Start a hash table walk. Note that we take the RCU lock in all
 * cases including when we return an error. So you must always call
 * rhashtable_walk_stop to clean up.
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may use it immediately
 * by calling rhashtable_walk_next.
 */
int rhashtable_walk_start(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;

	rcu_read_lock();

	spin_lock(&ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&ht->lock);

	if (!iter->walker.tbl) {
		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
		return -EAGAIN;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start);

/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter: Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop when you are finished
 * with the walk.
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker.tbl;
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (p) {
		if (!rhlist || !(list = rcu_dereference(list->next))) {
			p = rcu_dereference(p->next);
			list = container_of(p, struct rhlist_head, rhead);
		}
		goto next;
	}

	for (; iter->slot < tbl->size; iter->slot++) {
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (rhlist) {
				list = container_of(p, struct rhlist_head,
						    rhead);
				do {
					if (!skip)
						goto next;
					skip--;
					list = rcu_dereference(list->next);
				} while (list);

				continue;
			}
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		iter->skip = 0;
	}

	iter->p = NULL;

	/* Ensure we see any new tables. */
	smp_rmb();

	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker.tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);

/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter: Hash table iterator
 *
 * Finish a hash table walk.
 */
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker.tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	spin_lock(&ht->lock);
	if (tbl->rehash < tbl->size)
		list_add(&iter->walker.list, &tbl->walkers);
	else
		iter->walker.tbl = NULL;
	spin_unlock(&ht->lock);

	iter->p = NULL;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);

static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
		   (unsigned long)params->min_size);
}

static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}

/**
 * rhashtable_init - initialize a new hash table
 * @ht: hash table to be initialized
 * @params: configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *	int			key;
 *	void *			my_member;
 *	struct rhash_head	node;
 * };
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.key_offset = offsetof(struct test_obj, key),
 *	.key_len = sizeof(int),
 *	.hashfn = jhash,
 *	.nulls_base = (1U << RHT_BASE_SHIFT),
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *	[...]
 *	struct rhash_head	node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
 * {
 *	struct test_obj *obj = data;
 *
 *	return [... hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.hashfn = jhash,
 *	.obj_hashfn = my_hash_fn,
 * };
 */
int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	size = HASH_DEFAULT_SIZE;

	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	memcpy(&ht->p, params, sizeof(*params));

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	if (params->max_size)
		ht->p.max_size = rounddown_pow_of_two(params->max_size);

	if (params->insecure_max_entries)
		ht->p.insecure_max_entries =
			rounddown_pow_of_two(params->insecure_max_entries);
	else
		ht->p.insecure_max_entries = ht->p.max_size * 2;

	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);

	if (params->nelem_hint)
		size = rounded_hashtable_size(&ht->p);

	/* The maximum (not average) chain length grows with the
	 * size of the hash table, at a rate of (log N)/(log log N).
	 * The value of 16 is selected so that even if the hash
	 * table grew to 2^32 you would not expect the maximum
	 * chain length to exceed it unless we are under attack
	 * (or extremely unlucky).
	 *
	 * As this limit is only to detect attacks, we don't need
	 * to set it to a lower value as you'd need the chain
	 * length to vastly exceed 16 to have any real effect
	 * on the system.
	 */
	if (!params->insecure_elasticity)
		ht->elasticity = 16;

	if (params->locks_mul)
		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
	else
		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;

	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (tbl == NULL)
		return -ENOMEM;

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init);
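
/* Illustrative usage sketch building on "Configuration Example 1" above;
 * "test_obj", "params" and the allocation/error handling are simplified
 * and hypothetical.  The *_fast helpers are declared in <linux/rhashtable.h>:
 *
 *	struct rhashtable ht;
 *	struct test_obj *obj, *found;
 *	int key = 1;
 *
 *	rhashtable_init(&ht, &params);
 *
 *	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *	obj->key = key;
 *	rhashtable_insert_fast(&ht, &obj->node, params);
 *
 *	rcu_read_lock();
 *	found = rhashtable_lookup_fast(&ht, &key, params);
 *	rcu_read_unlock();
 *
 *	rhashtable_remove_fast(&ht, &obj->node, params);
 *	rhashtable_destroy(&ht);
 *	kfree(obj);
 */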

/**
 * rhltable_init - initialize a new hash list table
 * @hlt: hash list table to be initialized
 * @params: configuration parameters
 *
 * Initializes a new hash list table.
 *
 * See documentation for rhashtable_init.
 */
int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
{
	int err;

	/* No rhlist NULLs marking for now. */
	if (params->nulls_base)
		return -EINVAL;

	err = rhashtable_init(&hlt->ht, params);
	hlt->ht.rhlist = true;
	return err;
}
EXPORT_SYMBOL_GPL(rhltable_init);

static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
				void (*free_fn)(void *ptr, void *arg),
				void *arg)
{
	struct rhlist_head *list;

	if (!ht->rhlist) {
		free_fn(rht_obj(ht, obj), arg);
		return;
	}

	list = container_of(obj, struct rhlist_head, rhead);
	do {
		obj = &list->rhead;
		list = rht_dereference(list->next, ht);
		free_fn(rht_obj(ht, obj), arg);
	} while (list);
}

/**
 * rhashtable_free_and_destroy - free elements and destroy hash table
 * @ht: the hash table to destroy
 * @free_fn: callback to release resources of element
 * @arg: pointer passed to free_fn
 *
 * Stops any pending async resize. If defined, invokes free_fn for each
 * element to release resources. Please note that RCU protected
 * readers may still be accessing the elements. Releasing of resources
 * must occur in a compatible manner. Then frees the bucket array.
 *
 * This function will eventually sleep to wait for an async resize
 * to complete. The caller is responsible that no further write operations
 * occur in parallel.
 */
void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg)
{
	struct bucket_table *tbl;
	unsigned int i;

	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	tbl = rht_dereference(ht->tbl, ht);
	if (free_fn) {
		for (i = 0; i < tbl->size; i++) {
			struct rhash_head *pos, *next;

			for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL;
			     !rht_is_a_nulls(pos);
			     pos = next,
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL)
				rhashtable_free_one(ht, pos, free_fn, arg);
		}
	}

	bucket_table_free(tbl);
	mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);

void rhashtable_destroy(struct rhashtable *ht)
{
	return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);

struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
					    unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	static struct rhash_head __rcu *rhnull =
		(struct rhash_head __rcu *)NULLS_MARKER(0);
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int subhash = hash;
	union nested_table *ntbl;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
	subhash >>= tbl->nest;

	while (ntbl && size > (1 << shift)) {
		index = subhash & ((1 << shift) - 1);
		ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
						  tbl, hash);
		size >>= shift;
		subhash >>= shift;
	}

	if (!ntbl)
		return &rhnull;

	return &ntbl[subhash].bucket;

}
EXPORT_SYMBOL_GPL(rht_bucket_nested);

struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
						   struct bucket_table *tbl,
						   unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	union nested_table *ntbl;
	unsigned int shifted;
	unsigned int nhash;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	hash >>= tbl->nest;
	nhash = index;
	shifted = tbl->nest;
	ntbl = nested_table_alloc(ht, &ntbl[index].table,
				  size <= (1 << shift) ? shifted : 0, nhash);

	while (ntbl && size > (1 << shift)) {
		index = hash & ((1 << shift) - 1);
		size >>= shift;
		hash >>= shift;
		nhash |= index << shifted;
		shifted += shift;
		ntbl = nested_table_alloc(ht, &ntbl[index].table,
					  size <= (1 << shift) ? shifted : 0,
					  nhash);
	}

	if (!ntbl)
		return NULL;

	return &ntbl[hash].bucket;

}
EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
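
/* Illustrative walk-through of the nested bucket layout above, assuming
 * 4 KiB pages and 8-byte pointers (so shift == 9 and each nested page
 * holds 512 slots): for a nested table of 8192 buckets,
 * nested_bucket_table_alloc() sets tbl->nest = (ilog2(8192) - 1) % 9 + 1
 * == 4, i.e. a 16-entry top level whose entries each cover
 * 8192 >> 4 == 512 buckets, exactly one second-level page.
 * rht_bucket_nested() then resolves a hash with (hash & 15) at the top
 * level and (hash >> 4) in the second level; no further levels are
 * descended because 512 is not greater than 1 << shift.
 */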