17e1e7763SThomas Graf /* 27e1e7763SThomas Graf * Resizable, Scalable, Concurrent Hash Table 37e1e7763SThomas Graf * 47e1e7763SThomas Graf * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> 57e1e7763SThomas Graf * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> 67e1e7763SThomas Graf * 77e1e7763SThomas Graf * Based on the following paper: 87e1e7763SThomas Graf * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf 97e1e7763SThomas Graf * 107e1e7763SThomas Graf * Code partially derived from nft_hash 117e1e7763SThomas Graf * 127e1e7763SThomas Graf * This program is free software; you can redistribute it and/or modify 137e1e7763SThomas Graf * it under the terms of the GNU General Public License version 2 as 147e1e7763SThomas Graf * published by the Free Software Foundation. 157e1e7763SThomas Graf */ 167e1e7763SThomas Graf 177e1e7763SThomas Graf #include <linux/kernel.h> 187e1e7763SThomas Graf #include <linux/init.h> 197e1e7763SThomas Graf #include <linux/log2.h> 207e1e7763SThomas Graf #include <linux/slab.h> 217e1e7763SThomas Graf #include <linux/vmalloc.h> 227e1e7763SThomas Graf #include <linux/mm.h> 2387545899SDaniel Borkmann #include <linux/jhash.h> 247e1e7763SThomas Graf #include <linux/random.h> 257e1e7763SThomas Graf #include <linux/rhashtable.h> 267e1e7763SThomas Graf 277e1e7763SThomas Graf #define HASH_DEFAULT_SIZE 64UL 287e1e7763SThomas Graf #define HASH_MIN_SIZE 4UL 2997defe1eSThomas Graf #define BUCKET_LOCKS_PER_CPU 128UL 3097defe1eSThomas Graf 31f89bd6f8SThomas Graf /* Base bits plus 1 bit for nulls marker */ 32f89bd6f8SThomas Graf #define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) 33f89bd6f8SThomas Graf 3497defe1eSThomas Graf enum { 3597defe1eSThomas Graf RHT_LOCK_NORMAL, 3697defe1eSThomas Graf RHT_LOCK_NESTED, 3797defe1eSThomas Graf RHT_LOCK_NESTED2, 3897defe1eSThomas Graf }; 3997defe1eSThomas Graf 4097defe1eSThomas Graf /* The bucket lock is selected based on the hash and protects mutations 4197defe1eSThomas Graf * on a group of hash buckets. 4297defe1eSThomas Graf * 4397defe1eSThomas Graf * IMPORTANT: When holding the bucket lock of both the old and new table 4497defe1eSThomas Graf * during expansions and shrinking, the old bucket lock must always be 4597defe1eSThomas Graf * acquired first. 4697defe1eSThomas Graf */ 4797defe1eSThomas Graf static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash) 4897defe1eSThomas Graf { 4997defe1eSThomas Graf return &tbl->locks[hash & tbl->locks_mask]; 5097defe1eSThomas Graf } 517e1e7763SThomas Graf 527e1e7763SThomas Graf #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) 5397defe1eSThomas Graf #define ASSERT_BUCKET_LOCK(TBL, HASH) \ 5497defe1eSThomas Graf BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH)) 557e1e7763SThomas Graf 567e1e7763SThomas Graf #ifdef CONFIG_PROVE_LOCKING 5797defe1eSThomas Graf int lockdep_rht_mutex_is_held(struct rhashtable *ht) 587e1e7763SThomas Graf { 5997defe1eSThomas Graf return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1; 607e1e7763SThomas Graf } 617e1e7763SThomas Graf EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); 6288d6ed15SThomas Graf 6388d6ed15SThomas Graf int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) 6488d6ed15SThomas Graf { 6597defe1eSThomas Graf spinlock_t *lock = bucket_lock(tbl, hash); 6697defe1eSThomas Graf 6797defe1eSThomas Graf return (debug_locks) ? lockdep_is_held(lock) : 1; 6888d6ed15SThomas Graf } 6988d6ed15SThomas Graf EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); 707e1e7763SThomas Graf #endif 717e1e7763SThomas Graf 72c91eee56SThomas Graf static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) 737e1e7763SThomas Graf { 747e1e7763SThomas Graf return (void *) he - ht->p.head_offset; 757e1e7763SThomas Graf } 767e1e7763SThomas Graf 778d24c0b4SThomas Graf static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) 787e1e7763SThomas Graf { 798d24c0b4SThomas Graf return hash & (tbl->size - 1); 807e1e7763SThomas Graf } 817e1e7763SThomas Graf 828d24c0b4SThomas Graf static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) 838d24c0b4SThomas Graf { 848d24c0b4SThomas Graf u32 hash; 858d24c0b4SThomas Graf 868d24c0b4SThomas Graf if (unlikely(!ht->p.key_len)) 878d24c0b4SThomas Graf hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); 888d24c0b4SThomas Graf else 898d24c0b4SThomas Graf hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, 908d24c0b4SThomas Graf ht->p.hash_rnd); 918d24c0b4SThomas Graf 92f89bd6f8SThomas Graf return hash >> HASH_RESERVED_SPACE; 938d24c0b4SThomas Graf } 948d24c0b4SThomas Graf 9597defe1eSThomas Graf static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) 967e1e7763SThomas Graf { 977e1e7763SThomas Graf struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 988d24c0b4SThomas Graf u32 hash; 997e1e7763SThomas Graf 1008d24c0b4SThomas Graf hash = ht->p.hashfn(key, len, ht->p.hash_rnd); 101f89bd6f8SThomas Graf hash >>= HASH_RESERVED_SPACE; 1028d24c0b4SThomas Graf 1038d24c0b4SThomas Graf return rht_bucket_index(tbl, hash); 1047e1e7763SThomas Graf } 1057e1e7763SThomas Graf 1067e1e7763SThomas Graf static u32 head_hashfn(const struct rhashtable *ht, 1078d24c0b4SThomas Graf const struct bucket_table *tbl, 1088d24c0b4SThomas Graf const struct rhash_head *he) 1097e1e7763SThomas Graf { 1108d24c0b4SThomas Graf return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he))); 1117e1e7763SThomas Graf } 1127e1e7763SThomas Graf 113b8e1943eSThomas Graf static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n) 114b8e1943eSThomas Graf { 115b8e1943eSThomas Graf struct rhash_head __rcu **pprev; 116b8e1943eSThomas Graf 117b8e1943eSThomas Graf for (pprev = &tbl->buckets[n]; 118f89bd6f8SThomas Graf !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n)); 119b8e1943eSThomas Graf pprev = &rht_dereference_bucket(*pprev, tbl, n)->next) 120b8e1943eSThomas Graf ; 121b8e1943eSThomas Graf 122b8e1943eSThomas Graf return pprev; 123b8e1943eSThomas Graf } 124b8e1943eSThomas Graf 12597defe1eSThomas Graf static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl) 12697defe1eSThomas Graf { 12797defe1eSThomas Graf unsigned int i, size; 12897defe1eSThomas Graf #if defined(CONFIG_PROVE_LOCKING) 12997defe1eSThomas Graf unsigned int nr_pcpus = 2; 13097defe1eSThomas Graf #else 13197defe1eSThomas Graf unsigned int nr_pcpus = num_possible_cpus(); 13297defe1eSThomas Graf #endif 13397defe1eSThomas Graf 13497defe1eSThomas Graf nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); 13597defe1eSThomas Graf size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); 13697defe1eSThomas Graf 13797defe1eSThomas Graf /* Never allocate more than one lock per bucket */ 13897defe1eSThomas Graf size = min_t(unsigned int, size, tbl->size); 13997defe1eSThomas Graf 14097defe1eSThomas Graf if (sizeof(spinlock_t) != 0) { 14197defe1eSThomas Graf #ifdef CONFIG_NUMA 14297defe1eSThomas Graf if (size * sizeof(spinlock_t) > PAGE_SIZE) 14397defe1eSThomas Graf tbl->locks = vmalloc(size * sizeof(spinlock_t)); 14497defe1eSThomas Graf else 14597defe1eSThomas Graf #endif 14697defe1eSThomas Graf tbl->locks = kmalloc_array(size, sizeof(spinlock_t), 14797defe1eSThomas Graf GFP_KERNEL); 14897defe1eSThomas Graf if (!tbl->locks) 14997defe1eSThomas Graf return -ENOMEM; 15097defe1eSThomas Graf for (i = 0; i < size; i++) 15197defe1eSThomas Graf spin_lock_init(&tbl->locks[i]); 15297defe1eSThomas Graf } 15397defe1eSThomas Graf tbl->locks_mask = size - 1; 15497defe1eSThomas Graf 15597defe1eSThomas Graf return 0; 15697defe1eSThomas Graf } 15797defe1eSThomas Graf 15897defe1eSThomas Graf static void bucket_table_free(const struct bucket_table *tbl) 15997defe1eSThomas Graf { 16097defe1eSThomas Graf if (tbl) 16197defe1eSThomas Graf kvfree(tbl->locks); 16297defe1eSThomas Graf 16397defe1eSThomas Graf kvfree(tbl); 16497defe1eSThomas Graf } 16597defe1eSThomas Graf 16697defe1eSThomas Graf static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, 16797defe1eSThomas Graf size_t nbuckets) 1687e1e7763SThomas Graf { 1697e1e7763SThomas Graf struct bucket_table *tbl; 1707e1e7763SThomas Graf size_t size; 171f89bd6f8SThomas Graf int i; 1727e1e7763SThomas Graf 1737e1e7763SThomas Graf size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); 1746eba8224SThomas Graf tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 1757e1e7763SThomas Graf if (tbl == NULL) 1767e1e7763SThomas Graf tbl = vzalloc(size); 1777e1e7763SThomas Graf 1787e1e7763SThomas Graf if (tbl == NULL) 1797e1e7763SThomas Graf return NULL; 1807e1e7763SThomas Graf 1817e1e7763SThomas Graf tbl->size = nbuckets; 1827e1e7763SThomas Graf 18397defe1eSThomas Graf if (alloc_bucket_locks(ht, tbl) < 0) { 18497defe1eSThomas Graf bucket_table_free(tbl); 18597defe1eSThomas Graf return NULL; 1867e1e7763SThomas Graf } 1877e1e7763SThomas Graf 188f89bd6f8SThomas Graf for (i = 0; i < nbuckets; i++) 189f89bd6f8SThomas Graf INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); 190f89bd6f8SThomas Graf 19197defe1eSThomas Graf return tbl; 1927e1e7763SThomas Graf } 1937e1e7763SThomas Graf 1947e1e7763SThomas Graf /** 1957e1e7763SThomas Graf * rht_grow_above_75 - returns true if nelems > 0.75 * table-size 1967e1e7763SThomas Graf * @ht: hash table 1977e1e7763SThomas Graf * @new_size: new table size 1987e1e7763SThomas Graf */ 1997e1e7763SThomas Graf bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) 2007e1e7763SThomas Graf { 2017e1e7763SThomas Graf /* Expand table when exceeding 75% load */ 20297defe1eSThomas Graf return atomic_read(&ht->nelems) > (new_size / 4 * 3); 2037e1e7763SThomas Graf } 2047e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rht_grow_above_75); 2057e1e7763SThomas Graf 2067e1e7763SThomas Graf /** 2077e1e7763SThomas Graf * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size 2087e1e7763SThomas Graf * @ht: hash table 2097e1e7763SThomas Graf * @new_size: new table size 2107e1e7763SThomas Graf */ 2117e1e7763SThomas Graf bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) 2127e1e7763SThomas Graf { 2137e1e7763SThomas Graf /* Shrink table beneath 30% load */ 21497defe1eSThomas Graf return atomic_read(&ht->nelems) < (new_size * 3 / 10); 2157e1e7763SThomas Graf } 2167e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rht_shrink_below_30); 2177e1e7763SThomas Graf 2187e1e7763SThomas Graf static void hashtable_chain_unzip(const struct rhashtable *ht, 2197e1e7763SThomas Graf const struct bucket_table *new_tbl, 22097defe1eSThomas Graf struct bucket_table *old_tbl, 22197defe1eSThomas Graf size_t old_hash) 2227e1e7763SThomas Graf { 2237e1e7763SThomas Graf struct rhash_head *he, *p, *next; 22497defe1eSThomas Graf spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL; 22597defe1eSThomas Graf unsigned int new_hash, new_hash2; 22697defe1eSThomas Graf 22797defe1eSThomas Graf ASSERT_BUCKET_LOCK(old_tbl, old_hash); 2287e1e7763SThomas Graf 2297e1e7763SThomas Graf /* Old bucket empty, no work needed. */ 23097defe1eSThomas Graf p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, 23197defe1eSThomas Graf old_hash); 232f89bd6f8SThomas Graf if (rht_is_a_nulls(p)) 2337e1e7763SThomas Graf return; 2347e1e7763SThomas Graf 23597defe1eSThomas Graf new_hash = new_hash2 = head_hashfn(ht, new_tbl, p); 23697defe1eSThomas Graf new_bucket_lock = bucket_lock(new_tbl, new_hash); 23797defe1eSThomas Graf 2387e1e7763SThomas Graf /* Advance the old bucket pointer one or more times until it 2397e1e7763SThomas Graf * reaches a node that doesn't hash to the same bucket as the 2407e1e7763SThomas Graf * previous node p. Call the previous node p; 2417e1e7763SThomas Graf */ 24297defe1eSThomas Graf rht_for_each_continue(he, p->next, old_tbl, old_hash) { 24397defe1eSThomas Graf new_hash2 = head_hashfn(ht, new_tbl, he); 24497defe1eSThomas Graf if (new_hash != new_hash2) 2457e1e7763SThomas Graf break; 2467e1e7763SThomas Graf p = he; 2477e1e7763SThomas Graf } 24897defe1eSThomas Graf rcu_assign_pointer(old_tbl->buckets[old_hash], p->next); 24997defe1eSThomas Graf 25097defe1eSThomas Graf spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); 25197defe1eSThomas Graf 25297defe1eSThomas Graf /* If we have encountered an entry that maps to a different bucket in 25397defe1eSThomas Graf * the new table, lock down that bucket as well as we might cut off 25497defe1eSThomas Graf * the end of the chain. 25597defe1eSThomas Graf */ 25697defe1eSThomas Graf new_bucket_lock2 = bucket_lock(new_tbl, new_hash); 25797defe1eSThomas Graf if (new_bucket_lock != new_bucket_lock2) 25897defe1eSThomas Graf spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2); 2597e1e7763SThomas Graf 2607e1e7763SThomas Graf /* Find the subsequent node which does hash to the same 2617e1e7763SThomas Graf * bucket as node P, or NULL if no such node exists. 2627e1e7763SThomas Graf */ 263f89bd6f8SThomas Graf INIT_RHT_NULLS_HEAD(next, ht, old_hash); 264f89bd6f8SThomas Graf if (!rht_is_a_nulls(he)) { 26597defe1eSThomas Graf rht_for_each_continue(he, he->next, old_tbl, old_hash) { 26697defe1eSThomas Graf if (head_hashfn(ht, new_tbl, he) == new_hash) { 2677e1e7763SThomas Graf next = he; 2687e1e7763SThomas Graf break; 2697e1e7763SThomas Graf } 2707e1e7763SThomas Graf } 2717e1e7763SThomas Graf } 2727e1e7763SThomas Graf 2737e1e7763SThomas Graf /* Set p's next pointer to that subsequent node pointer, 2747e1e7763SThomas Graf * bypassing the nodes which do not hash to p's bucket 2757e1e7763SThomas Graf */ 27697defe1eSThomas Graf rcu_assign_pointer(p->next, next); 27797defe1eSThomas Graf 27897defe1eSThomas Graf if (new_bucket_lock != new_bucket_lock2) 27997defe1eSThomas Graf spin_unlock_bh(new_bucket_lock2); 28097defe1eSThomas Graf spin_unlock_bh(new_bucket_lock); 28197defe1eSThomas Graf } 28297defe1eSThomas Graf 28397defe1eSThomas Graf static void link_old_to_new(struct bucket_table *new_tbl, 28497defe1eSThomas Graf unsigned int new_hash, struct rhash_head *entry) 28597defe1eSThomas Graf { 28697defe1eSThomas Graf spinlock_t *new_bucket_lock; 28797defe1eSThomas Graf 28897defe1eSThomas Graf new_bucket_lock = bucket_lock(new_tbl, new_hash); 28997defe1eSThomas Graf 29097defe1eSThomas Graf spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); 29197defe1eSThomas Graf rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry); 29297defe1eSThomas Graf spin_unlock_bh(new_bucket_lock); 2937e1e7763SThomas Graf } 2947e1e7763SThomas Graf 2957e1e7763SThomas Graf /** 2967e1e7763SThomas Graf * rhashtable_expand - Expand hash table while allowing concurrent lookups 2977e1e7763SThomas Graf * @ht: the hash table to expand 2987e1e7763SThomas Graf * 2997e1e7763SThomas Graf * A secondary bucket array is allocated and the hash entries are migrated 3007e1e7763SThomas Graf * while keeping them on both lists until the end of the RCU grace period. 3017e1e7763SThomas Graf * 3027e1e7763SThomas Graf * This function may only be called in a context where it is safe to call 3037e1e7763SThomas Graf * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 3047e1e7763SThomas Graf * 30597defe1eSThomas Graf * The caller must ensure that no concurrent resizing occurs by holding 30697defe1eSThomas Graf * ht->mutex. 30797defe1eSThomas Graf * 30897defe1eSThomas Graf * It is valid to have concurrent insertions and deletions protected by per 30997defe1eSThomas Graf * bucket locks or concurrent RCU protected lookups and traversals. 3107e1e7763SThomas Graf */ 3116eba8224SThomas Graf int rhashtable_expand(struct rhashtable *ht) 3127e1e7763SThomas Graf { 3137e1e7763SThomas Graf struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); 3147e1e7763SThomas Graf struct rhash_head *he; 31597defe1eSThomas Graf spinlock_t *old_bucket_lock; 31697defe1eSThomas Graf unsigned int new_hash, old_hash; 31797defe1eSThomas Graf bool complete = false; 3187e1e7763SThomas Graf 3197e1e7763SThomas Graf ASSERT_RHT_MUTEX(ht); 3207e1e7763SThomas Graf 3217e1e7763SThomas Graf if (ht->p.max_shift && ht->shift >= ht->p.max_shift) 3227e1e7763SThomas Graf return 0; 3237e1e7763SThomas Graf 32497defe1eSThomas Graf new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); 3257e1e7763SThomas Graf if (new_tbl == NULL) 3267e1e7763SThomas Graf return -ENOMEM; 3277e1e7763SThomas Graf 3287e1e7763SThomas Graf ht->shift++; 3297e1e7763SThomas Graf 33097defe1eSThomas Graf /* Make insertions go into the new, empty table right away. Deletions 33197defe1eSThomas Graf * and lookups will be attempted in both tables until we synchronize. 33297defe1eSThomas Graf * The synchronize_rcu() guarantees for the new table to be picked up 33397defe1eSThomas Graf * so no new additions go into the old table while we relink. 3347e1e7763SThomas Graf */ 33597defe1eSThomas Graf rcu_assign_pointer(ht->future_tbl, new_tbl); 33697defe1eSThomas Graf synchronize_rcu(); 33797defe1eSThomas Graf 33897defe1eSThomas Graf /* For each new bucket, search the corresponding old bucket for the 33997defe1eSThomas Graf * first entry that hashes to the new bucket, and link the end of 34097defe1eSThomas Graf * newly formed bucket chain (containing entries added to future 34197defe1eSThomas Graf * table) to that entry. Since all the entries which will end up in 34297defe1eSThomas Graf * the new bucket appear in the same old bucket, this constructs an 34397defe1eSThomas Graf * entirely valid new hash table, but with multiple buckets 34497defe1eSThomas Graf * "zipped" together into a single imprecise chain. 34597defe1eSThomas Graf */ 34697defe1eSThomas Graf for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { 34797defe1eSThomas Graf old_hash = rht_bucket_index(old_tbl, new_hash); 34897defe1eSThomas Graf old_bucket_lock = bucket_lock(old_tbl, old_hash); 34997defe1eSThomas Graf 35097defe1eSThomas Graf spin_lock_bh(old_bucket_lock); 35197defe1eSThomas Graf rht_for_each(he, old_tbl, old_hash) { 35297defe1eSThomas Graf if (head_hashfn(ht, new_tbl, he) == new_hash) { 35397defe1eSThomas Graf link_old_to_new(new_tbl, new_hash, he); 3547e1e7763SThomas Graf break; 3557e1e7763SThomas Graf } 3567e1e7763SThomas Graf } 35797defe1eSThomas Graf spin_unlock_bh(old_bucket_lock); 3587e1e7763SThomas Graf } 3597e1e7763SThomas Graf 3607e1e7763SThomas Graf /* Publish the new table pointer. Lookups may now traverse 3610c828f2fSHerbert Xu * the new table, but they will not benefit from any 3620c828f2fSHerbert Xu * additional efficiency until later steps unzip the buckets. 3637e1e7763SThomas Graf */ 3647e1e7763SThomas Graf rcu_assign_pointer(ht->tbl, new_tbl); 3657e1e7763SThomas Graf 3667e1e7763SThomas Graf /* Unzip interleaved hash chains */ 36797defe1eSThomas Graf while (!complete && !ht->being_destroyed) { 3687e1e7763SThomas Graf /* Wait for readers. All new readers will see the new 3697e1e7763SThomas Graf * table, and thus no references to the old table will 3707e1e7763SThomas Graf * remain. 3717e1e7763SThomas Graf */ 3727e1e7763SThomas Graf synchronize_rcu(); 3737e1e7763SThomas Graf 3747e1e7763SThomas Graf /* For each bucket in the old table (each of which 3757e1e7763SThomas Graf * contains items from multiple buckets of the new 3767e1e7763SThomas Graf * table): ... 3777e1e7763SThomas Graf */ 3787e1e7763SThomas Graf complete = true; 37997defe1eSThomas Graf for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { 380f89bd6f8SThomas Graf struct rhash_head *head; 381f89bd6f8SThomas Graf 38297defe1eSThomas Graf old_bucket_lock = bucket_lock(old_tbl, old_hash); 38397defe1eSThomas Graf spin_lock_bh(old_bucket_lock); 38497defe1eSThomas Graf 38597defe1eSThomas Graf hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash); 386f89bd6f8SThomas Graf head = rht_dereference_bucket(old_tbl->buckets[old_hash], 387f89bd6f8SThomas Graf old_tbl, old_hash); 388f89bd6f8SThomas Graf if (!rht_is_a_nulls(head)) 3897e1e7763SThomas Graf complete = false; 39097defe1eSThomas Graf 39197defe1eSThomas Graf spin_unlock_bh(old_bucket_lock); 3927e1e7763SThomas Graf } 39397defe1eSThomas Graf } 3947e1e7763SThomas Graf 3957e1e7763SThomas Graf bucket_table_free(old_tbl); 3967e1e7763SThomas Graf return 0; 3977e1e7763SThomas Graf } 3987e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_expand); 3997e1e7763SThomas Graf 4007e1e7763SThomas Graf /** 4017e1e7763SThomas Graf * rhashtable_shrink - Shrink hash table while allowing concurrent lookups 4027e1e7763SThomas Graf * @ht: the hash table to shrink 4037e1e7763SThomas Graf * 4047e1e7763SThomas Graf * This function may only be called in a context where it is safe to call 4057e1e7763SThomas Graf * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 4067e1e7763SThomas Graf * 40797defe1eSThomas Graf * The caller must ensure that no concurrent resizing occurs by holding 40897defe1eSThomas Graf * ht->mutex. 40997defe1eSThomas Graf * 4107e1e7763SThomas Graf * The caller must ensure that no concurrent table mutations take place. 4117e1e7763SThomas Graf * It is however valid to have concurrent lookups if they are RCU protected. 41297defe1eSThomas Graf * 41397defe1eSThomas Graf * It is valid to have concurrent insertions and deletions protected by per 41497defe1eSThomas Graf * bucket locks or concurrent RCU protected lookups and traversals. 4157e1e7763SThomas Graf */ 4166eba8224SThomas Graf int rhashtable_shrink(struct rhashtable *ht) 4177e1e7763SThomas Graf { 41897defe1eSThomas Graf struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht); 41997defe1eSThomas Graf spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2; 42097defe1eSThomas Graf unsigned int new_hash; 4217e1e7763SThomas Graf 4227e1e7763SThomas Graf ASSERT_RHT_MUTEX(ht); 4237e1e7763SThomas Graf 42494000176SYing Xue if (ht->shift <= ht->p.min_shift) 4257e1e7763SThomas Graf return 0; 4267e1e7763SThomas Graf 42797defe1eSThomas Graf new_tbl = bucket_table_alloc(ht, tbl->size / 2); 42897defe1eSThomas Graf if (new_tbl == NULL) 4297e1e7763SThomas Graf return -ENOMEM; 4307e1e7763SThomas Graf 43197defe1eSThomas Graf rcu_assign_pointer(ht->future_tbl, new_tbl); 43297defe1eSThomas Graf synchronize_rcu(); 4337e1e7763SThomas Graf 43497defe1eSThomas Graf /* Link the first entry in the old bucket to the end of the 43597defe1eSThomas Graf * bucket in the new table. As entries are concurrently being 43697defe1eSThomas Graf * added to the new table, lock down the new bucket. As we 43797defe1eSThomas Graf * always divide the size in half when shrinking, each bucket 43897defe1eSThomas Graf * in the new table maps to exactly two buckets in the old 43997defe1eSThomas Graf * table. 44097defe1eSThomas Graf * 44197defe1eSThomas Graf * As removals can occur concurrently on the old table, we need 44297defe1eSThomas Graf * to lock down both matching buckets in the old table. 4437e1e7763SThomas Graf */ 44497defe1eSThomas Graf for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { 44597defe1eSThomas Graf old_bucket_lock1 = bucket_lock(tbl, new_hash); 44697defe1eSThomas Graf old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size); 44797defe1eSThomas Graf new_bucket_lock = bucket_lock(new_tbl, new_hash); 4487e1e7763SThomas Graf 44997defe1eSThomas Graf spin_lock_bh(old_bucket_lock1); 45097defe1eSThomas Graf spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED); 45197defe1eSThomas Graf spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2); 45297defe1eSThomas Graf 45397defe1eSThomas Graf rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), 45497defe1eSThomas Graf tbl->buckets[new_hash]); 45597defe1eSThomas Graf rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), 45697defe1eSThomas Graf tbl->buckets[new_hash + new_tbl->size]); 45797defe1eSThomas Graf 45897defe1eSThomas Graf spin_unlock_bh(new_bucket_lock); 45997defe1eSThomas Graf spin_unlock_bh(old_bucket_lock2); 46097defe1eSThomas Graf spin_unlock_bh(old_bucket_lock1); 4617e1e7763SThomas Graf } 4627e1e7763SThomas Graf 4637e1e7763SThomas Graf /* Publish the new, valid hash table */ 46497defe1eSThomas Graf rcu_assign_pointer(ht->tbl, new_tbl); 46597defe1eSThomas Graf ht->shift--; 4667e1e7763SThomas Graf 4677e1e7763SThomas Graf /* Wait for readers. No new readers will have references to the 4687e1e7763SThomas Graf * old hash table. 4697e1e7763SThomas Graf */ 4707e1e7763SThomas Graf synchronize_rcu(); 4717e1e7763SThomas Graf 4727e1e7763SThomas Graf bucket_table_free(tbl); 4737e1e7763SThomas Graf 4747e1e7763SThomas Graf return 0; 4757e1e7763SThomas Graf } 4767e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_shrink); 4777e1e7763SThomas Graf 47897defe1eSThomas Graf static void rht_deferred_worker(struct work_struct *work) 47997defe1eSThomas Graf { 48097defe1eSThomas Graf struct rhashtable *ht; 48197defe1eSThomas Graf struct bucket_table *tbl; 48297defe1eSThomas Graf 48397defe1eSThomas Graf ht = container_of(work, struct rhashtable, run_work.work); 48497defe1eSThomas Graf mutex_lock(&ht->mutex); 48597defe1eSThomas Graf tbl = rht_dereference(ht->tbl, ht); 48697defe1eSThomas Graf 48797defe1eSThomas Graf if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) 48897defe1eSThomas Graf rhashtable_expand(ht); 48997defe1eSThomas Graf else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) 49097defe1eSThomas Graf rhashtable_shrink(ht); 49197defe1eSThomas Graf 49297defe1eSThomas Graf mutex_unlock(&ht->mutex); 49397defe1eSThomas Graf } 49497defe1eSThomas Graf 4957e1e7763SThomas Graf /** 4967e1e7763SThomas Graf * rhashtable_insert - insert object into hash hash table 4977e1e7763SThomas Graf * @ht: hash table 4987e1e7763SThomas Graf * @obj: pointer to hash head inside object 4997e1e7763SThomas Graf * 50097defe1eSThomas Graf * Will take a per bucket spinlock to protect against mutual mutations 50197defe1eSThomas Graf * on the same bucket. Multiple insertions may occur in parallel unless 50297defe1eSThomas Graf * they map to the same bucket lock. 5037e1e7763SThomas Graf * 50497defe1eSThomas Graf * It is safe to call this function from atomic context. 50597defe1eSThomas Graf * 50697defe1eSThomas Graf * Will trigger an automatic deferred table resizing if the size grows 50797defe1eSThomas Graf * beyond the watermark indicated by grow_decision() which can be passed 50897defe1eSThomas Graf * to rhashtable_init(). 5097e1e7763SThomas Graf */ 5106eba8224SThomas Graf void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) 5117e1e7763SThomas Graf { 51297defe1eSThomas Graf struct bucket_table *tbl; 513f89bd6f8SThomas Graf struct rhash_head *head; 51497defe1eSThomas Graf spinlock_t *lock; 51597defe1eSThomas Graf unsigned hash; 5167e1e7763SThomas Graf 51797defe1eSThomas Graf rcu_read_lock(); 5187e1e7763SThomas Graf 51997defe1eSThomas Graf tbl = rht_dereference_rcu(ht->future_tbl, ht); 5208d24c0b4SThomas Graf hash = head_hashfn(ht, tbl, obj); 52197defe1eSThomas Graf lock = bucket_lock(tbl, hash); 52297defe1eSThomas Graf 52397defe1eSThomas Graf spin_lock_bh(lock); 524f89bd6f8SThomas Graf head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); 525f89bd6f8SThomas Graf if (rht_is_a_nulls(head)) 526f89bd6f8SThomas Graf INIT_RHT_NULLS_HEAD(obj->next, ht, hash); 527f89bd6f8SThomas Graf else 528f89bd6f8SThomas Graf RCU_INIT_POINTER(obj->next, head); 529f89bd6f8SThomas Graf 5307e1e7763SThomas Graf rcu_assign_pointer(tbl->buckets[hash], obj); 53197defe1eSThomas Graf spin_unlock_bh(lock); 5327e1e7763SThomas Graf 53397defe1eSThomas Graf atomic_inc(&ht->nelems); 53497defe1eSThomas Graf 53597defe1eSThomas Graf /* Only grow the table if no resizing is currently in progress. */ 53697defe1eSThomas Graf if (ht->tbl != ht->future_tbl && 53797defe1eSThomas Graf ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) 53897defe1eSThomas Graf schedule_delayed_work(&ht->run_work, 0); 53997defe1eSThomas Graf 54097defe1eSThomas Graf rcu_read_unlock(); 5417e1e7763SThomas Graf } 5427e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_insert); 5437e1e7763SThomas Graf 5447e1e7763SThomas Graf /** 5457e1e7763SThomas Graf * rhashtable_remove - remove object from hash table 5467e1e7763SThomas Graf * @ht: hash table 5477e1e7763SThomas Graf * @obj: pointer to hash head inside object 5487e1e7763SThomas Graf * 5497e1e7763SThomas Graf * Since the hash chain is single linked, the removal operation needs to 5507e1e7763SThomas Graf * walk the bucket chain upon removal. The removal operation is thus 5517e1e7763SThomas Graf * considerable slow if the hash table is not correctly sized. 5527e1e7763SThomas Graf * 5537e1e7763SThomas Graf * Will automatically shrink the table via rhashtable_expand() if the the 5547e1e7763SThomas Graf * shrink_decision function specified at rhashtable_init() returns true. 5557e1e7763SThomas Graf * 5567e1e7763SThomas Graf * The caller must ensure that no concurrent table mutations occur. It is 5577e1e7763SThomas Graf * however valid to have concurrent lookups if they are RCU protected. 5587e1e7763SThomas Graf */ 5596eba8224SThomas Graf bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) 5607e1e7763SThomas Graf { 56197defe1eSThomas Graf struct bucket_table *tbl; 5627e1e7763SThomas Graf struct rhash_head __rcu **pprev; 5637e1e7763SThomas Graf struct rhash_head *he; 56497defe1eSThomas Graf spinlock_t *lock; 56597defe1eSThomas Graf unsigned int hash; 5667e1e7763SThomas Graf 56797defe1eSThomas Graf rcu_read_lock(); 56897defe1eSThomas Graf tbl = rht_dereference_rcu(ht->tbl, ht); 56997defe1eSThomas Graf hash = head_hashfn(ht, tbl, obj); 5707e1e7763SThomas Graf 57197defe1eSThomas Graf lock = bucket_lock(tbl, hash); 57297defe1eSThomas Graf spin_lock_bh(lock); 5737e1e7763SThomas Graf 57497defe1eSThomas Graf restart: 57597defe1eSThomas Graf pprev = &tbl->buckets[hash]; 57697defe1eSThomas Graf rht_for_each(he, tbl, hash) { 5777e1e7763SThomas Graf if (he != obj) { 5787e1e7763SThomas Graf pprev = &he->next; 5797e1e7763SThomas Graf continue; 5807e1e7763SThomas Graf } 5817e1e7763SThomas Graf 58297defe1eSThomas Graf rcu_assign_pointer(*pprev, obj->next); 58397defe1eSThomas Graf atomic_dec(&ht->nelems); 584897362e4SThomas Graf 58597defe1eSThomas Graf spin_unlock_bh(lock); 58697defe1eSThomas Graf 58797defe1eSThomas Graf if (ht->tbl != ht->future_tbl && 58897defe1eSThomas Graf ht->p.shrink_decision && 589897362e4SThomas Graf ht->p.shrink_decision(ht, tbl->size)) 59097defe1eSThomas Graf schedule_delayed_work(&ht->run_work, 0); 59197defe1eSThomas Graf 59297defe1eSThomas Graf rcu_read_unlock(); 593897362e4SThomas Graf 5947e1e7763SThomas Graf return true; 5957e1e7763SThomas Graf } 5967e1e7763SThomas Graf 59797defe1eSThomas Graf if (tbl != rht_dereference_rcu(ht->tbl, ht)) { 59897defe1eSThomas Graf spin_unlock_bh(lock); 59997defe1eSThomas Graf 60097defe1eSThomas Graf tbl = rht_dereference_rcu(ht->tbl, ht); 60197defe1eSThomas Graf hash = head_hashfn(ht, tbl, obj); 60297defe1eSThomas Graf 60397defe1eSThomas Graf lock = bucket_lock(tbl, hash); 60497defe1eSThomas Graf spin_lock_bh(lock); 60597defe1eSThomas Graf goto restart; 60697defe1eSThomas Graf } 60797defe1eSThomas Graf 60897defe1eSThomas Graf spin_unlock_bh(lock); 60997defe1eSThomas Graf rcu_read_unlock(); 61097defe1eSThomas Graf 6117e1e7763SThomas Graf return false; 6127e1e7763SThomas Graf } 6137e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_remove); 6147e1e7763SThomas Graf 615*efb975a6SYing Xue struct rhashtable_compare_arg { 616*efb975a6SYing Xue struct rhashtable *ht; 617*efb975a6SYing Xue const void *key; 618*efb975a6SYing Xue }; 619*efb975a6SYing Xue 620*efb975a6SYing Xue static bool rhashtable_compare(void *ptr, void *arg) 621*efb975a6SYing Xue { 622*efb975a6SYing Xue struct rhashtable_compare_arg *x = arg; 623*efb975a6SYing Xue struct rhashtable *ht = x->ht; 624*efb975a6SYing Xue 625*efb975a6SYing Xue return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len); 626*efb975a6SYing Xue } 627*efb975a6SYing Xue 6287e1e7763SThomas Graf /** 6297e1e7763SThomas Graf * rhashtable_lookup - lookup key in hash table 6307e1e7763SThomas Graf * @ht: hash table 6317e1e7763SThomas Graf * @key: pointer to key 6327e1e7763SThomas Graf * 6337e1e7763SThomas Graf * Computes the hash value for the key and traverses the bucket chain looking 6347e1e7763SThomas Graf * for a entry with an identical key. The first matching entry is returned. 6357e1e7763SThomas Graf * 6367e1e7763SThomas Graf * This lookup function may only be used for fixed key hash table (key_len 6377e1e7763SThomas Graf * paramter set). It will BUG() if used inappropriately. 6387e1e7763SThomas Graf * 63997defe1eSThomas Graf * Lookups may occur in parallel with hashtable mutations and resizing. 6407e1e7763SThomas Graf */ 64197defe1eSThomas Graf void *rhashtable_lookup(struct rhashtable *ht, const void *key) 6427e1e7763SThomas Graf { 643*efb975a6SYing Xue struct rhashtable_compare_arg arg = { 644*efb975a6SYing Xue .ht = ht, 645*efb975a6SYing Xue .key = key, 646*efb975a6SYing Xue }; 6477e1e7763SThomas Graf 6487e1e7763SThomas Graf BUG_ON(!ht->p.key_len); 6497e1e7763SThomas Graf 650*efb975a6SYing Xue return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg); 6517e1e7763SThomas Graf } 6527e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_lookup); 6537e1e7763SThomas Graf 6547e1e7763SThomas Graf /** 6557e1e7763SThomas Graf * rhashtable_lookup_compare - search hash table with compare function 6567e1e7763SThomas Graf * @ht: hash table 6578d24c0b4SThomas Graf * @key: the pointer to the key 6587e1e7763SThomas Graf * @compare: compare function, must return true on match 6597e1e7763SThomas Graf * @arg: argument passed on to compare function 6607e1e7763SThomas Graf * 6617e1e7763SThomas Graf * Traverses the bucket chain behind the provided hash value and calls the 6627e1e7763SThomas Graf * specified compare function for each entry. 6637e1e7763SThomas Graf * 66497defe1eSThomas Graf * Lookups may occur in parallel with hashtable mutations and resizing. 6657e1e7763SThomas Graf * 6667e1e7763SThomas Graf * Returns the first entry on which the compare function returned true. 6677e1e7763SThomas Graf */ 66897defe1eSThomas Graf void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, 6697e1e7763SThomas Graf bool (*compare)(void *, void *), void *arg) 6707e1e7763SThomas Graf { 67197defe1eSThomas Graf const struct bucket_table *tbl, *old_tbl; 6727e1e7763SThomas Graf struct rhash_head *he; 6738d24c0b4SThomas Graf u32 hash; 6747e1e7763SThomas Graf 67597defe1eSThomas Graf rcu_read_lock(); 67697defe1eSThomas Graf 67797defe1eSThomas Graf old_tbl = rht_dereference_rcu(ht->tbl, ht); 67897defe1eSThomas Graf tbl = rht_dereference_rcu(ht->future_tbl, ht); 6798d24c0b4SThomas Graf hash = key_hashfn(ht, key, ht->p.key_len); 68097defe1eSThomas Graf restart: 68197defe1eSThomas Graf rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) { 6827e1e7763SThomas Graf if (!compare(rht_obj(ht, he), arg)) 6837e1e7763SThomas Graf continue; 68497defe1eSThomas Graf rcu_read_unlock(); 685a4b18cdaSThomas Graf return rht_obj(ht, he); 6867e1e7763SThomas Graf } 6877e1e7763SThomas Graf 68897defe1eSThomas Graf if (unlikely(tbl != old_tbl)) { 68997defe1eSThomas Graf tbl = old_tbl; 69097defe1eSThomas Graf goto restart; 69197defe1eSThomas Graf } 69297defe1eSThomas Graf rcu_read_unlock(); 69397defe1eSThomas Graf 6947e1e7763SThomas Graf return NULL; 6957e1e7763SThomas Graf } 6967e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); 6977e1e7763SThomas Graf 69894000176SYing Xue static size_t rounded_hashtable_size(struct rhashtable_params *params) 6997e1e7763SThomas Graf { 70094000176SYing Xue return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), 70194000176SYing Xue 1UL << params->min_shift); 7027e1e7763SThomas Graf } 7037e1e7763SThomas Graf 7047e1e7763SThomas Graf /** 7057e1e7763SThomas Graf * rhashtable_init - initialize a new hash table 7067e1e7763SThomas Graf * @ht: hash table to be initialized 7077e1e7763SThomas Graf * @params: configuration parameters 7087e1e7763SThomas Graf * 7097e1e7763SThomas Graf * Initializes a new hash table based on the provided configuration 7107e1e7763SThomas Graf * parameters. A table can be configured either with a variable or 7117e1e7763SThomas Graf * fixed length key: 7127e1e7763SThomas Graf * 7137e1e7763SThomas Graf * Configuration Example 1: Fixed length keys 7147e1e7763SThomas Graf * struct test_obj { 7157e1e7763SThomas Graf * int key; 7167e1e7763SThomas Graf * void * my_member; 7177e1e7763SThomas Graf * struct rhash_head node; 7187e1e7763SThomas Graf * }; 7197e1e7763SThomas Graf * 7207e1e7763SThomas Graf * struct rhashtable_params params = { 7217e1e7763SThomas Graf * .head_offset = offsetof(struct test_obj, node), 7227e1e7763SThomas Graf * .key_offset = offsetof(struct test_obj, key), 7237e1e7763SThomas Graf * .key_len = sizeof(int), 72487545899SDaniel Borkmann * .hashfn = jhash, 725f89bd6f8SThomas Graf * .nulls_base = (1U << RHT_BASE_SHIFT), 7267e1e7763SThomas Graf * }; 7277e1e7763SThomas Graf * 7287e1e7763SThomas Graf * Configuration Example 2: Variable length keys 7297e1e7763SThomas Graf * struct test_obj { 7307e1e7763SThomas Graf * [...] 7317e1e7763SThomas Graf * struct rhash_head node; 7327e1e7763SThomas Graf * }; 7337e1e7763SThomas Graf * 7347e1e7763SThomas Graf * u32 my_hash_fn(const void *data, u32 seed) 7357e1e7763SThomas Graf * { 7367e1e7763SThomas Graf * struct test_obj *obj = data; 7377e1e7763SThomas Graf * 7387e1e7763SThomas Graf * return [... hash ...]; 7397e1e7763SThomas Graf * } 7407e1e7763SThomas Graf * 7417e1e7763SThomas Graf * struct rhashtable_params params = { 7427e1e7763SThomas Graf * .head_offset = offsetof(struct test_obj, node), 74387545899SDaniel Borkmann * .hashfn = jhash, 7447e1e7763SThomas Graf * .obj_hashfn = my_hash_fn, 7457e1e7763SThomas Graf * }; 7467e1e7763SThomas Graf */ 7477e1e7763SThomas Graf int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) 7487e1e7763SThomas Graf { 7497e1e7763SThomas Graf struct bucket_table *tbl; 7507e1e7763SThomas Graf size_t size; 7517e1e7763SThomas Graf 7527e1e7763SThomas Graf size = HASH_DEFAULT_SIZE; 7537e1e7763SThomas Graf 7547e1e7763SThomas Graf if ((params->key_len && !params->hashfn) || 7557e1e7763SThomas Graf (!params->key_len && !params->obj_hashfn)) 7567e1e7763SThomas Graf return -EINVAL; 7577e1e7763SThomas Graf 758f89bd6f8SThomas Graf if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) 759f89bd6f8SThomas Graf return -EINVAL; 760f89bd6f8SThomas Graf 76194000176SYing Xue params->min_shift = max_t(size_t, params->min_shift, 76294000176SYing Xue ilog2(HASH_MIN_SIZE)); 76394000176SYing Xue 7647e1e7763SThomas Graf if (params->nelem_hint) 76594000176SYing Xue size = rounded_hashtable_size(params); 7667e1e7763SThomas Graf 76797defe1eSThomas Graf memset(ht, 0, sizeof(*ht)); 76897defe1eSThomas Graf mutex_init(&ht->mutex); 76997defe1eSThomas Graf memcpy(&ht->p, params, sizeof(*params)); 77097defe1eSThomas Graf 77197defe1eSThomas Graf if (params->locks_mul) 77297defe1eSThomas Graf ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); 77397defe1eSThomas Graf else 77497defe1eSThomas Graf ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; 77597defe1eSThomas Graf 77697defe1eSThomas Graf tbl = bucket_table_alloc(ht, size); 7777e1e7763SThomas Graf if (tbl == NULL) 7787e1e7763SThomas Graf return -ENOMEM; 7797e1e7763SThomas Graf 7807e1e7763SThomas Graf ht->shift = ilog2(tbl->size); 7817e1e7763SThomas Graf RCU_INIT_POINTER(ht->tbl, tbl); 78297defe1eSThomas Graf RCU_INIT_POINTER(ht->future_tbl, tbl); 7837e1e7763SThomas Graf 7847e1e7763SThomas Graf if (!ht->p.hash_rnd) 7857e1e7763SThomas Graf get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); 7867e1e7763SThomas Graf 78797defe1eSThomas Graf if (ht->p.grow_decision || ht->p.shrink_decision) 78897defe1eSThomas Graf INIT_DEFERRABLE_WORK(&ht->run_work, rht_deferred_worker); 78997defe1eSThomas Graf 7907e1e7763SThomas Graf return 0; 7917e1e7763SThomas Graf } 7927e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_init); 7937e1e7763SThomas Graf 7947e1e7763SThomas Graf /** 7957e1e7763SThomas Graf * rhashtable_destroy - destroy hash table 7967e1e7763SThomas Graf * @ht: the hash table to destroy 7977e1e7763SThomas Graf * 798ae82ddcfSPablo Neira Ayuso * Frees the bucket array. This function is not rcu safe, therefore the caller 799ae82ddcfSPablo Neira Ayuso * has to make sure that no resizing may happen by unpublishing the hashtable 800ae82ddcfSPablo Neira Ayuso * and waiting for the quiescent cycle before releasing the bucket array. 8017e1e7763SThomas Graf */ 80297defe1eSThomas Graf void rhashtable_destroy(struct rhashtable *ht) 8037e1e7763SThomas Graf { 80497defe1eSThomas Graf ht->being_destroyed = true; 80597defe1eSThomas Graf 80697defe1eSThomas Graf mutex_lock(&ht->mutex); 80797defe1eSThomas Graf 80897defe1eSThomas Graf cancel_delayed_work(&ht->run_work); 80997defe1eSThomas Graf bucket_table_free(rht_dereference(ht->tbl, ht)); 81097defe1eSThomas Graf 81197defe1eSThomas Graf mutex_unlock(&ht->mutex); 8127e1e7763SThomas Graf } 8137e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_destroy); 8147e1e7763SThomas Graf 8157e1e7763SThomas Graf /************************************************************************** 8167e1e7763SThomas Graf * Self Test 8177e1e7763SThomas Graf **************************************************************************/ 8187e1e7763SThomas Graf 8197e1e7763SThomas Graf #ifdef CONFIG_TEST_RHASHTABLE 8207e1e7763SThomas Graf 8217e1e7763SThomas Graf #define TEST_HT_SIZE 8 8227e1e7763SThomas Graf #define TEST_ENTRIES 2048 8237e1e7763SThomas Graf #define TEST_PTR ((void *) 0xdeadbeef) 8247e1e7763SThomas Graf #define TEST_NEXPANDS 4 8257e1e7763SThomas Graf 8267e1e7763SThomas Graf struct test_obj { 8277e1e7763SThomas Graf void *ptr; 8287e1e7763SThomas Graf int value; 8297e1e7763SThomas Graf struct rhash_head node; 8307e1e7763SThomas Graf }; 8317e1e7763SThomas Graf 8327e1e7763SThomas Graf static int __init test_rht_lookup(struct rhashtable *ht) 8337e1e7763SThomas Graf { 8347e1e7763SThomas Graf unsigned int i; 8357e1e7763SThomas Graf 8367e1e7763SThomas Graf for (i = 0; i < TEST_ENTRIES * 2; i++) { 8377e1e7763SThomas Graf struct test_obj *obj; 8387e1e7763SThomas Graf bool expected = !(i % 2); 8397e1e7763SThomas Graf u32 key = i; 8407e1e7763SThomas Graf 8417e1e7763SThomas Graf obj = rhashtable_lookup(ht, &key); 8427e1e7763SThomas Graf 8437e1e7763SThomas Graf if (expected && !obj) { 8447e1e7763SThomas Graf pr_warn("Test failed: Could not find key %u\n", key); 8457e1e7763SThomas Graf return -ENOENT; 8467e1e7763SThomas Graf } else if (!expected && obj) { 8477e1e7763SThomas Graf pr_warn("Test failed: Unexpected entry found for key %u\n", 8487e1e7763SThomas Graf key); 8497e1e7763SThomas Graf return -EEXIST; 8507e1e7763SThomas Graf } else if (expected && obj) { 8517e1e7763SThomas Graf if (obj->ptr != TEST_PTR || obj->value != i) { 8527e1e7763SThomas Graf pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", 8537e1e7763SThomas Graf obj->ptr, TEST_PTR, obj->value, i); 8547e1e7763SThomas Graf return -EINVAL; 8557e1e7763SThomas Graf } 8567e1e7763SThomas Graf } 8577e1e7763SThomas Graf } 8587e1e7763SThomas Graf 8597e1e7763SThomas Graf return 0; 8607e1e7763SThomas Graf } 8617e1e7763SThomas Graf 8623e7b2ec4SThomas Graf static void test_bucket_stats(struct rhashtable *ht, bool quiet) 8637e1e7763SThomas Graf { 8643e7b2ec4SThomas Graf unsigned int cnt, rcu_cnt, i, total = 0; 86588d6ed15SThomas Graf struct rhash_head *pos; 8667e1e7763SThomas Graf struct test_obj *obj; 8673e7b2ec4SThomas Graf struct bucket_table *tbl; 8687e1e7763SThomas Graf 8693e7b2ec4SThomas Graf tbl = rht_dereference_rcu(ht->tbl, ht); 8707e1e7763SThomas Graf for (i = 0; i < tbl->size; i++) { 8713e7b2ec4SThomas Graf rcu_cnt = cnt = 0; 8727e1e7763SThomas Graf 8737e1e7763SThomas Graf if (!quiet) 8747e1e7763SThomas Graf pr_info(" [%#4x/%zu]", i, tbl->size); 8757e1e7763SThomas Graf 87688d6ed15SThomas Graf rht_for_each_entry_rcu(obj, pos, tbl, i, node) { 8777e1e7763SThomas Graf cnt++; 8787e1e7763SThomas Graf total++; 8797e1e7763SThomas Graf if (!quiet) 8807e1e7763SThomas Graf pr_cont(" [%p],", obj); 8817e1e7763SThomas Graf } 8827e1e7763SThomas Graf 88388d6ed15SThomas Graf rht_for_each_entry_rcu(obj, pos, tbl, i, node) 8843e7b2ec4SThomas Graf rcu_cnt++; 8853e7b2ec4SThomas Graf 8863e7b2ec4SThomas Graf if (rcu_cnt != cnt) 8873e7b2ec4SThomas Graf pr_warn("Test failed: Chain count mismach %d != %d", 8883e7b2ec4SThomas Graf cnt, rcu_cnt); 8893e7b2ec4SThomas Graf 8907e1e7763SThomas Graf if (!quiet) 8917e1e7763SThomas Graf pr_cont("\n [%#x] first element: %p, chain length: %u\n", 8927e1e7763SThomas Graf i, tbl->buckets[i], cnt); 8937e1e7763SThomas Graf } 8947e1e7763SThomas Graf 89597defe1eSThomas Graf pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n", 89697defe1eSThomas Graf total, atomic_read(&ht->nelems), TEST_ENTRIES); 8973e7b2ec4SThomas Graf 89897defe1eSThomas Graf if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES) 8993e7b2ec4SThomas Graf pr_warn("Test failed: Total count mismatch ^^^"); 9007e1e7763SThomas Graf } 9017e1e7763SThomas Graf 9027e1e7763SThomas Graf static int __init test_rhashtable(struct rhashtable *ht) 9037e1e7763SThomas Graf { 9047e1e7763SThomas Graf struct bucket_table *tbl; 90588d6ed15SThomas Graf struct test_obj *obj; 90688d6ed15SThomas Graf struct rhash_head *pos, *next; 9077e1e7763SThomas Graf int err; 9087e1e7763SThomas Graf unsigned int i; 9097e1e7763SThomas Graf 9107e1e7763SThomas Graf /* 9117e1e7763SThomas Graf * Insertion Test: 9127e1e7763SThomas Graf * Insert TEST_ENTRIES into table with all keys even numbers 9137e1e7763SThomas Graf */ 9147e1e7763SThomas Graf pr_info(" Adding %d keys\n", TEST_ENTRIES); 9157e1e7763SThomas Graf for (i = 0; i < TEST_ENTRIES; i++) { 9167e1e7763SThomas Graf struct test_obj *obj; 9177e1e7763SThomas Graf 9187e1e7763SThomas Graf obj = kzalloc(sizeof(*obj), GFP_KERNEL); 9197e1e7763SThomas Graf if (!obj) { 9207e1e7763SThomas Graf err = -ENOMEM; 9217e1e7763SThomas Graf goto error; 9227e1e7763SThomas Graf } 9237e1e7763SThomas Graf 9247e1e7763SThomas Graf obj->ptr = TEST_PTR; 9257e1e7763SThomas Graf obj->value = i * 2; 9267e1e7763SThomas Graf 9276eba8224SThomas Graf rhashtable_insert(ht, &obj->node); 9287e1e7763SThomas Graf } 9297e1e7763SThomas Graf 9307e1e7763SThomas Graf rcu_read_lock(); 9313e7b2ec4SThomas Graf test_bucket_stats(ht, true); 9327e1e7763SThomas Graf test_rht_lookup(ht); 9337e1e7763SThomas Graf rcu_read_unlock(); 9347e1e7763SThomas Graf 9357e1e7763SThomas Graf for (i = 0; i < TEST_NEXPANDS; i++) { 9367e1e7763SThomas Graf pr_info(" Table expansion iteration %u...\n", i); 93797defe1eSThomas Graf mutex_lock(&ht->mutex); 9386eba8224SThomas Graf rhashtable_expand(ht); 93997defe1eSThomas Graf mutex_unlock(&ht->mutex); 9407e1e7763SThomas Graf 9417e1e7763SThomas Graf rcu_read_lock(); 9427e1e7763SThomas Graf pr_info(" Verifying lookups...\n"); 9437e1e7763SThomas Graf test_rht_lookup(ht); 9447e1e7763SThomas Graf rcu_read_unlock(); 9457e1e7763SThomas Graf } 9467e1e7763SThomas Graf 9477e1e7763SThomas Graf for (i = 0; i < TEST_NEXPANDS; i++) { 9487e1e7763SThomas Graf pr_info(" Table shrinkage iteration %u...\n", i); 94997defe1eSThomas Graf mutex_lock(&ht->mutex); 9506eba8224SThomas Graf rhashtable_shrink(ht); 95197defe1eSThomas Graf mutex_unlock(&ht->mutex); 9527e1e7763SThomas Graf 9537e1e7763SThomas Graf rcu_read_lock(); 9547e1e7763SThomas Graf pr_info(" Verifying lookups...\n"); 9557e1e7763SThomas Graf test_rht_lookup(ht); 9567e1e7763SThomas Graf rcu_read_unlock(); 9577e1e7763SThomas Graf } 9587e1e7763SThomas Graf 9593e7b2ec4SThomas Graf rcu_read_lock(); 9603e7b2ec4SThomas Graf test_bucket_stats(ht, true); 9613e7b2ec4SThomas Graf rcu_read_unlock(); 9623e7b2ec4SThomas Graf 9637e1e7763SThomas Graf pr_info(" Deleting %d keys\n", TEST_ENTRIES); 9647e1e7763SThomas Graf for (i = 0; i < TEST_ENTRIES; i++) { 9657e1e7763SThomas Graf u32 key = i * 2; 9667e1e7763SThomas Graf 9677e1e7763SThomas Graf obj = rhashtable_lookup(ht, &key); 9687e1e7763SThomas Graf BUG_ON(!obj); 9697e1e7763SThomas Graf 9706eba8224SThomas Graf rhashtable_remove(ht, &obj->node); 9717e1e7763SThomas Graf kfree(obj); 9727e1e7763SThomas Graf } 9737e1e7763SThomas Graf 9747e1e7763SThomas Graf return 0; 9757e1e7763SThomas Graf 9767e1e7763SThomas Graf error: 9777e1e7763SThomas Graf tbl = rht_dereference_rcu(ht->tbl, ht); 9787e1e7763SThomas Graf for (i = 0; i < tbl->size; i++) 97988d6ed15SThomas Graf rht_for_each_entry_safe(obj, pos, next, tbl, i, node) 9807e1e7763SThomas Graf kfree(obj); 9817e1e7763SThomas Graf 9827e1e7763SThomas Graf return err; 9837e1e7763SThomas Graf } 9847e1e7763SThomas Graf 9857e1e7763SThomas Graf static int __init test_rht_init(void) 9867e1e7763SThomas Graf { 9877e1e7763SThomas Graf struct rhashtable ht; 9887e1e7763SThomas Graf struct rhashtable_params params = { 9897e1e7763SThomas Graf .nelem_hint = TEST_HT_SIZE, 9907e1e7763SThomas Graf .head_offset = offsetof(struct test_obj, node), 9917e1e7763SThomas Graf .key_offset = offsetof(struct test_obj, value), 9927e1e7763SThomas Graf .key_len = sizeof(int), 99387545899SDaniel Borkmann .hashfn = jhash, 994f89bd6f8SThomas Graf .nulls_base = (3U << RHT_BASE_SHIFT), 9957e1e7763SThomas Graf .grow_decision = rht_grow_above_75, 9967e1e7763SThomas Graf .shrink_decision = rht_shrink_below_30, 9977e1e7763SThomas Graf }; 9987e1e7763SThomas Graf int err; 9997e1e7763SThomas Graf 10007e1e7763SThomas Graf pr_info("Running resizable hashtable tests...\n"); 10017e1e7763SThomas Graf 10027e1e7763SThomas Graf err = rhashtable_init(&ht, ¶ms); 10037e1e7763SThomas Graf if (err < 0) { 10047e1e7763SThomas Graf pr_warn("Test failed: Unable to initialize hashtable: %d\n", 10057e1e7763SThomas Graf err); 10067e1e7763SThomas Graf return err; 10077e1e7763SThomas Graf } 10087e1e7763SThomas Graf 10097e1e7763SThomas Graf err = test_rhashtable(&ht); 10107e1e7763SThomas Graf 10117e1e7763SThomas Graf rhashtable_destroy(&ht); 10127e1e7763SThomas Graf 10137e1e7763SThomas Graf return err; 10147e1e7763SThomas Graf } 10157e1e7763SThomas Graf 10167e1e7763SThomas Graf subsys_initcall(test_rht_init); 10177e1e7763SThomas Graf 10187e1e7763SThomas Graf #endif /* CONFIG_TEST_RHASHTABLE */ 1019