1d2912cb1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 27e1e7763SThomas Graf /* 37e1e7763SThomas Graf * Resizable, Scalable, Concurrent Hash Table 47e1e7763SThomas Graf * 502fd97c3SHerbert Xu * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> 6a5ec68e3SThomas Graf * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> 77e1e7763SThomas Graf * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> 87e1e7763SThomas Graf * 97e1e7763SThomas Graf * Code partially derived from nft_hash 1002fd97c3SHerbert Xu * Rewritten with rehash code from br_multicast plus single list 1102fd97c3SHerbert Xu * pointer as suggested by Josh Triplett 127e1e7763SThomas Graf */ 137e1e7763SThomas Graf 1407ee0722SHerbert Xu #include <linux/atomic.h> 157e1e7763SThomas Graf #include <linux/kernel.h> 167e1e7763SThomas Graf #include <linux/init.h> 177e1e7763SThomas Graf #include <linux/log2.h> 185beb5c90SEric Dumazet #include <linux/sched.h> 19b2d09103SIngo Molnar #include <linux/rculist.h> 207e1e7763SThomas Graf #include <linux/slab.h> 217e1e7763SThomas Graf #include <linux/vmalloc.h> 227e1e7763SThomas Graf #include <linux/mm.h> 2387545899SDaniel Borkmann #include <linux/jhash.h> 247e1e7763SThomas Graf #include <linux/random.h> 257e1e7763SThomas Graf #include <linux/rhashtable.h> 2661d7b097SStephen Rothwell #include <linux/err.h> 276d795413SHauke Mehrtens #include <linux/export.h> 287e1e7763SThomas Graf 297e1e7763SThomas Graf #define HASH_DEFAULT_SIZE 64UL 30c2e213cfSHerbert Xu #define HASH_MIN_SIZE 4U 3197defe1eSThomas Graf 32da20420fSHerbert Xu union nested_table { 33da20420fSHerbert Xu union nested_table __rcu *table; 34ce9b362bSHerbert Xu struct rhash_lock_head __rcu *bucket; 35da20420fSHerbert Xu }; 36da20420fSHerbert Xu 37988dfbd7SHerbert Xu static u32 head_hashfn(struct rhashtable *ht, 388d24c0b4SThomas Graf const struct bucket_table *tbl, 398d24c0b4SThomas Graf const struct rhash_head *he) 407e1e7763SThomas Graf { 4102fd97c3SHerbert Xu return rht_head_hashfn(ht, 
tbl, he, ht->p); 427e1e7763SThomas Graf } 437e1e7763SThomas Graf 44a03eaec0SThomas Graf #ifdef CONFIG_PROVE_LOCKING 45a03eaec0SThomas Graf #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) 46a03eaec0SThomas Graf 47a03eaec0SThomas Graf int lockdep_rht_mutex_is_held(struct rhashtable *ht) 48a03eaec0SThomas Graf { 49a03eaec0SThomas Graf return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1; 50a03eaec0SThomas Graf } 51a03eaec0SThomas Graf EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); 52a03eaec0SThomas Graf 53a03eaec0SThomas Graf int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) 54a03eaec0SThomas Graf { 558f0db018SNeilBrown if (!debug_locks) 568f0db018SNeilBrown return 1; 578f0db018SNeilBrown if (unlikely(tbl->nest)) 588f0db018SNeilBrown return 1; 59ca0b709dSNeilBrown return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]); 60a03eaec0SThomas Graf } 61a03eaec0SThomas Graf EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); 62a03eaec0SThomas Graf #else 63a03eaec0SThomas Graf #define ASSERT_RHT_MUTEX(HT) 64a03eaec0SThomas Graf #endif 65a03eaec0SThomas Graf 664a3084aaSHerbert Xu static inline union nested_table *nested_table_top( 674a3084aaSHerbert Xu const struct bucket_table *tbl) 684a3084aaSHerbert Xu { 694a3084aaSHerbert Xu /* The top-level bucket entry does not need RCU protection 704a3084aaSHerbert Xu * because it's set at the same time as tbl->nest. 
714a3084aaSHerbert Xu */ 724a3084aaSHerbert Xu return (void *)rcu_dereference_protected(tbl->buckets[0], 1); 734a3084aaSHerbert Xu } 744a3084aaSHerbert Xu 75da20420fSHerbert Xu static void nested_table_free(union nested_table *ntbl, unsigned int size) 76da20420fSHerbert Xu { 77da20420fSHerbert Xu const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); 78da20420fSHerbert Xu const unsigned int len = 1 << shift; 79da20420fSHerbert Xu unsigned int i; 80da20420fSHerbert Xu 814a3084aaSHerbert Xu ntbl = rcu_dereference_protected(ntbl->table, 1); 82da20420fSHerbert Xu if (!ntbl) 83da20420fSHerbert Xu return; 84da20420fSHerbert Xu 85da20420fSHerbert Xu if (size > len) { 86da20420fSHerbert Xu size >>= shift; 87da20420fSHerbert Xu for (i = 0; i < len; i++) 88da20420fSHerbert Xu nested_table_free(ntbl + i, size); 89da20420fSHerbert Xu } 90da20420fSHerbert Xu 91da20420fSHerbert Xu kfree(ntbl); 92da20420fSHerbert Xu } 93da20420fSHerbert Xu 94da20420fSHerbert Xu static void nested_bucket_table_free(const struct bucket_table *tbl) 95da20420fSHerbert Xu { 96da20420fSHerbert Xu unsigned int size = tbl->size >> tbl->nest; 97da20420fSHerbert Xu unsigned int len = 1 << tbl->nest; 98da20420fSHerbert Xu union nested_table *ntbl; 99da20420fSHerbert Xu unsigned int i; 100da20420fSHerbert Xu 1014a3084aaSHerbert Xu ntbl = nested_table_top(tbl); 102da20420fSHerbert Xu 103da20420fSHerbert Xu for (i = 0; i < len; i++) 104da20420fSHerbert Xu nested_table_free(ntbl + i, size); 105da20420fSHerbert Xu 106da20420fSHerbert Xu kfree(ntbl); 107da20420fSHerbert Xu } 108da20420fSHerbert Xu 10997defe1eSThomas Graf static void bucket_table_free(const struct bucket_table *tbl) 11097defe1eSThomas Graf { 111da20420fSHerbert Xu if (tbl->nest) 112da20420fSHerbert Xu nested_bucket_table_free(tbl); 113da20420fSHerbert Xu 11497defe1eSThomas Graf kvfree(tbl); 11597defe1eSThomas Graf } 11697defe1eSThomas Graf 1179d901bc0SHerbert Xu static void bucket_table_free_rcu(struct rcu_head *head) 1189d901bc0SHerbert 
Xu { 1199d901bc0SHerbert Xu bucket_table_free(container_of(head, struct bucket_table, rcu)); 1209d901bc0SHerbert Xu } 1219d901bc0SHerbert Xu 122da20420fSHerbert Xu static union nested_table *nested_table_alloc(struct rhashtable *ht, 123da20420fSHerbert Xu union nested_table __rcu **prev, 1245af68ef7SNeilBrown bool leaf) 125da20420fSHerbert Xu { 126da20420fSHerbert Xu union nested_table *ntbl; 127da20420fSHerbert Xu int i; 128da20420fSHerbert Xu 129da20420fSHerbert Xu ntbl = rcu_dereference(*prev); 130da20420fSHerbert Xu if (ntbl) 131da20420fSHerbert Xu return ntbl; 132da20420fSHerbert Xu 133*9e54dd8bSKent Overstreet ntbl = alloc_hooks_tag(ht->alloc_tag, 134*9e54dd8bSKent Overstreet kmalloc_noprof(PAGE_SIZE, GFP_ATOMIC|__GFP_ZERO)); 135da20420fSHerbert Xu 1365af68ef7SNeilBrown if (ntbl && leaf) { 1375af68ef7SNeilBrown for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++) 1389b4f64a2SNeilBrown INIT_RHT_NULLS_HEAD(ntbl[i].bucket); 139da20420fSHerbert Xu } 140da20420fSHerbert Xu 141e9458a4eSHerbert Xu if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL) 142da20420fSHerbert Xu return ntbl; 1437a41c294SNeilBrown /* Raced with another thread. 
*/ 1447a41c294SNeilBrown kfree(ntbl); 1457a41c294SNeilBrown return rcu_dereference(*prev); 146da20420fSHerbert Xu } 147da20420fSHerbert Xu 148da20420fSHerbert Xu static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, 149da20420fSHerbert Xu size_t nbuckets, 150da20420fSHerbert Xu gfp_t gfp) 151da20420fSHerbert Xu { 152da20420fSHerbert Xu const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); 153da20420fSHerbert Xu struct bucket_table *tbl; 154da20420fSHerbert Xu size_t size; 155da20420fSHerbert Xu 156da20420fSHerbert Xu if (nbuckets < (1 << (shift + 1))) 157da20420fSHerbert Xu return NULL; 158da20420fSHerbert Xu 159da20420fSHerbert Xu size = sizeof(*tbl) + sizeof(tbl->buckets[0]); 160da20420fSHerbert Xu 161*9e54dd8bSKent Overstreet tbl = alloc_hooks_tag(ht->alloc_tag, 162*9e54dd8bSKent Overstreet kmalloc_noprof(size, gfp|__GFP_ZERO)); 163da20420fSHerbert Xu if (!tbl) 164da20420fSHerbert Xu return NULL; 165da20420fSHerbert Xu 166da20420fSHerbert Xu if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, 1675af68ef7SNeilBrown false)) { 168da20420fSHerbert Xu kfree(tbl); 169da20420fSHerbert Xu return NULL; 170da20420fSHerbert Xu } 171da20420fSHerbert Xu 172da20420fSHerbert Xu tbl->nest = (ilog2(nbuckets) - 1) % shift + 1; 173da20420fSHerbert Xu 174da20420fSHerbert Xu return tbl; 175da20420fSHerbert Xu } 176da20420fSHerbert Xu 17797defe1eSThomas Graf static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, 178b9ecfdaaSHerbert Xu size_t nbuckets, 179b9ecfdaaSHerbert Xu gfp_t gfp) 1807e1e7763SThomas Graf { 181eb6d1abfSDaniel Borkmann struct bucket_table *tbl = NULL; 1828f0db018SNeilBrown size_t size; 183f89bd6f8SThomas Graf int i; 184149212f0SNeilBrown static struct lock_class_key __key; 1857e1e7763SThomas Graf 186*9e54dd8bSKent Overstreet tbl = alloc_hooks_tag(ht->alloc_tag, 187*9e54dd8bSKent Overstreet kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets), 188*9e54dd8bSKent Overstreet gfp|__GFP_ZERO, 
NUMA_NO_NODE)); 189da20420fSHerbert Xu 190da20420fSHerbert Xu size = nbuckets; 191da20420fSHerbert Xu 1922d22ecf6SDavidlohr Bueso if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) { 193da20420fSHerbert Xu tbl = nested_bucket_table_alloc(ht, nbuckets, gfp); 194da20420fSHerbert Xu nbuckets = 0; 195da20420fSHerbert Xu } 1962d22ecf6SDavidlohr Bueso 1977e1e7763SThomas Graf if (tbl == NULL) 1987e1e7763SThomas Graf return NULL; 1997e1e7763SThomas Graf 200149212f0SNeilBrown lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0); 201149212f0SNeilBrown 202da20420fSHerbert Xu tbl->size = size; 2037e1e7763SThomas Graf 2044feb7c7aSNeilBrown rcu_head_init(&tbl->rcu); 205eddee5baSHerbert Xu INIT_LIST_HEAD(&tbl->walkers); 206eddee5baSHerbert Xu 207d48ad080SJason A. Donenfeld tbl->hash_rnd = get_random_u32(); 2085269b53dSHerbert Xu 209f89bd6f8SThomas Graf for (i = 0; i < nbuckets; i++) 2109b4f64a2SNeilBrown INIT_RHT_NULLS_HEAD(tbl->buckets[i]); 211f89bd6f8SThomas Graf 21297defe1eSThomas Graf return tbl; 2137e1e7763SThomas Graf } 2147e1e7763SThomas Graf 215b824478bSHerbert Xu static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, 216b824478bSHerbert Xu struct bucket_table *tbl) 217b824478bSHerbert Xu { 218b824478bSHerbert Xu struct bucket_table *new_tbl; 219b824478bSHerbert Xu 220b824478bSHerbert Xu do { 221b824478bSHerbert Xu new_tbl = tbl; 222b824478bSHerbert Xu tbl = rht_dereference_rcu(tbl->future_tbl, ht); 223b824478bSHerbert Xu } while (tbl); 224b824478bSHerbert Xu 225b824478bSHerbert Xu return new_tbl; 226b824478bSHerbert Xu } 227b824478bSHerbert Xu 2288f0db018SNeilBrown static int rhashtable_rehash_one(struct rhashtable *ht, 229ce9b362bSHerbert Xu struct rhash_lock_head __rcu **bkt, 2308f0db018SNeilBrown unsigned int old_hash) 231a5ec68e3SThomas Graf { 232aa34a6cbSHerbert Xu struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); 233c0690016SNeilBrown struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); 234da20420fSHerbert 
Xu int err = -EAGAIN; 235aa34a6cbSHerbert Xu struct rhash_head *head, *next, *entry; 236e4edbe3cSNeilBrown struct rhash_head __rcu **pprev = NULL; 237299e5c32SThomas Graf unsigned int new_hash; 238e47877c7STejun Heo unsigned long flags; 239a5ec68e3SThomas Graf 240da20420fSHerbert Xu if (new_tbl->nest) 241da20420fSHerbert Xu goto out; 242da20420fSHerbert Xu 243da20420fSHerbert Xu err = -ENOENT; 244da20420fSHerbert Xu 245adc6a3abSNeilBrown rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash), 246adc6a3abSNeilBrown old_tbl, old_hash) { 247aa34a6cbSHerbert Xu err = 0; 248aa34a6cbSHerbert Xu next = rht_dereference_bucket(entry->next, old_tbl, old_hash); 249a5ec68e3SThomas Graf 250aa34a6cbSHerbert Xu if (rht_is_a_nulls(next)) 2517e1e7763SThomas Graf break; 25297defe1eSThomas Graf 253aa34a6cbSHerbert Xu pprev = &entry->next; 2547e1e7763SThomas Graf } 2557e1e7763SThomas Graf 256aa34a6cbSHerbert Xu if (err) 257aa34a6cbSHerbert Xu goto out; 25897defe1eSThomas Graf 259aa34a6cbSHerbert Xu new_hash = head_hashfn(ht, new_tbl, entry); 260a5ec68e3SThomas Graf 261e47877c7STejun Heo flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], 262e47877c7STejun Heo SINGLE_DEPTH_NESTING); 263aa34a6cbSHerbert Xu 264adc6a3abSNeilBrown head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash); 265aa34a6cbSHerbert Xu 266aa34a6cbSHerbert Xu RCU_INIT_POINTER(entry->next, head); 267aa34a6cbSHerbert Xu 268e47877c7STejun Heo rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags); 269aa34a6cbSHerbert Xu 2708f0db018SNeilBrown if (pprev) 271aa34a6cbSHerbert Xu rcu_assign_pointer(*pprev, next); 2728f0db018SNeilBrown else 2738f0db018SNeilBrown /* Need to preserved the bit lock. 
*/ 274f4712b46SNeilBrown rht_assign_locked(bkt, next); 275aa34a6cbSHerbert Xu 276aa34a6cbSHerbert Xu out: 277aa34a6cbSHerbert Xu return err; 27897defe1eSThomas Graf } 27997defe1eSThomas Graf 280da20420fSHerbert Xu static int rhashtable_rehash_chain(struct rhashtable *ht, 281299e5c32SThomas Graf unsigned int old_hash) 28297defe1eSThomas Graf { 283aa34a6cbSHerbert Xu struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); 284ce9b362bSHerbert Xu struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); 285e47877c7STejun Heo unsigned long flags; 286da20420fSHerbert Xu int err; 2877cd10db8SThomas Graf 2888f0db018SNeilBrown if (!bkt) 2898f0db018SNeilBrown return 0; 290e47877c7STejun Heo flags = rht_lock(old_tbl, bkt); 291aa34a6cbSHerbert Xu 2928f0db018SNeilBrown while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) 293aa34a6cbSHerbert Xu ; 294da20420fSHerbert Xu 2954feb7c7aSNeilBrown if (err == -ENOENT) 296da20420fSHerbert Xu err = 0; 297e47877c7STejun Heo rht_unlock(old_tbl, bkt, flags); 298da20420fSHerbert Xu 299da20420fSHerbert Xu return err; 300aa34a6cbSHerbert Xu } 301aa34a6cbSHerbert Xu 302b824478bSHerbert Xu static int rhashtable_rehash_attach(struct rhashtable *ht, 303b824478bSHerbert Xu struct bucket_table *old_tbl, 304aa34a6cbSHerbert Xu struct bucket_table *new_tbl) 305aa34a6cbSHerbert Xu { 306aa34a6cbSHerbert Xu /* Make insertions go into the new, empty table right away. Deletions 307aa34a6cbSHerbert Xu * and lookups will be attempted in both tables until we synchronize. 3080ad66449SNeilBrown * As cmpxchg() provides strong barriers, we do not need 3090ad66449SNeilBrown * rcu_assign_pointer(). 
310aa34a6cbSHerbert Xu */ 311aa34a6cbSHerbert Xu 312e9458a4eSHerbert Xu if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL, 313e9458a4eSHerbert Xu new_tbl) != NULL) 3140ad66449SNeilBrown return -EEXIST; 315b824478bSHerbert Xu 316b824478bSHerbert Xu return 0; 317b824478bSHerbert Xu } 318b824478bSHerbert Xu 319b824478bSHerbert Xu static int rhashtable_rehash_table(struct rhashtable *ht) 320b824478bSHerbert Xu { 321b824478bSHerbert Xu struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); 322b824478bSHerbert Xu struct bucket_table *new_tbl; 323b824478bSHerbert Xu struct rhashtable_walker *walker; 324299e5c32SThomas Graf unsigned int old_hash; 325da20420fSHerbert Xu int err; 326b824478bSHerbert Xu 327b824478bSHerbert Xu new_tbl = rht_dereference(old_tbl->future_tbl, ht); 328b824478bSHerbert Xu if (!new_tbl) 329b824478bSHerbert Xu return 0; 330b824478bSHerbert Xu 331da20420fSHerbert Xu for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { 332da20420fSHerbert Xu err = rhashtable_rehash_chain(ht, old_hash); 333da20420fSHerbert Xu if (err) 334da20420fSHerbert Xu return err; 335ae6da1f5SEric Dumazet cond_resched(); 336da20420fSHerbert Xu } 337aa34a6cbSHerbert Xu 338aa34a6cbSHerbert Xu /* Publish the new table pointer. */ 339aa34a6cbSHerbert Xu rcu_assign_pointer(ht->tbl, new_tbl); 340aa34a6cbSHerbert Xu 341ba7c95eaSHerbert Xu spin_lock(&ht->lock); 342eddee5baSHerbert Xu list_for_each_entry(walker, &old_tbl->walkers, list) 343eddee5baSHerbert Xu walker->tbl = NULL; 344eddee5baSHerbert Xu 345aa34a6cbSHerbert Xu /* Wait for readers. All new readers will see the new 346aa34a6cbSHerbert Xu * table, and thus no references to the old table will 347aa34a6cbSHerbert Xu * remain. 3484feb7c7aSNeilBrown * We do this inside the locked region so that 3494feb7c7aSNeilBrown * rhashtable_walk_stop() can use rcu_head_after_call_rcu() 3504feb7c7aSNeilBrown * to check if it should not re-link the table. 
351aa34a6cbSHerbert Xu */ 3529d901bc0SHerbert Xu call_rcu(&old_tbl->rcu, bucket_table_free_rcu); 3534feb7c7aSNeilBrown spin_unlock(&ht->lock); 354b824478bSHerbert Xu 355b824478bSHerbert Xu return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; 3567e1e7763SThomas Graf } 3577e1e7763SThomas Graf 358da20420fSHerbert Xu static int rhashtable_rehash_alloc(struct rhashtable *ht, 359da20420fSHerbert Xu struct bucket_table *old_tbl, 360da20420fSHerbert Xu unsigned int size) 3617e1e7763SThomas Graf { 362da20420fSHerbert Xu struct bucket_table *new_tbl; 363b824478bSHerbert Xu int err; 3647e1e7763SThomas Graf 3657e1e7763SThomas Graf ASSERT_RHT_MUTEX(ht); 3667e1e7763SThomas Graf 367da20420fSHerbert Xu new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); 3687e1e7763SThomas Graf if (new_tbl == NULL) 3697e1e7763SThomas Graf return -ENOMEM; 3707e1e7763SThomas Graf 371b824478bSHerbert Xu err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); 372b824478bSHerbert Xu if (err) 373b824478bSHerbert Xu bucket_table_free(new_tbl); 374b824478bSHerbert Xu 375b824478bSHerbert Xu return err; 3767e1e7763SThomas Graf } 3777e1e7763SThomas Graf 3787e1e7763SThomas Graf /** 3797e1e7763SThomas Graf * rhashtable_shrink - Shrink hash table while allowing concurrent lookups 3807e1e7763SThomas Graf * @ht: the hash table to shrink 3817e1e7763SThomas Graf * 38218093d1cSHerbert Xu * This function shrinks the hash table to fit, i.e., the smallest 38318093d1cSHerbert Xu * size would not cause it to expand right away automatically. 3847e1e7763SThomas Graf * 38597defe1eSThomas Graf * The caller must ensure that no concurrent resizing occurs by holding 38697defe1eSThomas Graf * ht->mutex. 38797defe1eSThomas Graf * 3887e1e7763SThomas Graf * The caller must ensure that no concurrent table mutations take place. 3897e1e7763SThomas Graf * It is however valid to have concurrent lookups if they are RCU protected. 
39097defe1eSThomas Graf * 39197defe1eSThomas Graf * It is valid to have concurrent insertions and deletions protected by per 39297defe1eSThomas Graf * bucket locks or concurrent RCU protected lookups and traversals. 3937e1e7763SThomas Graf */ 394b824478bSHerbert Xu static int rhashtable_shrink(struct rhashtable *ht) 3957e1e7763SThomas Graf { 396da20420fSHerbert Xu struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); 39712311959SVegard Nossum unsigned int nelems = atomic_read(&ht->nelems); 39812311959SVegard Nossum unsigned int size = 0; 3997e1e7763SThomas Graf 40012311959SVegard Nossum if (nelems) 40112311959SVegard Nossum size = roundup_pow_of_two(nelems * 3 / 2); 40218093d1cSHerbert Xu if (size < ht->p.min_size) 40318093d1cSHerbert Xu size = ht->p.min_size; 40418093d1cSHerbert Xu 40518093d1cSHerbert Xu if (old_tbl->size <= size) 40618093d1cSHerbert Xu return 0; 40718093d1cSHerbert Xu 408b824478bSHerbert Xu if (rht_dereference(old_tbl->future_tbl, ht)) 409b824478bSHerbert Xu return -EEXIST; 410b824478bSHerbert Xu 411da20420fSHerbert Xu return rhashtable_rehash_alloc(ht, old_tbl, size); 4127e1e7763SThomas Graf } 4137e1e7763SThomas Graf 41497defe1eSThomas Graf static void rht_deferred_worker(struct work_struct *work) 41597defe1eSThomas Graf { 41697defe1eSThomas Graf struct rhashtable *ht; 41797defe1eSThomas Graf struct bucket_table *tbl; 418b824478bSHerbert Xu int err = 0; 41997defe1eSThomas Graf 42057699a40SYing Xue ht = container_of(work, struct rhashtable, run_work); 42197defe1eSThomas Graf mutex_lock(&ht->mutex); 42228134a53SHerbert Xu 42397defe1eSThomas Graf tbl = rht_dereference(ht->tbl, ht); 424b824478bSHerbert Xu tbl = rhashtable_last_table(ht, tbl); 42597defe1eSThomas Graf 426a5b6846fSDaniel Borkmann if (rht_grow_above_75(ht, tbl)) 427da20420fSHerbert Xu err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2); 428b5e2c150SThomas Graf else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) 429da20420fSHerbert Xu err = rhashtable_shrink(ht); 
430da20420fSHerbert Xu else if (tbl->nest) 431da20420fSHerbert Xu err = rhashtable_rehash_alloc(ht, tbl, tbl->size); 432b824478bSHerbert Xu 433408f13efSHerbert Xu if (!err || err == -EEXIST) { 434408f13efSHerbert Xu int nerr; 435408f13efSHerbert Xu 436408f13efSHerbert Xu nerr = rhashtable_rehash_table(ht); 437408f13efSHerbert Xu err = err ?: nerr; 438408f13efSHerbert Xu } 439b824478bSHerbert Xu 44097defe1eSThomas Graf mutex_unlock(&ht->mutex); 441b824478bSHerbert Xu 442b824478bSHerbert Xu if (err) 443b824478bSHerbert Xu schedule_work(&ht->run_work); 44497defe1eSThomas Graf } 44597defe1eSThomas Graf 446ca26893fSHerbert Xu static int rhashtable_insert_rehash(struct rhashtable *ht, 4473cf92222SHerbert Xu struct bucket_table *tbl) 448ccd57b1bSHerbert Xu { 449ccd57b1bSHerbert Xu struct bucket_table *old_tbl; 450ccd57b1bSHerbert Xu struct bucket_table *new_tbl; 451ccd57b1bSHerbert Xu unsigned int size; 452ccd57b1bSHerbert Xu int err; 453ccd57b1bSHerbert Xu 454ccd57b1bSHerbert Xu old_tbl = rht_dereference_rcu(ht->tbl, ht); 455ccd57b1bSHerbert Xu 456ccd57b1bSHerbert Xu size = tbl->size; 457ccd57b1bSHerbert Xu 4583cf92222SHerbert Xu err = -EBUSY; 4593cf92222SHerbert Xu 460ccd57b1bSHerbert Xu if (rht_grow_above_75(ht, tbl)) 461ccd57b1bSHerbert Xu size *= 2; 462a87b9ebfSThomas Graf /* Do not schedule more than one rehash */ 463a87b9ebfSThomas Graf else if (old_tbl != tbl) 4643cf92222SHerbert Xu goto fail; 4653cf92222SHerbert Xu 4663cf92222SHerbert Xu err = -ENOMEM; 467ccd57b1bSHerbert Xu 46893f976b5SDavidlohr Bueso new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN); 4693cf92222SHerbert Xu if (new_tbl == NULL) 4703cf92222SHerbert Xu goto fail; 471ccd57b1bSHerbert Xu 472ccd57b1bSHerbert Xu err = rhashtable_rehash_attach(ht, tbl, new_tbl); 473ccd57b1bSHerbert Xu if (err) { 474ccd57b1bSHerbert Xu bucket_table_free(new_tbl); 475ccd57b1bSHerbert Xu if (err == -EEXIST) 476ccd57b1bSHerbert Xu err = 0; 477ccd57b1bSHerbert Xu } else 478ccd57b1bSHerbert Xu 
schedule_work(&ht->run_work); 479ccd57b1bSHerbert Xu 480ccd57b1bSHerbert Xu return err; 4813cf92222SHerbert Xu 4823cf92222SHerbert Xu fail: 4833cf92222SHerbert Xu /* Do not fail the insert if someone else did a rehash. */ 484c0690016SNeilBrown if (likely(rcu_access_pointer(tbl->future_tbl))) 4853cf92222SHerbert Xu return 0; 4863cf92222SHerbert Xu 4873cf92222SHerbert Xu /* Schedule async rehash to retry allocation in process context. */ 4883cf92222SHerbert Xu if (err == -ENOMEM) 4893cf92222SHerbert Xu schedule_work(&ht->run_work); 4903cf92222SHerbert Xu 4913cf92222SHerbert Xu return err; 492ccd57b1bSHerbert Xu } 493ccd57b1bSHerbert Xu 494ca26893fSHerbert Xu static void *rhashtable_lookup_one(struct rhashtable *ht, 495ce9b362bSHerbert Xu struct rhash_lock_head __rcu **bkt, 496ca26893fSHerbert Xu struct bucket_table *tbl, unsigned int hash, 497ca26893fSHerbert Xu const void *key, struct rhash_head *obj) 49802fd97c3SHerbert Xu { 499ca26893fSHerbert Xu struct rhashtable_compare_arg arg = { 500ca26893fSHerbert Xu .ht = ht, 501ca26893fSHerbert Xu .key = key, 502ca26893fSHerbert Xu }; 503e4edbe3cSNeilBrown struct rhash_head __rcu **pprev = NULL; 50402fd97c3SHerbert Xu struct rhash_head *head; 505ca26893fSHerbert Xu int elasticity; 50602fd97c3SHerbert Xu 5075f8ddeabSFlorian Westphal elasticity = RHT_ELASTICITY; 508adc6a3abSNeilBrown rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { 509ca26893fSHerbert Xu struct rhlist_head *list; 510ca26893fSHerbert Xu struct rhlist_head *plist; 51102fd97c3SHerbert Xu 512ca26893fSHerbert Xu elasticity--; 513ca26893fSHerbert Xu if (!key || 514ca26893fSHerbert Xu (ht->p.obj_cmpfn ? 
515ca26893fSHerbert Xu ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : 516d3dcf8ebSPaul Blakey rhashtable_compare(&arg, rht_obj(ht, head)))) { 517d3dcf8ebSPaul Blakey pprev = &head->next; 518ca26893fSHerbert Xu continue; 519d3dcf8ebSPaul Blakey } 520ca26893fSHerbert Xu 521ca26893fSHerbert Xu if (!ht->rhlist) 522ca26893fSHerbert Xu return rht_obj(ht, head); 523ca26893fSHerbert Xu 524ca26893fSHerbert Xu list = container_of(obj, struct rhlist_head, rhead); 525ca26893fSHerbert Xu plist = container_of(head, struct rhlist_head, rhead); 526ca26893fSHerbert Xu 527ca26893fSHerbert Xu RCU_INIT_POINTER(list->next, plist); 528ca26893fSHerbert Xu head = rht_dereference_bucket(head->next, tbl, hash); 529ca26893fSHerbert Xu RCU_INIT_POINTER(list->rhead.next, head); 5308f0db018SNeilBrown if (pprev) 531ca26893fSHerbert Xu rcu_assign_pointer(*pprev, obj); 5328f0db018SNeilBrown else 5338f0db018SNeilBrown /* Need to preserve the bit lock */ 534f4712b46SNeilBrown rht_assign_locked(bkt, obj); 535ca26893fSHerbert Xu 536ca26893fSHerbert Xu return NULL; 5375ca8cc5bSPablo Neira Ayuso } 53802fd97c3SHerbert Xu 539ca26893fSHerbert Xu if (elasticity <= 0) 540ca26893fSHerbert Xu return ERR_PTR(-EAGAIN); 541ca26893fSHerbert Xu 542ca26893fSHerbert Xu return ERR_PTR(-ENOENT); 543ca26893fSHerbert Xu } 544ca26893fSHerbert Xu 545ce9b362bSHerbert Xu static struct bucket_table *rhashtable_insert_one( 546ce9b362bSHerbert Xu struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, 547ce9b362bSHerbert Xu struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, 548ca26893fSHerbert Xu void *data) 549ca26893fSHerbert Xu { 550ca26893fSHerbert Xu struct bucket_table *new_tbl; 551ca26893fSHerbert Xu struct rhash_head *head; 552ca26893fSHerbert Xu 553ca26893fSHerbert Xu if (!IS_ERR_OR_NULL(data)) 554ca26893fSHerbert Xu return ERR_PTR(-EEXIST); 555ca26893fSHerbert Xu 556ca26893fSHerbert Xu if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) 557ca26893fSHerbert Xu return ERR_CAST(data); 
558ca26893fSHerbert Xu 559c0690016SNeilBrown new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); 560ca26893fSHerbert Xu if (new_tbl) 561ca26893fSHerbert Xu return new_tbl; 562ca26893fSHerbert Xu 563ca26893fSHerbert Xu if (PTR_ERR(data) != -ENOENT) 564ca26893fSHerbert Xu return ERR_CAST(data); 565ca26893fSHerbert Xu 56607ee0722SHerbert Xu if (unlikely(rht_grow_above_max(ht, tbl))) 567ca26893fSHerbert Xu return ERR_PTR(-E2BIG); 56807ee0722SHerbert Xu 569ca26893fSHerbert Xu if (unlikely(rht_grow_above_100(ht, tbl))) 570ca26893fSHerbert Xu return ERR_PTR(-EAGAIN); 57102fd97c3SHerbert Xu 572adc6a3abSNeilBrown head = rht_ptr(bkt, tbl, hash); 57302fd97c3SHerbert Xu 57402fd97c3SHerbert Xu RCU_INIT_POINTER(obj->next, head); 575ca26893fSHerbert Xu if (ht->rhlist) { 576ca26893fSHerbert Xu struct rhlist_head *list; 577ca26893fSHerbert Xu 578ca26893fSHerbert Xu list = container_of(obj, struct rhlist_head, rhead); 579ca26893fSHerbert Xu RCU_INIT_POINTER(list->next, NULL); 580ca26893fSHerbert Xu } 58102fd97c3SHerbert Xu 5828f0db018SNeilBrown /* bkt is always the head of the list, so it holds 5838f0db018SNeilBrown * the lock, which we need to preserve 5848f0db018SNeilBrown */ 585f4712b46SNeilBrown rht_assign_locked(bkt, obj); 58602fd97c3SHerbert Xu 58702fd97c3SHerbert Xu atomic_inc(&ht->nelems); 588ca26893fSHerbert Xu if (rht_grow_above_75(ht, tbl)) 589ca26893fSHerbert Xu schedule_work(&ht->run_work); 59002fd97c3SHerbert Xu 5913cf92222SHerbert Xu return NULL; 592ca26893fSHerbert Xu } 593ca26893fSHerbert Xu 594ca26893fSHerbert Xu static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, 595ca26893fSHerbert Xu struct rhash_head *obj) 596ca26893fSHerbert Xu { 597ca26893fSHerbert Xu struct bucket_table *new_tbl; 598ca26893fSHerbert Xu struct bucket_table *tbl; 599ce9b362bSHerbert Xu struct rhash_lock_head __rcu **bkt; 600e47877c7STejun Heo unsigned long flags; 601ca26893fSHerbert Xu unsigned int hash; 602ca26893fSHerbert Xu void *data; 603ca26893fSHerbert Xu 
6044feb7c7aSNeilBrown new_tbl = rcu_dereference(ht->tbl); 605ca26893fSHerbert Xu 6064feb7c7aSNeilBrown do { 607ca26893fSHerbert Xu tbl = new_tbl; 608ca26893fSHerbert Xu hash = rht_head_hashfn(ht, tbl, obj, ht->p); 6098f0db018SNeilBrown if (rcu_access_pointer(tbl->future_tbl)) 6108f0db018SNeilBrown /* Failure is OK */ 6118f0db018SNeilBrown bkt = rht_bucket_var(tbl, hash); 6128f0db018SNeilBrown else 6138f0db018SNeilBrown bkt = rht_bucket_insert(ht, tbl, hash); 6148f0db018SNeilBrown if (bkt == NULL) { 6158f0db018SNeilBrown new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); 6168f0db018SNeilBrown data = ERR_PTR(-EAGAIN); 6178f0db018SNeilBrown } else { 618e47877c7STejun Heo flags = rht_lock(tbl, bkt); 6198f0db018SNeilBrown data = rhashtable_lookup_one(ht, bkt, tbl, 6208f0db018SNeilBrown hash, key, obj); 6218f0db018SNeilBrown new_tbl = rhashtable_insert_one(ht, bkt, tbl, 6228f0db018SNeilBrown hash, obj, data); 623ca26893fSHerbert Xu if (PTR_ERR(new_tbl) != -EEXIST) 624ca26893fSHerbert Xu data = ERR_CAST(new_tbl); 625ca26893fSHerbert Xu 626e47877c7STejun Heo rht_unlock(tbl, bkt, flags); 6278f0db018SNeilBrown } 6284feb7c7aSNeilBrown } while (!IS_ERR_OR_NULL(new_tbl)); 629ca26893fSHerbert Xu 630ca26893fSHerbert Xu if (PTR_ERR(data) == -EAGAIN) 631ca26893fSHerbert Xu data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: 632ca26893fSHerbert Xu -EAGAIN); 633ca26893fSHerbert Xu 634ca26893fSHerbert Xu return data; 635ca26893fSHerbert Xu } 636ca26893fSHerbert Xu 637ca26893fSHerbert Xu void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, 638ca26893fSHerbert Xu struct rhash_head *obj) 639ca26893fSHerbert Xu { 640ca26893fSHerbert Xu void *data; 641ca26893fSHerbert Xu 642ca26893fSHerbert Xu do { 643ca26893fSHerbert Xu rcu_read_lock(); 644ca26893fSHerbert Xu data = rhashtable_try_insert(ht, key, obj); 645ca26893fSHerbert Xu rcu_read_unlock(); 646ca26893fSHerbert Xu } while (PTR_ERR(data) == -EAGAIN); 647ca26893fSHerbert Xu 648ca26893fSHerbert Xu return data; 
64902fd97c3SHerbert Xu } 65002fd97c3SHerbert Xu EXPORT_SYMBOL_GPL(rhashtable_insert_slow); 65102fd97c3SHerbert Xu 652f2dba9c6SHerbert Xu /** 653246779ddSHerbert Xu * rhashtable_walk_enter - Initialise an iterator 654f2dba9c6SHerbert Xu * @ht: Table to walk over 655f2dba9c6SHerbert Xu * @iter: Hash table Iterator 656f2dba9c6SHerbert Xu * 657f2dba9c6SHerbert Xu * This function prepares a hash table walk. 658f2dba9c6SHerbert Xu * 659f2dba9c6SHerbert Xu * Note that if you restart a walk after rhashtable_walk_stop you 660f2dba9c6SHerbert Xu * may see the same object twice. Also, you may miss objects if 661f2dba9c6SHerbert Xu * there are removals in between rhashtable_walk_stop and the next 662f2dba9c6SHerbert Xu * call to rhashtable_walk_start. 663f2dba9c6SHerbert Xu * 664f2dba9c6SHerbert Xu * For a completely stable walk you should construct your own data 665f2dba9c6SHerbert Xu * structure outside the hash table. 666f2dba9c6SHerbert Xu * 66782266e98SNeilBrown * This function may be called from any process context, including 66882266e98SNeilBrown * non-preemptable context, but cannot be called from softirq or 66982266e98SNeilBrown * hardirq context. 670f2dba9c6SHerbert Xu * 671246779ddSHerbert Xu * You must call rhashtable_walk_exit after this function returns. 
672f2dba9c6SHerbert Xu */ 673246779ddSHerbert Xu void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) 674f2dba9c6SHerbert Xu { 675f2dba9c6SHerbert Xu iter->ht = ht; 676f2dba9c6SHerbert Xu iter->p = NULL; 677f2dba9c6SHerbert Xu iter->slot = 0; 678f2dba9c6SHerbert Xu iter->skip = 0; 6792db54b47STom Herbert iter->end_of_table = 0; 680f2dba9c6SHerbert Xu 681c6ff5268SHerbert Xu spin_lock(&ht->lock); 682246779ddSHerbert Xu iter->walker.tbl = 683179ccc0aSHerbert Xu rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); 684246779ddSHerbert Xu list_add(&iter->walker.list, &iter->walker.tbl->walkers); 685c6ff5268SHerbert Xu spin_unlock(&ht->lock); 686f2dba9c6SHerbert Xu } 687246779ddSHerbert Xu EXPORT_SYMBOL_GPL(rhashtable_walk_enter); 688f2dba9c6SHerbert Xu 689f2dba9c6SHerbert Xu /** 690f2dba9c6SHerbert Xu * rhashtable_walk_exit - Free an iterator 691f2dba9c6SHerbert Xu * @iter: Hash table Iterator 692f2dba9c6SHerbert Xu * 6936c4128f6SHerbert Xu * This function frees resources allocated by rhashtable_walk_enter. 694f2dba9c6SHerbert Xu */ 695f2dba9c6SHerbert Xu void rhashtable_walk_exit(struct rhashtable_iter *iter) 696f2dba9c6SHerbert Xu { 697c6ff5268SHerbert Xu spin_lock(&iter->ht->lock); 698246779ddSHerbert Xu if (iter->walker.tbl) 699246779ddSHerbert Xu list_del(&iter->walker.list); 700c6ff5268SHerbert Xu spin_unlock(&iter->ht->lock); 701f2dba9c6SHerbert Xu } 702f2dba9c6SHerbert Xu EXPORT_SYMBOL_GPL(rhashtable_walk_exit); 703f2dba9c6SHerbert Xu 704f2dba9c6SHerbert Xu /** 70597a6ec4aSTom Herbert * rhashtable_walk_start_check - Start a hash table walk 706f2dba9c6SHerbert Xu * @iter: Hash table iterator 707f2dba9c6SHerbert Xu * 7080647169cSAndreas Gruenbacher * Start a hash table walk at the current iterator position. Note that we take 7090647169cSAndreas Gruenbacher * the RCU lock in all cases including when we return an error. So you must 7100647169cSAndreas Gruenbacher * always call rhashtable_walk_stop to clean up. 
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may use it immediately
 * by calling rhashtable_walk_next.
 *
 * rhashtable_walk_start is defined as an inline variant that returns
 * void. This is preferred in cases where the caller would ignore
 * resize events and always continue.
 */
int rhashtable_walk_start_check(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;
	bool rhlist = ht->rhlist;

	rcu_read_lock();

	/* Detach from the walker list while the walk is live; the walker
	 * is re-attached in rhashtable_walk_stop().
	 */
	spin_lock(&ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&ht->lock);

	if (iter->end_of_table)
		return 0;
	/* A NULL table means this walker was invalidated since the last
	 * start/stop: rewind onto the current table and report -EAGAIN.
	 */
	if (!iter->walker.tbl) {
		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
		iter->slot = 0;
		iter->skip = 0;
		return -EAGAIN;
	}

	if (iter->p && !rhlist) {
		/*
		 * We need to validate that 'p' is still in the table, and
		 * if so, update 'skip'
		 */
		struct rhash_head *p;
		int skip = 0;
		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
			skip++;
			if (p == iter->p) {
				iter->skip = skip;
				goto found;
			}
		}
		/* 'p' is no longer in this bucket chain; fall back to
		 * resuming from the remembered 'skip' position.
		 */
		iter->p = NULL;
	} else if (iter->p && rhlist) {
		/* Need to validate that 'list' is still in the table, and
		 * if so, update 'skip' and 'p'.
		 */
		struct rhash_head *p;
		struct rhlist_head *list;
		int skip = 0;
		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
			for (list = container_of(p, struct rhlist_head, rhead);
			     list;
			     list = rcu_dereference(list->next)) {
				skip++;
				if (list == iter->list) {
					iter->p = p;
					iter->skip = skip;
					goto found;
				}
			}
		}
		iter->p = NULL;
	}
found:
	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);

/**
 * __rhashtable_walk_find_next - Find the next element in a table (or the first
 * one in case of a new walk).
 *
 * @iter:	Hash table iterator
 *
 * Returns the found object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if resize event occurred.
 */
static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker.tbl;
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (!tbl)
		return NULL;

	for (; iter->slot < tbl->size; iter->slot++) {
		/* 'skip' counts entries of this bucket chain already
		 * returned; step over them to resume where we left off.
		 */
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (rhlist) {
				/* For rhlist tables each head carries a
				 * chain of duplicates; skip within it too.
				 */
				list = container_of(p, struct rhlist_head,
						    rhead);
				do {
					if (!skip)
						goto next;
					skip--;
					list = rcu_dereference(list->next);
				} while (list);

				continue;
			}
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		iter->skip = 0;
	}

	iter->p = NULL;

	/* Ensure we see any new tables. */
	smp_rmb();

	/* If a resize installed a successor table, restart the walk there
	 * and signal the caller with -EAGAIN; otherwise the walk is done.
	 */
	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker.tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	} else {
		iter->end_of_table = true;
	}

	return NULL;
}

/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter:	Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop when you are finished
 * with the walk.
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (p) {
		/* Advance within the current bucket: for rhlist tables try
		 * the next duplicate on p's list first, otherwise move to
		 * the next hash head in the chain.
		 */
		if (!rhlist || !(list = rcu_dereference(list->next))) {
			p = rcu_dereference(p->next);
			list = container_of(p, struct rhlist_head, rhead);
		}
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		/* At the end of this slot, switch to next one and then find
		 * next entry from that point.
		 */
		iter->skip = 0;
		iter->slot++;
	}

	return __rhashtable_walk_find_next(iter);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);

/**
 * rhashtable_walk_peek - Return the next object but don't advance the iterator
 * @iter:	Hash table iterator
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_peek(struct rhashtable_iter *iter)
{
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;

	/* The current object, if any, is returned again without moving. */
	if (p)
		return rht_obj(ht, ht->rhlist ? &list->rhead : p);

	/* No object found in current iter, find next one in the table. */

	if (iter->skip) {
		/* A nonzero skip value points to the next entry in the table
		 * beyond that last one that was found. Decrement skip so
		 * we find the current value. __rhashtable_walk_find_next
		 * will restore the original value of skip assuming that
		 * the table hasn't changed.
		 */
		iter->skip--;
	}

	return __rhashtable_walk_find_next(iter);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_peek);

/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter:	Hash table iterator
 *
 * Finish a hash table walk.  Does not reset the iterator to the start of the
 * hash table.
 */
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker.tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	/* Re-attach the walker so a later resize can invalidate it. */
	spin_lock(&ht->lock);
	if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
		/* This bucket table is being freed, don't re-link it. */
		iter->walker.tbl = NULL;
	else
		list_add(&iter->walker.list, &tbl->walkers);
	spin_unlock(&ht->lock);

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);

/* Initial bucket count for a table: the caller's nelem_hint scaled by 4/3
 * and rounded up to a power of two, but never below min_size (or below the
 * default size when no hint was given).
 */
static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	size_t retsize;

	if (params->nelem_hint)
		retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
			      (unsigned long)params->min_size);
	else
		retsize = max(HASH_DEFAULT_SIZE,
			      (unsigned long)params->min_size);

	return retsize;
}

/* Adapter giving jhash2 the (key, length, seed) signature expected for
 * ht->p.hashfn; installed when key_len is a multiple of sizeof(u32)
 * (see rhashtable_init_noprof).
 */
static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}

/**
 * rhashtable_init - initialize a new hash table
 * @ht:		hash table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *	int			key;
 *	void *			my_member;
 *	struct rhash_head	node;
 * };
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.key_offset = offsetof(struct test_obj, key),
 *	.key_len = sizeof(int),
 *	.hashfn = jhash,
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *	[...]
 *	struct rhash_head	node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
 * {
 *	struct test_obj *obj = data;
 *
 *	return [...
 *	hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.hashfn = jhash,
 *	.obj_hashfn = my_hash_fn,
 * };
 */
int rhashtable_init_noprof(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	/* Either a fixed key length or a custom object hash is mandatory,
	 * and a custom object hash requires a matching compare function.
	 */
	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	/* Copy the caller's parameters first; the adjustments below are
	 * applied to the private copy in ht->p.
	 */
	memcpy(&ht->p, params, sizeof(*params));

	alloc_tag_record(ht->alloc_tag);

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	/* Cap total entries at 2^31 to avoid nelems overflow. */
	ht->max_elems = 1u << 31;

	if (params->max_size) {
		ht->p.max_size = rounddown_pow_of_two(params->max_size);
		if (ht->p.max_size < ht->max_elems / 2)
			ht->max_elems = ht->p.max_size * 2;
	}

	ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);

	size = rounded_hashtable_size(&ht->p);

	/* Default to jhash; for u32-aligned keys switch to jhash2 and
	 * convert key_len from bytes to u32 words.
	 */
	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	/*
	 * This is api initialization and thus we need to guarantee the
	 * initial rhashtable allocation. Upon failure, retry with the
	 * smallest possible size with __GFP_NOFAIL semantics.
	 */
	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (unlikely(tbl == NULL)) {
		size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
		tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
	}

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init_noprof);

/**
 * rhltable_init - initialize a new hash list table
 * @hlt:	hash list table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash list table.
 *
 * See documentation for rhashtable_init.
 */
int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params)
{
	int err;

	/* An rhltable is an rhashtable with the rhlist flag set, so that
	 * entries with duplicate keys hang off per-head rhlist chains.
	 */
	err = rhashtable_init_noprof(&hlt->ht, params);
	hlt->ht.rhlist = true;
	return err;
}
EXPORT_SYMBOL_GPL(rhltable_init_noprof);

/* Invoke free_fn on one object or, for rhlist tables, on every object
 * chained on the rhlist behind @obj.
 */
static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
				void (*free_fn)(void *ptr, void *arg),
				void *arg)
{
	struct rhlist_head *list;

	if (!ht->rhlist) {
		free_fn(rht_obj(ht, obj), arg);
		return;
	}

	list = container_of(obj, struct rhlist_head, rhead);
	do {
		obj = &list->rhead;
		/* Read the next pointer before free_fn releases obj. */
		list = rht_dereference(list->next, ht);
		free_fn(rht_obj(ht, obj), arg);
	} while (list);
}

/**
 * rhashtable_free_and_destroy - free elements and destroy hash table
 * @ht:		the hash table to destroy
 * @free_fn:	callback to release resources of element
 * @arg:	pointer passed to free_fn
 *
 * Stops an eventual async resize. If defined, invokes free_fn for each
 * element to release resources. Please note that RCU protected
 * readers may still be accessing the elements.
 * Releasing of resources
 * must occur in a compatible manner. Then frees the bucket array.
 *
 * This function will eventually sleep to wait for an async resize
 * to complete. The caller is responsible that no further write operations
 * occur in parallel.
 */
void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg)
{
	struct bucket_table *tbl, *next_tbl;
	unsigned int i;

	/* Make sure no deferred resize work is still running. */
	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	tbl = rht_dereference(ht->tbl, ht);
restart:
	if (free_fn) {
		for (i = 0; i < tbl->size; i++) {
			struct rhash_head *pos, *next;

			cond_resched();
			/* Fetch 'next' before freeing 'pos' so the walk
			 * survives free_fn releasing the current object.
			 */
			for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL;
			     !rht_is_a_nulls(pos);
			     pos = next,
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL)
				rhashtable_free_one(ht, pos, free_fn, arg);
		}
	}

	/* An interrupted rehash may leave a chain of future tables;
	 * walk and free each of them in turn.
	 */
	next_tbl = rht_dereference(tbl->future_tbl, ht);
	bucket_table_free(tbl);
	if (next_tbl) {
		tbl = next_tbl;
		goto restart;
	}
	mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);

void rhashtable_destroy(struct rhashtable *ht)
{
	/* Destroy without a per-element callback. */
	return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);

/* Look up the bucket slot for @hash in a nested table, descending one
 * level of the tree per 'shift' hash bits.  Returns NULL when a required
 * intermediate table has not been allocated.
 */
struct rhash_lock_head __rcu **__rht_bucket_nested(
	const struct bucket_table *tbl, unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int subhash = hash;
	union nested_table *ntbl;

	ntbl = nested_table_top(tbl);
	ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
	subhash >>= tbl->nest;

	while (ntbl && size > (1 << shift)) {
		index = subhash & ((1 << shift) - 1);
		ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
						  tbl, hash);
		size >>= shift;
		subhash >>= shift;
	}

	if (!ntbl)
		return NULL;

	return &ntbl[subhash].bucket;

}
EXPORT_SYMBOL_GPL(__rht_bucket_nested);

/* As __rht_bucket_nested(), but unallocated slots map to a shared static
 * nulls marker instead of NULL.
 */
struct rhash_lock_head __rcu **rht_bucket_nested(
	const struct bucket_table *tbl, unsigned int hash)
{
	static struct rhash_lock_head __rcu *rhnull;

	/* NOTE(review): rhnull is initialised lazily without locking;
	 * presumably INIT_RHT_NULLS_HEAD is idempotent so a racing
	 * initialisation is harmless -- confirm.
	 */
	if (!rhnull)
		INIT_RHT_NULLS_HEAD(rhnull);
	return __rht_bucket_nested(tbl, hash) ?: &rhnull;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested);

/* As __rht_bucket_nested(), but allocates missing intermediate tables on
 * the way down.  Returns NULL if an allocation fails.
 */
struct rhash_lock_head __rcu **rht_bucket_nested_insert(
	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	union nested_table *ntbl;

	ntbl = nested_table_top(tbl);
	hash >>= tbl->nest;
	ntbl = nested_table_alloc(ht, &ntbl[index].table,
				  size <= (1 << shift));

	while (ntbl && size > (1 << shift)) {
		index = hash & ((1 << shift) - 1);
		size >>= shift;
		hash >>= shift;
		ntbl = nested_table_alloc(ht, &ntbl[index].table,
					  size <= (1 << shift));
	}

	if (!ntbl)
		return NULL;

	return &ntbl[hash].bucket;

}
EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);