/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U
#define BUCKET_LOCKS_PER_CPU	32UL

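/*
 * Nested bucket tables: when a flat bucket array cannot be allocated
 * atomically (e.g. during an insert-time resize), the table is instead
 * built as a tree of page-sized arrays of this union, where interior
 * entries point at further tables and leaf entries hold the actual
 * bucket heads.
 */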
union nested_table {
	union nested_table __rcu *table;
	struct rhash_head __rcu *bucket;
};

static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	spinlock_t *lock = rht_bucket_lock(tbl, hash);

	return (debug_locks) ? lockdep_is_held(lock) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif

static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
			      gfp_t gfp)
{
	unsigned int i, size;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif

	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);

	/* Never allocate more than 0.5 locks per bucket */
	size = min_t(unsigned int, size, tbl->size >> 1);

	if (tbl->nest)
		size = min(size, 1U << tbl->nest);

	if (sizeof(spinlock_t) != 0) {
		tbl->locks = NULL;
#ifdef CONFIG_NUMA
		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
		    gfp == GFP_KERNEL)
			tbl->locks = vmalloc(size * sizeof(spinlock_t));
#endif
		if (gfp != GFP_KERNEL)
			gfp |= __GFP_NOWARN | __GFP_NORETRY;

		if (!tbl->locks)
			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
						   gfp);
		if (!tbl->locks)
			return -ENOMEM;
		for (i = 0; i < size; i++)
			spin_lock_init(&tbl->locks[i]);
	}
	tbl->locks_mask = size - 1;

	return 0;
}

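/*
 * Sizing example for alloc_bucket_locks() above: with 8 possible CPUs and
 * the default locks_mul of 32 (BUCKET_LOCKS_PER_CPU), 8 * 32 rounds up to
 * 256 locks, which is then capped at half the number of buckets (and at
 * 2^nest entries for nested tables).
 */
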
static void nested_table_free(union nested_table *ntbl, unsigned int size)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	const unsigned int len = 1 << shift;
	unsigned int i;

	ntbl = rcu_dereference_raw(ntbl->table);
	if (!ntbl)
		return;

	if (size > len) {
		size >>= shift;
		for (i = 0; i < len; i++)
			nested_table_free(ntbl + i, size);
	}

	kfree(ntbl);
}

static void nested_bucket_table_free(const struct bucket_table *tbl)
{
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int len = 1 << tbl->nest;
	union nested_table *ntbl;
	unsigned int i;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);

	for (i = 0; i < len; i++)
		nested_table_free(ntbl + i, size);

	kfree(ntbl);
}

static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl->nest)
		nested_bucket_table_free(tbl);

	kvfree(tbl->locks);
	kvfree(tbl);
}

static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

static union nested_table *nested_table_alloc(struct rhashtable *ht,
					      union nested_table __rcu **prev,
					      unsigned int shifted,
					      unsigned int nhash)
{
	union nested_table *ntbl;
	int i;

	ntbl = rcu_dereference(*prev);
	if (ntbl)
		return ntbl;

	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);

	if (ntbl && shifted) {
		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
					    (i << shifted) | nhash);
	}

	rcu_assign_pointer(*prev, ntbl);

	return ntbl;
}

static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
						      size_t nbuckets,
						      gfp_t gfp)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	struct bucket_table *tbl;
	size_t size;

	if (nbuckets < (1 << (shift + 1)))
		return NULL;

	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);

	tbl = kzalloc(size, gfp);
	if (!tbl)
		return NULL;

	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
				0, 0)) {
		kfree(tbl);
		return NULL;
	}

	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;

	return tbl;
}

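/*
 * Nesting arithmetic for the helpers above, worked through for
 * illustration: with 4KiB pages and 8-byte pointers, shift is 9, so each
 * nested level holds 512 entries. A table of 2^18 buckets then gets
 * tbl->nest = 9 and is spread over two levels of 512-entry pages
 * (512 * 512 == 2^18).
 */
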
static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;

	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
	    gfp != GFP_KERNEL)
		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (tbl == NULL && gfp == GFP_KERNEL)
		tbl = vzalloc(size);

	size = nbuckets;

	if (tbl == NULL && gfp != GFP_KERNEL) {
		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
		nbuckets = 0;
	}
	if (tbl == NULL)
		return NULL;

	tbl->size = size;

	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
		bucket_table_free(tbl);
		return NULL;
	}

	INIT_LIST_HEAD(&tbl->walkers);

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);

	return tbl;
}

static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
						  struct bucket_table *tbl)
{
	struct bucket_table *new_tbl;

	do {
		new_tbl = tbl;
		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	} while (tbl);

	return new_tbl;
}

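/*
 * Rehashing is incremental: rhashtable_rehash_attach() publishes the new
 * table via old_tbl->future_tbl, so insertions immediately target the
 * newest table, while rhashtable_rehash_chain() migrates one old bucket
 * at a time under both bucket locks. Lookups and removals keep walking
 * the chain of future_tbl pointers until the old table is freed via RCU.
 */
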
static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht,
		rht_dereference_rcu(old_tbl->future_tbl, ht));
	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
	int err = -EAGAIN;
	struct rhash_head *head, *next, *entry;
	spinlock_t *new_bucket_lock;
	unsigned int new_hash;

	if (new_tbl->nest)
		goto out;

	err = -ENOENT;

	rht_for_each(entry, old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);

	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
				      new_tbl, new_hash);

	RCU_INIT_POINTER(entry->next, head);

	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
	spin_unlock(new_bucket_lock);

	rcu_assign_pointer(*pprev, next);

out:
	return err;
}

static int rhashtable_rehash_chain(struct rhashtable *ht,
				   unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	spinlock_t *old_bucket_lock;
	int err;

	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);

	spin_lock_bh(old_bucket_lock);
	while (!(err = rhashtable_rehash_one(ht, old_hash)))
		;

	if (err == -ENOENT) {
		old_tbl->rehash++;
		err = 0;
	}
	spin_unlock_bh(old_bucket_lock);

	return err;
}

static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Protect future_tbl using the first bucket lock. */
	spin_lock_bh(old_tbl->locks);

	/* Did somebody beat us to it? */
	if (rcu_access_pointer(old_tbl->future_tbl)) {
		spin_unlock_bh(old_tbl->locks);
		return -EEXIST;
	}

	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 */
	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);

	spin_unlock_bh(old_tbl->locks);

	return 0;
}

static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;
	int err;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
		err = rhashtable_rehash_chain(ht, old_hash);
		if (err)
			return err;
	}

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;
	spin_unlock(&ht->lock);

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}

static int rhashtable_rehash_alloc(struct rhashtable *ht,
				   struct bucket_table *old_tbl,
				   unsigned int size)
{
	struct bucket_table *new_tbl;
	int err;

	ASSERT_RHT_MUTEX(ht);

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}

/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht:		the hash table to shrink
 *
 * This function shrinks the hash table to fit, i.e. to the smallest
 * size that would not cause it to expand right away again.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * The caller must ensure that no concurrent table mutations take place.
 * It is however valid to have concurrent lookups if they are RCU protected.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int nelems = atomic_read(&ht->nelems);
	unsigned int size = 0;

	if (nelems)
		size = roundup_pow_of_two(nelems * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	return rhashtable_rehash_alloc(ht, old_tbl, size);
}

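/*
 * Deferred resizing runs here in process context: grow when a table is
 * more than 75% full, shrink below 30% utilisation if automatic_shrinking
 * is set, and replace a nested bucket table with a flat one of the same
 * size. The work is rescheduled while further rehashes remain pending.
 */
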
static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;
	int err = 0;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);

	tbl = rht_dereference(ht->tbl, ht);
	tbl = rhashtable_last_table(ht, tbl);

	if (rht_grow_above_75(ht, tbl))
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
		err = rhashtable_shrink(ht);
	else if (tbl->nest)
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);

	if (!err)
		err = rhashtable_rehash_table(ht);

	mutex_unlock(&ht->mutex);

	if (err)
		schedule_work(&ht->run_work);
}

static int rhashtable_insert_rehash(struct rhashtable *ht,
				    struct bucket_table *tbl)
{
	struct bucket_table *old_tbl;
	struct bucket_table *new_tbl;
	unsigned int size;
	int err;

	old_tbl = rht_dereference_rcu(ht->tbl, ht);

	size = tbl->size;

	err = -EBUSY;

	if (rht_grow_above_75(ht, tbl))
		size *= 2;
	/* Do not schedule more than one rehash */
	else if (old_tbl != tbl)
		goto fail;

	err = -ENOMEM;

	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
	if (new_tbl == NULL)
		goto fail;

	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
	if (err) {
		bucket_table_free(new_tbl);
		if (err == -EEXIST)
			err = 0;
	} else
		schedule_work(&ht->run_work);

	return err;

fail:
	/* Do not fail the insert if someone else did a rehash. */
	if (likely(rcu_dereference_raw(tbl->future_tbl)))
		return 0;

	/* Schedule async rehash to retry allocation in process context. */
	if (err == -ENOMEM)
		schedule_work(&ht->run_work);

	return err;
}

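/*
 * rhashtable_lookup_one() below scans at most RHT_ELASTICITY entries of a
 * bucket chain. A longer chain makes the insertion fail with -EAGAIN,
 * which triggers a rehash (with a fresh hash seed) to keep chain lengths
 * bounded even under adversarial keys.
 */
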
static void *rhashtable_lookup_one(struct rhashtable *ht,
				   struct bucket_table *tbl, unsigned int hash,
				   const void *key, struct rhash_head *obj)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_head __rcu **pprev;
	struct rhash_head *head;
	int elasticity;

	elasticity = RHT_ELASTICITY;
	pprev = rht_bucket_var(tbl, hash);
	rht_for_each_continue(head, *pprev, tbl, hash) {
		struct rhlist_head *list;
		struct rhlist_head *plist;

		elasticity--;
		if (!key ||
		    (ht->p.obj_cmpfn ?
		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
		     rhashtable_compare(&arg, rht_obj(ht, head))))
			continue;

		if (!ht->rhlist)
			return rht_obj(ht, head);

		list = container_of(obj, struct rhlist_head, rhead);
		plist = container_of(head, struct rhlist_head, rhead);

		RCU_INIT_POINTER(list->next, plist);
		head = rht_dereference_bucket(head->next, tbl, hash);
		RCU_INIT_POINTER(list->rhead.next, head);
		rcu_assign_pointer(*pprev, obj);

		return NULL;
	}

	if (elasticity <= 0)
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-ENOENT);
}

static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
						  struct bucket_table *tbl,
						  unsigned int hash,
						  struct rhash_head *obj,
						  void *data)
{
	struct rhash_head __rcu **pprev;
	struct bucket_table *new_tbl;
	struct rhash_head *head;

	if (!IS_ERR_OR_NULL(data))
		return ERR_PTR(-EEXIST);

	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	new_tbl = rcu_dereference(tbl->future_tbl);
	if (new_tbl)
		return new_tbl;

	if (PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	if (unlikely(rht_grow_above_max(ht, tbl)))
		return ERR_PTR(-E2BIG);

	if (unlikely(rht_grow_above_100(ht, tbl)))
		return ERR_PTR(-EAGAIN);

	pprev = rht_bucket_insert(ht, tbl, hash);
	if (!pprev)
		return ERR_PTR(-ENOMEM);

	head = rht_dereference_bucket(*pprev, tbl, hash);

	RCU_INIT_POINTER(obj->next, head);
	if (ht->rhlist) {
		struct rhlist_head *list;

		list = container_of(obj, struct rhlist_head, rhead);
		RCU_INIT_POINTER(list->next, NULL);
	}

	rcu_assign_pointer(*pprev, obj);

	atomic_inc(&ht->nelems);
	if (rht_grow_above_75(ht, tbl))
		schedule_work(&ht->run_work);

	return NULL;
}

static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
				   struct rhash_head *obj)
{
	struct bucket_table *new_tbl;
	struct bucket_table *tbl;
	unsigned int hash;
	spinlock_t *lock;
	void *data;

	tbl = rcu_dereference(ht->tbl);

	/* All insertions must grab the oldest table containing
	 * the hashed bucket that is yet to be rehashed.
	 */
	for (;;) {
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		lock = rht_bucket_lock(tbl, hash);
		spin_lock_bh(lock);

		if (tbl->rehash <= hash)
			break;

		spin_unlock_bh(lock);
		tbl = rcu_dereference(tbl->future_tbl);
	}

	data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
	new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
	if (PTR_ERR(new_tbl) != -EEXIST)
		data = ERR_CAST(new_tbl);

	while (!IS_ERR_OR_NULL(new_tbl)) {
		tbl = new_tbl;
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		spin_lock_nested(rht_bucket_lock(tbl, hash),
				 SINGLE_DEPTH_NESTING);

		data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
		new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
		if (PTR_ERR(new_tbl) != -EEXIST)
			data = ERR_CAST(new_tbl);

		spin_unlock(rht_bucket_lock(tbl, hash));
	}

	spin_unlock_bh(lock);

	if (PTR_ERR(data) == -EAGAIN)
		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
			       -EAGAIN);

	return data;
}

void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
			     struct rhash_head *obj)
{
	void *data;

	do {
		rcu_read_lock();
		data = rhashtable_try_insert(ht, key, obj);
		rcu_read_unlock();
	} while (PTR_ERR(data) == -EAGAIN);

	return data;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);

/**
 * rhashtable_walk_enter - Initialise an iterator
 * @ht:		Table to walk over
 * @iter:	Hash table Iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice.  Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may sleep so you must not call it from interrupt
 * context or with spin locks held.
 *
 * You must call rhashtable_walk_exit after this function returns.
 */
void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;

	spin_lock(&ht->lock);
	iter->walker.tbl =
		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
	spin_unlock(&ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_enter);

/**
 * rhashtable_walk_exit - Free an iterator
 * @iter:	Hash table Iterator
 *
 * This function frees resources allocated by rhashtable_walk_enter.
 */
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
	spin_lock(&iter->ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&iter->ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);

/**
 * rhashtable_walk_start - Start a hash table walk
 * @iter:	Hash table iterator
 *
 * Start a hash table walk.  Note that we take the RCU lock in all
 * cases including when we return an error.  So you must always call
 * rhashtable_walk_stop to clean up.
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if a resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may use it immediately
 * by calling rhashtable_walk_next.
 */
int rhashtable_walk_start(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;

	rcu_read_lock();

	spin_lock(&ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&ht->lock);

	if (!iter->walker.tbl) {
		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
		return -EAGAIN;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start);

/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter:	Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop when you are finished
 * with the walk.
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if a resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker.tbl;
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (p) {
		if (!rhlist || !(list = rcu_dereference(list->next))) {
			p = rcu_dereference(p->next);
			list = container_of(p, struct rhlist_head, rhead);
		}
		goto next;
	}

	for (; iter->slot < tbl->size; iter->slot++) {
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (rhlist) {
				list = container_of(p, struct rhlist_head,
						    rhead);
				do {
					if (!skip)
						goto next;
					skip--;
					list = rcu_dereference(list->next);
				} while (list);

				continue;
			}
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		iter->skip = 0;
	}

	iter->p = NULL;

	/* Ensure we see any new tables. */
	smp_rmb();

	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker.tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);

/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter:	Hash table iterator
 *
 * Finish a hash table walk.
 */
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker.tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	spin_lock(&ht->lock);
	if (tbl->rehash < tbl->size)
		list_add(&iter->walker.list, &tbl->walkers);
	else
		iter->walker.tbl = NULL;
	spin_unlock(&ht->lock);

	iter->p = NULL;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);

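/*
 * Putting the walk API together, a minimal sketch (illustrative only;
 * "my_obj" is a hypothetical caller-side structure embedding a
 * struct rhash_head):
 *
 *	struct rhashtable_iter iter;
 *	struct my_obj *obj;
 *
 *	rhashtable_walk_enter(ht, &iter);
 *	rhashtable_walk_start(&iter);
 *
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;	(resize: iterator rewound)
 *			break;
 *		}
 *		[... process obj ...]
 *	}
 *
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */
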
static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
		   (unsigned long)params->min_size);
}

static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}

/**
 * rhashtable_init - initialize a new hash table
 * @ht:		hash table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *	int			key;
 *	void *			my_member;
 *	struct rhash_head	node;
 * };
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.key_offset = offsetof(struct test_obj, key),
 *	.key_len = sizeof(int),
 *	.hashfn = jhash,
 *	.nulls_base = (1U << RHT_BASE_SHIFT),
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *	[...]
 *	struct rhash_head	node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
 * {
 *	struct test_obj *obj = data;
 *
 *	return [... hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.hashfn = jhash,
 *	.obj_hashfn = my_hash_fn,
 * };
 */
int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	size = HASH_DEFAULT_SIZE;

	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	memcpy(&ht->p, params, sizeof(*params));

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	/* Cap total entries at 2^31 to avoid nelems overflow. */
	ht->max_elems = 1u << 31;

	if (params->max_size) {
		ht->p.max_size = rounddown_pow_of_two(params->max_size);
		if (ht->p.max_size < ht->max_elems / 2)
			ht->max_elems = ht->p.max_size * 2;
	}

	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);

	if (params->nelem_hint)
		size = rounded_hashtable_size(&ht->p);

	if (params->locks_mul)
		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
	else
		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;

	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (tbl == NULL)
		return -ENOMEM;

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init);

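/*
 * Usage sketch (illustrative only), reusing the hypothetical test_obj and
 * params from Configuration Example 1 above together with the fast-path
 * helpers from <linux/rhashtable.h>:
 *
 *	struct rhashtable ht;
 *	struct test_obj *obj, *found;
 *	int key = 1;
 *	int err;
 *
 *	err = rhashtable_init(&ht, &params);
 *	if (err)
 *		return err;
 *
 *	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *	obj->key = key;
 *	err = rhashtable_insert_fast(&ht, &obj->node, params);
 *
 *	rcu_read_lock();
 *	found = rhashtable_lookup_fast(&ht, &key, params);
 *	rcu_read_unlock();
 *
 *	rhashtable_remove_fast(&ht, &obj->node, params);
 *	rhashtable_destroy(&ht);
 */
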
/**
 * rhltable_init - initialize a new hash list table
 * @hlt:	hash list table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash list table.
 *
 * See documentation for rhashtable_init.
 */
int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
{
	int err;

	/* No rhlist NULLs marking for now. */
	if (params->nulls_base)
		return -EINVAL;

	err = rhashtable_init(&hlt->ht, params);
	hlt->ht.rhlist = true;
	return err;
}
EXPORT_SYMBOL_GPL(rhltable_init);

static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
				void (*free_fn)(void *ptr, void *arg),
				void *arg)
{
	struct rhlist_head *list;

	if (!ht->rhlist) {
		free_fn(rht_obj(ht, obj), arg);
		return;
	}

	list = container_of(obj, struct rhlist_head, rhead);
	do {
		obj = &list->rhead;
		list = rht_dereference(list->next, ht);
		free_fn(rht_obj(ht, obj), arg);
	} while (list);
}

/**
 * rhashtable_free_and_destroy - free elements and destroy hash table
 * @ht:		the hash table to destroy
 * @free_fn:	callback to release resources of element
 * @arg:	pointer passed to free_fn
 *
 * Stops any pending async resize. If defined, invokes free_fn for each
 * element to release resources. Note that RCU protected readers may still
 * be accessing the elements, so resources must be released in a manner
 * compatible with that. Then frees the bucket array.
 *
 * This function may sleep to wait for an async resize to complete. The
 * caller must ensure that no further write operations occur in parallel.
 */
void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg)
{
	struct bucket_table *tbl;
	unsigned int i;

	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	tbl = rht_dereference(ht->tbl, ht);
	if (free_fn) {
		for (i = 0; i < tbl->size; i++) {
			struct rhash_head *pos, *next;

			for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL;
			     !rht_is_a_nulls(pos);
			     pos = next,
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL)
				rhashtable_free_one(ht, pos, free_fn, arg);
		}
	}

	bucket_table_free(tbl);
	mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);

void rhashtable_destroy(struct rhashtable *ht)
{
	return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);

struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
					    unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	static struct rhash_head __rcu *rhnull =
		(struct rhash_head __rcu *)NULLS_MARKER(0);
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int subhash = hash;
	union nested_table *ntbl;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
	subhash >>= tbl->nest;

	while (ntbl && size > (1 << shift)) {
		index = subhash & ((1 << shift) - 1);
		ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
						  tbl, hash);
		size >>= shift;
		subhash >>= shift;
	}

	if (!ntbl)
		return &rhnull;

	return &ntbl[subhash].bucket;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested);

struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
						   struct bucket_table *tbl,
						   unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	union nested_table *ntbl;
	unsigned int shifted;
	unsigned int nhash;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	hash >>= tbl->nest;
	nhash = index;
	shifted = tbl->nest;
	ntbl = nested_table_alloc(ht, &ntbl[index].table,
				  size <= (1 << shift) ? shifted : 0, nhash);

	while (ntbl && size > (1 << shift)) {
		index = hash & ((1 << shift) - 1);
		size >>= shift;
		hash >>= shift;
		nhash |= index << shifted;
		shifted += shift;
		ntbl = nested_table_alloc(ht, &ntbl[index].table,
					  size <= (1 << shift) ? shifted : 0,
					  nhash);
	}

	if (!ntbl)
		return NULL;

	return &ntbl[hash].bucket;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);