xref: /linux/include/linux/rhashtable.h (revision dd5b2498d845f925904cb2afabb6ba11bfc317c5)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Resizable, Scalable, Concurrent Hash Table
4  *
5  * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
6  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
7  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
8  *
9  * Code partially derived from nft_hash
10  * Rewritten with rehash code from br_multicast plus single list
11  * pointer as suggested by Josh Triplett
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License version 2 as
15  * published by the Free Software Foundation.
16  */
17 
18 #ifndef _LINUX_RHASHTABLE_H
19 #define _LINUX_RHASHTABLE_H
20 
21 #include <linux/err.h>
22 #include <linux/errno.h>
23 #include <linux/jhash.h>
24 #include <linux/list_nulls.h>
25 #include <linux/workqueue.h>
26 #include <linux/rculist.h>
27 #include <linux/bit_spinlock.h>
28 
29 #include <linux/rhashtable-types.h>
/*
 * Objects in an rhashtable have an embedded struct rhash_head
 * which is linked into a hash chain from the hash table - or one
 * of two or more hash tables when the rhashtable is being resized.
 * The end of the chain is marked with a special nulls marker which has
 * the least significant bit set but otherwise stores the address of
 * the hash bucket.  This allows us to be sure we've found the end
 * of the right list.
 * The value stored in the hash bucket has BIT(1) used as a lock bit.
 * This bit must be atomically set before any changes are made to
 * the chain.  To avoid dereferencing this pointer without clearing
 * the bit first, we use an opaque 'struct rhash_lock_head *' for the
 * pointer stored in the bucket.  This struct needs to be defined so
 * that rcu_dereference() works on it, but it has no content so a
 * cast is needed for it to be useful.  This ensures it isn't
 * used by mistake without clearing the lock bit first.
 */
struct rhash_lock_head {};
48 
/* Maximum chain length before rehash
 *
 * The maximum (not average) chain length grows with the size of the hash
 * table, at a rate of (log N)/(log log N).
 *
 * The value of 16 is selected so that even if the hash table grew to
 * 2^32 you would not expect the maximum chain length to exceed it
 * unless we are under attack (or extremely unlucky).
 *
 * As this limit is only to detect attacks, we don't need to set it to a
 * lower value as you'd need the chain length to vastly exceed 16 to have
 * any real effect on the system.
 *
 * __rhashtable_insert_fast() counts the entries it walks in a bucket and
 * hands the insertion to rhashtable_insert_slow() once that count reaches
 * this limit.
 */
#define RHT_ELASTICITY	16u
63 
/**
 * struct bucket_table - Table of hash buckets
 * @size: Number of hash buckets
 * @nest: Number of bits of first-level nested table.
 * @hash_rnd: Random seed to fold into hash
 * @walkers: List of active walkers
 * @rcu: RCU structure for freeing the table
 * @future_tbl: Table under construction during rehashing
 * @dep_map: lockdep map acquired/released around the per-bucket bit locks
 * @buckets: size * hash buckets
 */
struct bucket_table {
	unsigned int		size;
	unsigned int		nest;
	u32			hash_rnd;
	struct list_head	walkers;
	struct rcu_head		rcu;

	struct bucket_table __rcu *future_tbl;

	struct lockdep_map	dep_map;

	struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};
89 
90 /*
91  * We lock a bucket by setting BIT(1) in the pointer - this is always
92  * zero in real pointers and in the nulls marker.
93  * bit_spin_locks do not handle contention well, but the whole point
94  * of the hashtable design is to achieve minimum per-bucket contention.
95  * A nested hash table might not have a bucket pointer.  In that case
96  * we cannot get a lock.  For remove and replace the bucket cannot be
97  * interesting and doesn't need locking.
98  * For insert we allocate the bucket if this is the last bucket_table,
99  * and then take the lock.
100  * Sometimes we unlock a bucket by writing a new pointer there.  In that
101  * case we don't need to unlock, but we do need to reset state such as
102  * local_bh. For that we have rht_assign_unlock().  As rcu_assign_pointer()
103  * provides the same release semantics that bit_spin_unlock() provides,
104  * this is safe.
105  */
106 
107 static inline void rht_lock(struct bucket_table *tbl,
108 			    struct rhash_lock_head **bkt)
109 {
110 	local_bh_disable();
111 	bit_spin_lock(1, (unsigned long *)bkt);
112 	lock_map_acquire(&tbl->dep_map);
113 }
114 
115 static inline void rht_lock_nested(struct bucket_table *tbl,
116 				   struct rhash_lock_head **bucket,
117 				   unsigned int subclass)
118 {
119 	local_bh_disable();
120 	bit_spin_lock(1, (unsigned long *)bucket);
121 	lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
122 }
123 
124 static inline void rht_unlock(struct bucket_table *tbl,
125 			      struct rhash_lock_head **bkt)
126 {
127 	lock_map_release(&tbl->dep_map);
128 	bit_spin_unlock(1, (unsigned long *)bkt);
129 	local_bh_enable();
130 }
131 
132 static inline void rht_assign_unlock(struct bucket_table *tbl,
133 				     struct rhash_lock_head **bkt,
134 				     struct rhash_head *obj)
135 {
136 	struct rhash_head **p = (struct rhash_head **)bkt;
137 
138 	lock_map_release(&tbl->dep_map);
139 	rcu_assign_pointer(*p, obj);
140 	preempt_enable();
141 	__release(bitlock);
142 	local_bh_enable();
143 }
144 
145 /*
146  * If 'p' is a bucket head and might be locked:
147  *   rht_ptr() returns the address without the lock bit.
148  *   rht_ptr_locked() returns the address WITH the lock bit.
149  */
150 static inline struct rhash_head __rcu *rht_ptr(const struct rhash_lock_head *p)
151 {
152 	return (void *)(((unsigned long)p) & ~BIT(1));
153 }
154 
155 static inline struct rhash_lock_head __rcu *rht_ptr_locked(const
156 							   struct rhash_head *p)
157 {
158 	return (void *)(((unsigned long)p) | BIT(1));
159 }
160 
/*
 * NULLS_MARKER() expects a hash value with the low
 * bits most likely to be significant, and it discards
 * the msb.
 * We give it an address, in which the bottom 2 bits are
 * always 0, and the msb might be significant.
 * So we shift the address down one bit to align with
 * expectations and avoid losing a significant bit.
 */
#define	RHT_NULLS_MARKER(ptr)	\
	((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
/* Store in @ptr the nulls marker derived from @ptr's own address. */
#define INIT_RHT_NULLS_HEAD(ptr)	\
	((ptr) = RHT_NULLS_MARKER(&(ptr)))
174 
/* True when @ptr is a nulls (end-of-chain) marker, i.e. has bit 0 set. */
static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
	unsigned long raw = (unsigned long)ptr;

	return (raw & 1UL) != 0;
}
179 
180 static inline void *rht_obj(const struct rhashtable *ht,
181 			    const struct rhash_head *he)
182 {
183 	return (char *)he - ht->p.head_offset;
184 }
185 
186 static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
187 					    unsigned int hash)
188 {
189 	return hash & (tbl->size - 1);
190 }
191 
192 static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
193 	const void *key, const struct rhashtable_params params,
194 	unsigned int hash_rnd)
195 {
196 	unsigned int hash;
197 
198 	/* params must be equal to ht->p if it isn't constant. */
199 	if (!__builtin_constant_p(params.key_len))
200 		hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
201 	else if (params.key_len) {
202 		unsigned int key_len = params.key_len;
203 
204 		if (params.hashfn)
205 			hash = params.hashfn(key, key_len, hash_rnd);
206 		else if (key_len & (sizeof(u32) - 1))
207 			hash = jhash(key, key_len, hash_rnd);
208 		else
209 			hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
210 	} else {
211 		unsigned int key_len = ht->p.key_len;
212 
213 		if (params.hashfn)
214 			hash = params.hashfn(key, key_len, hash_rnd);
215 		else
216 			hash = jhash(key, key_len, hash_rnd);
217 	}
218 
219 	return hash;
220 }
221 
222 static inline unsigned int rht_key_hashfn(
223 	struct rhashtable *ht, const struct bucket_table *tbl,
224 	const void *key, const struct rhashtable_params params)
225 {
226 	unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);
227 
228 	return rht_bucket_index(tbl, hash);
229 }
230 
231 static inline unsigned int rht_head_hashfn(
232 	struct rhashtable *ht, const struct bucket_table *tbl,
233 	const struct rhash_head *he, const struct rhashtable_params params)
234 {
235 	const char *ptr = rht_obj(ht, he);
236 
237 	return likely(params.obj_hashfn) ?
238 	       rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
239 							    ht->p.key_len,
240 						       tbl->hash_rnd)) :
241 	       rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
242 }
243 
244 /**
245  * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
246  * @ht:		hash table
247  * @tbl:	current table
248  */
249 static inline bool rht_grow_above_75(const struct rhashtable *ht,
250 				     const struct bucket_table *tbl)
251 {
252 	/* Expand table when exceeding 75% load */
253 	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
254 	       (!ht->p.max_size || tbl->size < ht->p.max_size);
255 }
256 
257 /**
258  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
259  * @ht:		hash table
260  * @tbl:	current table
261  */
262 static inline bool rht_shrink_below_30(const struct rhashtable *ht,
263 				       const struct bucket_table *tbl)
264 {
265 	/* Shrink table beneath 30% load */
266 	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
267 	       tbl->size > ht->p.min_size;
268 }
269 
270 /**
271  * rht_grow_above_100 - returns true if nelems > table-size
272  * @ht:		hash table
273  * @tbl:	current table
274  */
275 static inline bool rht_grow_above_100(const struct rhashtable *ht,
276 				      const struct bucket_table *tbl)
277 {
278 	return atomic_read(&ht->nelems) > tbl->size &&
279 		(!ht->p.max_size || tbl->size < ht->p.max_size);
280 }
281 
282 /**
283  * rht_grow_above_max - returns true if table is above maximum
284  * @ht:		hash table
285  * @tbl:	current table
286  */
287 static inline bool rht_grow_above_max(const struct rhashtable *ht,
288 				      const struct bucket_table *tbl)
289 {
290 	return atomic_read(&ht->nelems) >= ht->max_elems;
291 }
292 
#ifdef CONFIG_PROVE_LOCKING
/* Real checks; only declared here, defined outside this header. */
int lockdep_rht_mutex_is_held(struct rhashtable *ht);
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
#else
/*
 * Without CONFIG_PROVE_LOCKING there is nothing to verify: both helpers
 * unconditionally return 1, so the conditions passed to the
 * rht_dereference*() macros always hold.
 */
static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return 1;
}

static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
					     u32 hash)
{
	return 1;
}
#endif /* CONFIG_PROVE_LOCKING */
308 
309 void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
310 			     struct rhash_head *obj);
311 
312 void rhashtable_walk_enter(struct rhashtable *ht,
313 			   struct rhashtable_iter *iter);
314 void rhashtable_walk_exit(struct rhashtable_iter *iter);
315 int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);
316 
/*
 * Convenience wrapper around rhashtable_walk_start_check() for callers that
 * do not care about its return value; the result is explicitly discarded.
 */
static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
{
	(void)rhashtable_walk_start_check(iter);
}
321 
322 void *rhashtable_walk_next(struct rhashtable_iter *iter);
323 void *rhashtable_walk_peek(struct rhashtable_iter *iter);
324 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
325 
326 void rhashtable_free_and_destroy(struct rhashtable *ht,
327 				 void (*free_fn)(void *ptr, void *arg),
328 				 void *arg);
329 void rhashtable_destroy(struct rhashtable *ht);
330 
331 struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
332 						 unsigned int hash);
333 struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl,
334 						   unsigned int hash);
335 struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
336 							struct bucket_table *tbl,
337 							unsigned int hash);
338 
/*
 * RCU dereference helpers.  Each one forwards @p to an rcu_dereference_*()
 * primitive together with a lockdep condition:
 *   rht_dereference()            - protected variant, condition: ht mutex held
 *   rht_dereference_rcu()        - check variant,     condition: ht mutex held
 *   rht_dereference_bucket()     - protected variant, condition: bucket held
 *   rht_dereference_bucket_rcu() - check variant,     condition: bucket held
 */
#define rht_dereference(p, ht) \
	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_rcu(p, ht) \
	rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_bucket(p, tbl, hash) \
	rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))

#define rht_dereference_bucket_rcu(p, tbl, hash) \
	rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))
350 
/*
 * Point @tpos at the object that embeds @pos as @member.  The expression
 * always evaluates to 1 so it can be chained with && in a loop condition.
 */
#define rht_entry(tpos, pos, member) \
	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
353 
354 static inline struct rhash_lock_head __rcu *const *rht_bucket(
355 	const struct bucket_table *tbl, unsigned int hash)
356 {
357 	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
358 				     &tbl->buckets[hash];
359 }
360 
361 static inline struct rhash_lock_head __rcu **rht_bucket_var(
362 	struct bucket_table *tbl, unsigned int hash)
363 {
364 	return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
365 				     &tbl->buckets[hash];
366 }
367 
368 static inline struct rhash_lock_head __rcu **rht_bucket_insert(
369 	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
370 {
371 	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
372 				     &tbl->buckets[hash];
373 }
374 
/**
 * rht_for_each_from - iterate over hash chain from given head
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * The walk stops at the first nulls marker.  Every step goes through
 * rht_dereference_bucket().
 */
#define rht_for_each_from(pos, head, tbl, hash) \
	for (pos = rht_dereference_bucket(head, tbl, hash); \
	     !rht_is_a_nulls(pos); \
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
386 
/**
 * rht_for_each - iterate over hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * Starts from the bucket head of @hash with the lock bit stripped by
 * rht_ptr().
 */
#define rht_for_each(pos, tbl, hash) \
	rht_for_each_from(pos, rht_ptr(*rht_bucket(tbl, hash)), tbl, hash)
395 
/**
 * rht_for_each_entry_from - iterate over hash chain from given head
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * @tpos is refreshed through rht_entry() on every iteration, before the
 * loop body runs.
 */
#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member)	\
	for (pos = rht_dereference_bucket(head, tbl, hash);		\
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
409 
/**
 * rht_for_each_entry - iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * Starts from the bucket head of @hash with the lock bit stripped by
 * rht_ptr().
 */
#define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
	rht_for_each_entry_from(tpos, pos, rht_ptr(*rht_bucket(tbl, hash)), \
				    tbl, hash, member)
421 
/**
 * rht_for_each_entry_safe - safely iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @nxt:	the &struct rhash_head to use as next in loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive allows for the looped code to
 * remove the loop cursor from the list.
 *
 * The successor is fetched into @nxt before the loop body runs, so the body
 * may unlink @pos.  The third macro parameter is deliberately not called
 * "next": a parameter with that name would also be substituted into the
 * "->next" member access below, so the macro would only compile when the
 * caller's variable happened to be named "next".
 */
#define rht_for_each_entry_safe(tpos, pos, nxt, tbl, hash, member)	      \
	for (pos = rht_dereference_bucket(rht_ptr(*rht_bucket(tbl, hash)),    \
					  tbl, hash),			      \
	     nxt = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket((pos)->next, tbl, hash) : NULL; \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
	     pos = nxt,							      \
	     nxt = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket((pos)->next, tbl, hash) : NULL)
443 
/**
 * rht_for_each_rcu_from - iterate over rcu hash chain from given head
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * @pos is parenthesised where it is dereferenced, matching
 * rht_for_each_from().
 */
#define rht_for_each_rcu_from(pos, head, tbl, hash)			\
	for (({barrier(); }),						\
	     pos = rht_dereference_bucket_rcu(head, tbl, hash);		\
	     !rht_is_a_nulls(pos);					\
	     pos = rcu_dereference_raw((pos)->next))
460 
/**
 * rht_for_each_rcu - iterate over rcu hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * The bucket head is dereferenced first and then has its lock bit stripped
 * by rht_ptr().  @pos is parenthesised where it is dereferenced, matching
 * rht_for_each_from().
 */
#define rht_for_each_rcu(pos, tbl, hash)			\
	for (({barrier(); }),						\
	     pos = rht_ptr(rht_dereference_bucket_rcu(			\
				   *rht_bucket(tbl, hash), tbl, hash));	\
	     !rht_is_a_nulls(pos);					\
	     pos = rcu_dereference_raw((pos)->next))
477 
/**
 * rht_for_each_entry_rcu_from - iterate over rcu hash chain from given head
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * @pos is parenthesised where it is dereferenced, matching
 * rht_for_each_entry_from().
 */
#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
	for (({barrier(); }),						    \
	     pos = rht_dereference_bucket_rcu(head, tbl, hash);		    \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
	     pos = rht_dereference_bucket_rcu((pos)->next, tbl, hash))
496 
/**
 * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * NOTE(review): here the bucket head is passed through rht_ptr() before it
 * reaches rht_dereference_bucket_rcu(), whereas rht_for_each_rcu() applies
 * rht_ptr() to the result of the dereference - confirm this is intended.
 */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
	rht_for_each_entry_rcu_from(tpos, pos,				   \
					rht_ptr(*rht_bucket(tbl, hash)),   \
					tbl, hash, member)
513 
/**
 * rhl_for_each_rcu - iterate over rcu hash table list
 * @pos:	the &struct rhlist_head to use as a loop cursor.
 * @list:	the head of the list
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.
 *
 * The walk ends when the cursor becomes NULL.  @list and the dereferenced
 * @pos are parenthesised so that arbitrary expressions can be passed.
 */
#define rhl_for_each_rcu(pos, list)					\
	for (pos = (list); pos; pos = rcu_dereference_raw((pos)->next))
524 
/**
 * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhlist_head to use as a loop cursor.
 * @list:	the head of the list
 * @member:	name of the &struct rhlist_head within the hashable struct.
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.
 *
 * The walk ends when the cursor becomes NULL.  @list and the dereferenced
 * @pos are parenthesised so that arbitrary expressions can be passed.
 */
#define rhl_for_each_entry_rcu(tpos, pos, list, member)			\
	for (pos = (list); pos && rht_entry(tpos, pos, member);		\
	     pos = rcu_dereference_raw((pos)->next))
538 
539 static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
540 				     const void *obj)
541 {
542 	struct rhashtable *ht = arg->ht;
543 	const char *ptr = obj;
544 
545 	return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
546 }
547 
548 /* Internal function, do not use. */
549 static inline struct rhash_head *__rhashtable_lookup(
550 	struct rhashtable *ht, const void *key,
551 	const struct rhashtable_params params)
552 {
553 	struct rhashtable_compare_arg arg = {
554 		.ht = ht,
555 		.key = key,
556 	};
557 	struct rhash_lock_head __rcu * const *bkt;
558 	struct bucket_table *tbl;
559 	struct rhash_head *he;
560 	unsigned int hash;
561 
562 	tbl = rht_dereference_rcu(ht->tbl, ht);
563 restart:
564 	hash = rht_key_hashfn(ht, tbl, key, params);
565 	bkt = rht_bucket(tbl, hash);
566 	do {
567 		he = rht_ptr(rht_dereference_bucket_rcu(*bkt, tbl, hash));
568 		rht_for_each_rcu_from(he, he, tbl, hash) {
569 			if (params.obj_cmpfn ?
570 			    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
571 			    rhashtable_compare(&arg, rht_obj(ht, he)))
572 				continue;
573 			return he;
574 		}
575 		/* An object might have been moved to a different hash chain,
576 		 * while we walk along it - better check and retry.
577 		 */
578 	} while (he != RHT_NULLS_MARKER(bkt));
579 
580 	/* Ensure we see any new tables. */
581 	smp_rmb();
582 
583 	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
584 	if (unlikely(tbl))
585 		goto restart;
586 
587 	return NULL;
588 }
589 
590 /**
591  * rhashtable_lookup - search hash table
592  * @ht:		hash table
593  * @key:	the pointer to the key
594  * @params:	hash table parameters
595  *
596  * Computes the hash value for the key and traverses the bucket chain looking
597  * for a entry with an identical key. The first matching entry is returned.
598  *
599  * This must only be called under the RCU read lock.
600  *
601  * Returns the first entry on which the compare function returned true.
602  */
603 static inline void *rhashtable_lookup(
604 	struct rhashtable *ht, const void *key,
605 	const struct rhashtable_params params)
606 {
607 	struct rhash_head *he = __rhashtable_lookup(ht, key, params);
608 
609 	return he ? rht_obj(ht, he) : NULL;
610 }
611 
612 /**
613  * rhashtable_lookup_fast - search hash table, without RCU read lock
614  * @ht:		hash table
615  * @key:	the pointer to the key
616  * @params:	hash table parameters
617  *
618  * Computes the hash value for the key and traverses the bucket chain looking
619  * for a entry with an identical key. The first matching entry is returned.
620  *
621  * Only use this function when you have other mechanisms guaranteeing
622  * that the object won't go away after the RCU read lock is released.
623  *
624  * Returns the first entry on which the compare function returned true.
625  */
626 static inline void *rhashtable_lookup_fast(
627 	struct rhashtable *ht, const void *key,
628 	const struct rhashtable_params params)
629 {
630 	void *obj;
631 
632 	rcu_read_lock();
633 	obj = rhashtable_lookup(ht, key, params);
634 	rcu_read_unlock();
635 
636 	return obj;
637 }
638 
639 /**
640  * rhltable_lookup - search hash list table
641  * @hlt:	hash table
642  * @key:	the pointer to the key
643  * @params:	hash table parameters
644  *
645  * Computes the hash value for the key and traverses the bucket chain looking
646  * for a entry with an identical key.  All matching entries are returned
647  * in a list.
648  *
649  * This must only be called under the RCU read lock.
650  *
651  * Returns the list of entries that match the given key.
652  */
653 static inline struct rhlist_head *rhltable_lookup(
654 	struct rhltable *hlt, const void *key,
655 	const struct rhashtable_params params)
656 {
657 	struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);
658 
659 	return he ? container_of(he, struct rhlist_head, rhead) : NULL;
660 }
661 
/* Internal function, please use rhashtable_insert_fast() instead.
 *
 * Returns NULL when @obj was inserted, a pointer to the object already in
 * the table when a plain (non-rhlist) insert finds an entry matching @key,
 * ERR_PTR(-ENOMEM) when no bucket could be obtained, ERR_PTR(-E2BIG) when
 * rht_grow_above_max() reports the table as full, or whatever
 * rhashtable_insert_slow() returns when the fast path gives up.
 *
 * With a NULL @key no comparison is done and every chain entry is skipped.
 */
static inline void *__rhashtable_insert_fast(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params, bool rhlist)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_lock_head __rcu **bkt;
	struct rhash_head __rcu **pprev;
	struct bucket_table *tbl;
	struct rhash_head *head;
	unsigned int hash;
	int elasticity;
	void *data;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);
	hash = rht_head_hashfn(ht, tbl, obj, params);
	elasticity = RHT_ELASTICITY;
	bkt = rht_bucket_insert(ht, tbl, hash);
	/* A missing bucket is reported as -ENOMEM. */
	data = ERR_PTR(-ENOMEM);
	if (!bkt)
		goto out;
	/* pprev stays NULL for as long as no chain entry has been skipped. */
	pprev = NULL;
	rht_lock(tbl, bkt);

	/* A future table exists: leave the insertion to the slow path. */
	if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
		/* Also entered by goto from below, with the bucket locked. */
slow_path:
		rht_unlock(tbl, bkt);
		rcu_read_unlock();
		return rhashtable_insert_slow(ht, key, obj);
	}

	rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) {
		struct rhlist_head *plist;
		struct rhlist_head *list;

		elasticity--;
		/* Non-matching entry: remember its next link and move on. */
		if (!key ||
		    (params.obj_cmpfn ?
		     params.obj_cmpfn(&arg, rht_obj(ht, head)) :
		     rhashtable_compare(&arg, rht_obj(ht, head)))) {
			pprev = &head->next;
			continue;
		}

		/* An entry with an equal key is already present. */
		data = rht_obj(ht, head);

		/* Plain table: hand the existing object back to the caller. */
		if (!rhlist)
			goto out_unlock;


		list = container_of(obj, struct rhlist_head, rhead);
		plist = container_of(head, struct rhlist_head, rhead);

		/*
		 * rhlist: @obj takes the place of the matching entry in the
		 * chain and the old entry is linked behind it via list->next.
		 */
		RCU_INIT_POINTER(list->next, plist);
		head = rht_dereference_bucket(head->next, tbl, hash);
		RCU_INIT_POINTER(list->rhead.next, head);
		if (pprev) {
			rcu_assign_pointer(*pprev, obj);
			rht_unlock(tbl, bkt);
		} else
			/* Match was the bucket head: storing @obj unlocks. */
			rht_assign_unlock(tbl, bkt, obj);
		data = NULL;
		goto out;
	}

	/* RHT_ELASTICITY or more entries were walked in this chain. */
	if (elasticity <= 0)
		goto slow_path;

	data = ERR_PTR(-E2BIG);
	if (unlikely(rht_grow_above_max(ht, tbl)))
		goto out_unlock;

	if (unlikely(rht_grow_above_100(ht, tbl)))
		goto slow_path;

	/* Inserting at head of list makes unlocking free. */
	head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash));

	RCU_INIT_POINTER(obj->next, head);
	if (rhlist) {
		struct rhlist_head *list;

		/* First entry for this key: its duplicate list is empty. */
		list = container_of(obj, struct rhlist_head, rhead);
		RCU_INIT_POINTER(list->next, NULL);
	}

	atomic_inc(&ht->nelems);
	rht_assign_unlock(tbl, bkt, obj);

	/* Above 75% load: schedule the deferred resize work. */
	if (rht_grow_above_75(ht, tbl))
		schedule_work(&ht->run_work);

	data = NULL;
out:
	rcu_read_unlock();

	return data;

out_unlock:
	rht_unlock(tbl, bkt);
	goto out;
}
772 
773 /**
774  * rhashtable_insert_fast - insert object into hash table
775  * @ht:		hash table
776  * @obj:	pointer to hash head inside object
777  * @params:	hash table parameters
778  *
779  * Will take the per bucket bitlock to protect against mutual mutations
780  * on the same bucket. Multiple insertions may occur in parallel unless
781  * they map to the same bucket.
782  *
783  * It is safe to call this function from atomic context.
784  *
785  * Will trigger an automatic deferred table resizing if residency in the
786  * table grows beyond 70%.
787  */
788 static inline int rhashtable_insert_fast(
789 	struct rhashtable *ht, struct rhash_head *obj,
790 	const struct rhashtable_params params)
791 {
792 	void *ret;
793 
794 	ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
795 	if (IS_ERR(ret))
796 		return PTR_ERR(ret);
797 
798 	return ret == NULL ? 0 : -EEXIST;
799 }
800 
801 /**
802  * rhltable_insert_key - insert object into hash list table
803  * @hlt:	hash list table
804  * @key:	the pointer to the key
805  * @list:	pointer to hash list head inside object
806  * @params:	hash table parameters
807  *
808  * Will take the per bucket bitlock to protect against mutual mutations
809  * on the same bucket. Multiple insertions may occur in parallel unless
810  * they map to the same bucket.
811  *
812  * It is safe to call this function from atomic context.
813  *
814  * Will trigger an automatic deferred table resizing if residency in the
815  * table grows beyond 70%.
816  */
817 static inline int rhltable_insert_key(
818 	struct rhltable *hlt, const void *key, struct rhlist_head *list,
819 	const struct rhashtable_params params)
820 {
821 	return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
822 						params, true));
823 }
824 
825 /**
826  * rhltable_insert - insert object into hash list table
827  * @hlt:	hash list table
828  * @list:	pointer to hash list head inside object
829  * @params:	hash table parameters
830  *
831  * Will take the per bucket bitlock to protect against mutual mutations
832  * on the same bucket. Multiple insertions may occur in parallel unless
833  * they map to the same bucket.
834  *
835  * It is safe to call this function from atomic context.
836  *
837  * Will trigger an automatic deferred table resizing if residency in the
838  * table grows beyond 70%.
839  */
840 static inline int rhltable_insert(
841 	struct rhltable *hlt, struct rhlist_head *list,
842 	const struct rhashtable_params params)
843 {
844 	const char *key = rht_obj(&hlt->ht, &list->rhead);
845 
846 	key += params.key_offset;
847 
848 	return rhltable_insert_key(hlt, key, list, params);
849 }
850 
851 /**
852  * rhashtable_lookup_insert_fast - lookup and insert object into hash table
853  * @ht:		hash table
854  * @obj:	pointer to hash head inside object
855  * @params:	hash table parameters
856  *
857  * This lookup function may only be used for fixed key hash table (key_len
858  * parameter set). It will BUG() if used inappropriately.
859  *
860  * It is safe to call this function from atomic context.
861  *
862  * Will trigger an automatic deferred table resizing if residency in the
863  * table grows beyond 70%.
864  */
865 static inline int rhashtable_lookup_insert_fast(
866 	struct rhashtable *ht, struct rhash_head *obj,
867 	const struct rhashtable_params params)
868 {
869 	const char *key = rht_obj(ht, obj);
870 	void *ret;
871 
872 	BUG_ON(ht->p.obj_hashfn);
873 
874 	ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
875 				       false);
876 	if (IS_ERR(ret))
877 		return PTR_ERR(ret);
878 
879 	return ret == NULL ? 0 : -EEXIST;
880 }
881 
882 /**
883  * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
884  * @ht:		hash table
885  * @obj:	pointer to hash head inside object
886  * @params:	hash table parameters
887  *
888  * Just like rhashtable_lookup_insert_fast(), but this function returns the
889  * object if it exists, NULL if it did not and the insertion was successful,
890  * and an ERR_PTR otherwise.
891  */
892 static inline void *rhashtable_lookup_get_insert_fast(
893 	struct rhashtable *ht, struct rhash_head *obj,
894 	const struct rhashtable_params params)
895 {
896 	const char *key = rht_obj(ht, obj);
897 
898 	BUG_ON(ht->p.obj_hashfn);
899 
900 	return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
901 					false);
902 }
903 
904 /**
905  * rhashtable_lookup_insert_key - search and insert object to hash table
906  *				  with explicit key
907  * @ht:		hash table
908  * @key:	key
909  * @obj:	pointer to hash head inside object
910  * @params:	hash table parameters
911  *
912  * Lookups may occur in parallel with hashtable mutations and resizing.
913  *
914  * Will trigger an automatic deferred table resizing if residency in the
915  * table grows beyond 70%.
916  *
917  * Returns zero on success.
918  */
919 static inline int rhashtable_lookup_insert_key(
920 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
921 	const struct rhashtable_params params)
922 {
923 	void *ret;
924 
925 	BUG_ON(!ht->p.obj_hashfn || !key);
926 
927 	ret = __rhashtable_insert_fast(ht, key, obj, params, false);
928 	if (IS_ERR(ret))
929 		return PTR_ERR(ret);
930 
931 	return ret == NULL ? 0 : -EEXIST;
932 }
933 
934 /**
935  * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
936  * @ht:		hash table
937  * @obj:	pointer to hash head inside object
938  * @params:	hash table parameters
939  * @data:	pointer to element data already in hashes
940  *
941  * Just like rhashtable_lookup_insert_key(), but this function returns the
942  * object if it exists, NULL if it does not and the insertion was successful,
943  * and an ERR_PTR otherwise.
944  */
945 static inline void *rhashtable_lookup_get_insert_key(
946 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
947 	const struct rhashtable_params params)
948 {
949 	BUG_ON(!ht->p.obj_hashfn || !key);
950 
951 	return __rhashtable_insert_fast(ht, key, obj, params, false);
952 }
953 
954 /* Internal function, please use rhashtable_remove_fast() instead */
955 static inline int __rhashtable_remove_fast_one(
956 	struct rhashtable *ht, struct bucket_table *tbl,
957 	struct rhash_head *obj, const struct rhashtable_params params,
958 	bool rhlist)
959 {
960 	struct rhash_lock_head __rcu **bkt;
961 	struct rhash_head __rcu **pprev;
962 	struct rhash_head *he;
963 	unsigned int hash;
964 	int err = -ENOENT;
965 
966 	hash = rht_head_hashfn(ht, tbl, obj, params);
967 	bkt = rht_bucket_var(tbl, hash);
968 	if (!bkt)
969 		return -ENOENT;
970 	pprev = NULL;
971 	rht_lock(tbl, bkt);
972 
973 	rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) {
974 		struct rhlist_head *list;
975 
976 		list = container_of(he, struct rhlist_head, rhead);
977 
978 		if (he != obj) {
979 			struct rhlist_head __rcu **lpprev;
980 
981 			pprev = &he->next;
982 
983 			if (!rhlist)
984 				continue;
985 
986 			do {
987 				lpprev = &list->next;
988 				list = rht_dereference_bucket(list->next,
989 							      tbl, hash);
990 			} while (list && obj != &list->rhead);
991 
992 			if (!list)
993 				continue;
994 
995 			list = rht_dereference_bucket(list->next, tbl, hash);
996 			RCU_INIT_POINTER(*lpprev, list);
997 			err = 0;
998 			break;
999 		}
1000 
1001 		obj = rht_dereference_bucket(obj->next, tbl, hash);
1002 		err = 1;
1003 
1004 		if (rhlist) {
1005 			list = rht_dereference_bucket(list->next, tbl, hash);
1006 			if (list) {
1007 				RCU_INIT_POINTER(list->rhead.next, obj);
1008 				obj = &list->rhead;
1009 				err = 0;
1010 			}
1011 		}
1012 
1013 		if (pprev) {
1014 			rcu_assign_pointer(*pprev, obj);
1015 			rht_unlock(tbl, bkt);
1016 		} else {
1017 			rht_assign_unlock(tbl, bkt, obj);
1018 		}
1019 		goto unlocked;
1020 	}
1021 
1022 	rht_unlock(tbl, bkt);
1023 unlocked:
1024 	if (err > 0) {
1025 		atomic_dec(&ht->nelems);
1026 		if (unlikely(ht->p.automatic_shrinking &&
1027 			     rht_shrink_below_30(ht, tbl)))
1028 			schedule_work(&ht->run_work);
1029 		err = 0;
1030 	}
1031 
1032 	return err;
1033 }
1034 
1035 /* Internal function, please use rhashtable_remove_fast() instead */
1036 static inline int __rhashtable_remove_fast(
1037 	struct rhashtable *ht, struct rhash_head *obj,
1038 	const struct rhashtable_params params, bool rhlist)
1039 {
1040 	struct bucket_table *tbl;
1041 	int err;
1042 
1043 	rcu_read_lock();
1044 
1045 	tbl = rht_dereference_rcu(ht->tbl, ht);
1046 
1047 	/* Because we have already taken (and released) the bucket
1048 	 * lock in old_tbl, if we find that future_tbl is not yet
1049 	 * visible then that guarantees the entry to still be in
1050 	 * the old tbl if it exists.
1051 	 */
1052 	while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
1053 						   rhlist)) &&
1054 	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
1055 		;
1056 
1057 	rcu_read_unlock();
1058 
1059 	return err;
1060 }
1061 
1062 /**
1063  * rhashtable_remove_fast - remove object from hash table
1064  * @ht:		hash table
1065  * @obj:	pointer to hash head inside object
1066  * @params:	hash table parameters
1067  *
1068  * Since the hash chain is single linked, the removal operation needs to
1069  * walk the bucket chain upon removal. The removal operation is thus
1070  * considerable slow if the hash table is not correctly sized.
1071  *
1072  * Will automatically shrink the table if permitted when residency drops
1073  * below 30%.
1074  *
1075  * Returns zero on success, -ENOENT if the entry could not be found.
1076  */
1077 static inline int rhashtable_remove_fast(
1078 	struct rhashtable *ht, struct rhash_head *obj,
1079 	const struct rhashtable_params params)
1080 {
1081 	return __rhashtable_remove_fast(ht, obj, params, false);
1082 }
1083 
1084 /**
1085  * rhltable_remove - remove object from hash list table
1086  * @hlt:	hash list table
1087  * @list:	pointer to hash list head inside object
1088  * @params:	hash table parameters
1089  *
1090  * Since the hash chain is single linked, the removal operation needs to
1091  * walk the bucket chain upon removal. The removal operation is thus
1092  * considerable slow if the hash table is not correctly sized.
1093  *
1094  * Will automatically shrink the table if permitted when residency drops
1095  * below 30%
1096  *
1097  * Returns zero on success, -ENOENT if the entry could not be found.
1098  */
1099 static inline int rhltable_remove(
1100 	struct rhltable *hlt, struct rhlist_head *list,
1101 	const struct rhashtable_params params)
1102 {
1103 	return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
1104 }
1105 
1106 /* Internal function, please use rhashtable_replace_fast() instead */
1107 static inline int __rhashtable_replace_fast(
1108 	struct rhashtable *ht, struct bucket_table *tbl,
1109 	struct rhash_head *obj_old, struct rhash_head *obj_new,
1110 	const struct rhashtable_params params)
1111 {
1112 	struct rhash_lock_head __rcu **bkt;
1113 	struct rhash_head __rcu **pprev;
1114 	struct rhash_head *he;
1115 	unsigned int hash;
1116 	int err = -ENOENT;
1117 
1118 	/* Minimally, the old and new objects must have same hash
1119 	 * (which should mean identifiers are the same).
1120 	 */
1121 	hash = rht_head_hashfn(ht, tbl, obj_old, params);
1122 	if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
1123 		return -EINVAL;
1124 
1125 	bkt = rht_bucket_var(tbl, hash);
1126 	if (!bkt)
1127 		return -ENOENT;
1128 
1129 	pprev = NULL;
1130 	rht_lock(tbl, bkt);
1131 
1132 	rht_for_each_from(he, rht_ptr(*bkt), tbl, hash) {
1133 		if (he != obj_old) {
1134 			pprev = &he->next;
1135 			continue;
1136 		}
1137 
1138 		rcu_assign_pointer(obj_new->next, obj_old->next);
1139 		if (pprev) {
1140 			rcu_assign_pointer(*pprev, obj_new);
1141 			rht_unlock(tbl, bkt);
1142 		} else {
1143 			rht_assign_unlock(tbl, bkt, obj_new);
1144 		}
1145 		err = 0;
1146 		goto unlocked;
1147 	}
1148 
1149 	rht_unlock(tbl, bkt);
1150 
1151 unlocked:
1152 	return err;
1153 }
1154 
1155 /**
1156  * rhashtable_replace_fast - replace an object in hash table
1157  * @ht:		hash table
1158  * @obj_old:	pointer to hash head inside object being replaced
1159  * @obj_new:	pointer to hash head inside object which is new
1160  * @params:	hash table parameters
1161  *
1162  * Replacing an object doesn't affect the number of elements in the hash table
1163  * or bucket, so we don't need to worry about shrinking or expanding the
1164  * table here.
1165  *
1166  * Returns zero on success, -ENOENT if the entry could not be found,
1167  * -EINVAL if hash is not the same for the old and new objects.
1168  */
1169 static inline int rhashtable_replace_fast(
1170 	struct rhashtable *ht, struct rhash_head *obj_old,
1171 	struct rhash_head *obj_new,
1172 	const struct rhashtable_params params)
1173 {
1174 	struct bucket_table *tbl;
1175 	int err;
1176 
1177 	rcu_read_lock();
1178 
1179 	tbl = rht_dereference_rcu(ht->tbl, ht);
1180 
1181 	/* Because we have already taken (and released) the bucket
1182 	 * lock in old_tbl, if we find that future_tbl is not yet
1183 	 * visible then that guarantees the entry to still be in
1184 	 * the old tbl if it exists.
1185 	 */
1186 	while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
1187 						obj_new, params)) &&
1188 	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
1189 		;
1190 
1191 	rcu_read_unlock();
1192 
1193 	return err;
1194 }
1195 
1196 /**
1197  * rhltable_walk_enter - Initialise an iterator
1198  * @hlt:	Table to walk over
1199  * @iter:	Hash table Iterator
1200  *
1201  * This function prepares a hash table walk.
1202  *
1203  * Note that if you restart a walk after rhashtable_walk_stop you
1204  * may see the same object twice.  Also, you may miss objects if
1205  * there are removals in between rhashtable_walk_stop and the next
1206  * call to rhashtable_walk_start.
1207  *
1208  * For a completely stable walk you should construct your own data
1209  * structure outside the hash table.
1210  *
1211  * This function may be called from any process context, including
1212  * non-preemptable context, but cannot be called from softirq or
1213  * hardirq context.
1214  *
1215  * You must call rhashtable_walk_exit after this function returns.
1216  */
1217 static inline void rhltable_walk_enter(struct rhltable *hlt,
1218 				       struct rhashtable_iter *iter)
1219 {
1220 	return rhashtable_walk_enter(&hlt->ht, iter);
1221 }
1222 
1223 /**
1224  * rhltable_free_and_destroy - free elements and destroy hash list table
1225  * @hlt:	the hash list table to destroy
1226  * @free_fn:	callback to release resources of element
1227  * @arg:	pointer passed to free_fn
1228  *
1229  * See documentation for rhashtable_free_and_destroy.
1230  */
1231 static inline void rhltable_free_and_destroy(struct rhltable *hlt,
1232 					     void (*free_fn)(void *ptr,
1233 							     void *arg),
1234 					     void *arg)
1235 {
1236 	return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
1237 }
1238 
1239 static inline void rhltable_destroy(struct rhltable *hlt)
1240 {
1241 	return rhltable_free_and_destroy(hlt, NULL, NULL);
1242 }
1243 
1244 #endif /* _LINUX_RHASHTABLE_H */
1245