/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef _LINUX_RHASHTABLE_H
#define _LINUX_RHASHTABLE_H

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/jhash.h>
#include <linux/list_nulls.h>
#include <linux/workqueue.h>
#include <linux/rculist.h>
#include <linux/bit_spinlock.h>

#include <linux/rhashtable-types.h>
/*
 * Objects in an rhashtable have an embedded struct rhash_head
 * which is linked into a hash chain from the hash table - or one
 * of two or more hash tables when the rhashtable is being resized.
 * The end of the chain is marked with a special nulls marker which has
 * the least significant bit set but otherwise stores the address of
 * the hash bucket.  This allows us to be sure we've found the end
 * of the right list.
 * The value stored in the hash bucket has BIT(0) used as a lock bit.
 * This bit must be atomically set before any changes are made to
 * the chain.  To avoid dereferencing this pointer without clearing
 * the bit first, we use an opaque 'struct rhash_lock_head *' for the
 * pointer stored in the bucket.  This struct needs to be defined so
 * that rcu_dereference() works on it, but it has no content so a
 * cast is needed for it to be useful.  This ensures it isn't
 * used by mistake without clearing the lock bit first.
 */
struct rhash_lock_head {};

/* Maximum chain length before rehash
 *
 * The maximum (not average) chain length grows with the size of the hash
 * table, at a rate of (log N)/(log log N).
 *
 * The value of 16 is selected so that even if the hash table grew to
 * 2^32 you would not expect the maximum chain length to exceed it
 * unless we are under attack (or extremely unlucky).
 *
 * As this limit is only to detect attacks, we don't need to set it to a
 * lower value as you'd need the chain length to vastly exceed 16 to have
 * any real effect on the system.
 */
#define RHT_ELASTICITY	16u
/**
 * struct bucket_table - Table of hash buckets
 * @size: Number of hash buckets
 * @nest: Number of bits of first-level nested table.
 * @hash_rnd: Random seed to fold into hash
 * @walkers: List of active walkers
 * @rcu: RCU structure for freeing the table
 * @future_tbl: Table under construction during rehashing
 * @dep_map: Lockdep map for the per-bucket bit spinlocks
 * @buckets: size * hash buckets
 */
struct bucket_table {
	unsigned int		size;
	unsigned int		nest;
	u32			hash_rnd;
	struct list_head	walkers;
	struct rcu_head		rcu;

	struct bucket_table __rcu *future_tbl;

	struct lockdep_map	dep_map;

	struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp;
};

/*
 * NULLS_MARKER() expects a hash value with the low
 * bits most likely to be significant, and it discards
 * the msb.
 * We give it an address, in which the bottom bit is
 * always 0, and the msb might be significant.
 * So we shift the address down one bit to align with
 * expectations and avoid losing a significant bit.
 *
 * We never store the NULLS_MARKER in the hash table
 * itself as we need the lsb for locking.
 * Instead we store a NULL.
 */
#define	RHT_NULLS_MARKER(ptr)	\
	((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
#define INIT_RHT_NULLS_HEAD(ptr)	\
	((ptr) = NULL)
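
/*
 * Worked illustration (added here, not in the original header): with
 * NULLS_MARKER(v) defined in <linux/list_nulls.h> as (1UL | ((v) << 1)),
 * a bucket at address 0x1040 gives
 *
 *	RHT_NULLS_MARKER(0x1040) == 1UL | ((0x1040 >> 1) << 1) == 0x1041
 *
 * i.e. the bucket address with BIT(0) set, which rht_is_a_nulls()
 * recognises as the end of the chain.
 */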

static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
	return ((unsigned long) ptr & 1);
}

static inline void *rht_obj(const struct rhashtable *ht,
			    const struct rhash_head *he)
{
	return (char *)he - ht->p.head_offset;
}

static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
					    unsigned int hash)
{
	return hash & (tbl->size - 1);
}

static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
	const void *key, const struct rhashtable_params params,
	unsigned int hash_rnd)
{
	unsigned int hash;

	/* params must be equal to ht->p if it isn't constant. */
	if (!__builtin_constant_p(params.key_len))
		hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
	else if (params.key_len) {
		unsigned int key_len = params.key_len;

		if (params.hashfn)
			hash = params.hashfn(key, key_len, hash_rnd);
		else if (key_len & (sizeof(u32) - 1))
			hash = jhash(key, key_len, hash_rnd);
		else
			hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
	} else {
		unsigned int key_len = ht->p.key_len;

		if (params.hashfn)
			hash = params.hashfn(key, key_len, hash_rnd);
		else
			hash = jhash(key, key_len, hash_rnd);
	}

	return hash;
}

static inline unsigned int rht_key_hashfn(
	struct rhashtable *ht, const struct bucket_table *tbl,
	const void *key, const struct rhashtable_params params)
{
	unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);

	return rht_bucket_index(tbl, hash);
}

static inline unsigned int rht_head_hashfn(
	struct rhashtable *ht, const struct bucket_table *tbl,
	const struct rhash_head *he, const struct rhashtable_params params)
{
	const char *ptr = rht_obj(ht, he);

	return likely(params.obj_hashfn) ?
	       rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
							    ht->p.key_len,
						       tbl->hash_rnd)) :
	       rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
}

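/*
 * Example (illustrative; struct test_obj and test_params are
 * hypothetical, not part of this header): with the constant params
 * below, key_len is known at compile time and is a multiple of
 * sizeof(u32), so rht_key_get_hash() reduces to a single jhash2()
 * call:
 *
 *	struct test_obj {
 *		u32 key;
 *		struct rhash_head node;
 *	};
 *
 *	static const struct rhashtable_params test_params = {
 *		.key_len     = sizeof(u32),
 *		.key_offset  = offsetof(struct test_obj, key),
 *		.head_offset = offsetof(struct test_obj, node),
 *	};
 */
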
/**
 * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
 * @ht:		hash table
 * @tbl:	current table
 */
static inline bool rht_grow_above_75(const struct rhashtable *ht,
				     const struct bucket_table *tbl)
{
	/* Expand table when exceeding 75% load */
	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
	       (!ht->p.max_size || tbl->size < ht->p.max_size);
}
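
/*
 * Worked example (added; not in the original header): for a
 * 1024-bucket table, 1024 / 4 * 3 == 768, so growth is triggered
 * once the table holds more than 768 elements (provided max_size
 * allows a larger table).
 */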

/**
 * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
 * @ht:		hash table
 * @tbl:	current table
 */
static inline bool rht_shrink_below_30(const struct rhashtable *ht,
				       const struct bucket_table *tbl)
{
	/* Shrink table beneath 30% load */
	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
	       tbl->size > ht->p.min_size;
}

/**
 * rht_grow_above_100 - returns true if nelems > table-size
 * @ht:		hash table
 * @tbl:	current table
 */
static inline bool rht_grow_above_100(const struct rhashtable *ht,
				      const struct bucket_table *tbl)
{
	return atomic_read(&ht->nelems) > tbl->size &&
		(!ht->p.max_size || tbl->size < ht->p.max_size);
}

/**
 * rht_grow_above_max - returns true if table is above maximum
 * @ht:		hash table
 * @tbl:	current table
 */
static inline bool rht_grow_above_max(const struct rhashtable *ht,
				      const struct bucket_table *tbl)
{
	return atomic_read(&ht->nelems) >= ht->max_elems;
}

#ifdef CONFIG_PROVE_LOCKING
int lockdep_rht_mutex_is_held(struct rhashtable *ht);
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
#else
static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return 1;
}

static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
					     u32 hash)
{
	return 1;
}
#endif /* CONFIG_PROVE_LOCKING */

void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
			     struct rhash_head *obj);

void rhashtable_walk_enter(struct rhashtable *ht,
			   struct rhashtable_iter *iter);
void rhashtable_walk_exit(struct rhashtable_iter *iter);
int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);

static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
{
	(void)rhashtable_walk_start_check(iter);
}

void *rhashtable_walk_next(struct rhashtable_iter *iter);
void *rhashtable_walk_peek(struct rhashtable_iter *iter);
void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);

void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg);
void rhashtable_destroy(struct rhashtable *ht);

struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl,
					   unsigned int hash);
struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl,
					     unsigned int hash);
struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht,
						  struct bucket_table *tbl,
						  unsigned int hash);

#define rht_dereference(p, ht) \
	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_rcu(p, ht) \
	rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_bucket(p, tbl, hash) \
	rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))

#define rht_dereference_bucket_rcu(p, tbl, hash) \
	rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))

#define rht_entry(tpos, pos, member) \
	({ tpos = container_of(pos, typeof(*tpos), member); 1; })

static inline struct rhash_lock_head *const *rht_bucket(
	const struct bucket_table *tbl, unsigned int hash)
{
	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
				     &tbl->buckets[hash];
}

static inline struct rhash_lock_head **rht_bucket_var(
	struct bucket_table *tbl, unsigned int hash)
{
	return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
				     &tbl->buckets[hash];
}

static inline struct rhash_lock_head **rht_bucket_insert(
	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
{
	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
				     &tbl->buckets[hash];
}

/*
 * We lock a bucket by setting BIT(0) in the pointer - this is always
 * zero in real pointers.  The NULLS mark is never stored in the bucket,
 * rather we store NULL if the bucket is empty.
 * bit_spin_locks do not handle contention well, but the whole point
 * of the hashtable design is to achieve minimum per-bucket contention.
 * A nested hash table might not have a bucket pointer.  In that case
 * we cannot get a lock.  An unallocated bucket is necessarily empty,
 * so for remove and replace it is of no interest and needs no locking.
 * For insert we allocate the bucket if this is the last bucket_table,
 * and then take the lock.
 * Sometimes we unlock a bucket by writing a new pointer there.  In that
 * case we don't need to unlock, but we do need to reset state such as
 * local_bh. For that we have rht_assign_unlock().  As rcu_assign_pointer()
 * provides the same release semantics that bit_spin_unlock() provides,
 * this is safe.
 * When we write to a bucket without unlocking, we use rht_assign_locked().
 */

static inline void rht_lock(struct bucket_table *tbl,
			    struct rhash_lock_head **bkt)
{
	local_bh_disable();
	bit_spin_lock(0, (unsigned long *)bkt);
	lock_map_acquire(&tbl->dep_map);
}

static inline void rht_lock_nested(struct bucket_table *tbl,
				   struct rhash_lock_head **bucket,
				   unsigned int subclass)
{
	local_bh_disable();
	bit_spin_lock(0, (unsigned long *)bucket);
	lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
}

static inline void rht_unlock(struct bucket_table *tbl,
			      struct rhash_lock_head **bkt)
{
	lock_map_release(&tbl->dep_map);
	bit_spin_unlock(0, (unsigned long *)bkt);
	local_bh_enable();
}

static inline struct rhash_head __rcu *__rht_ptr(
	struct rhash_lock_head *const *bkt)
{
	return (struct rhash_head __rcu *)
		((unsigned long)*bkt & ~BIT(0) ?:
		 (unsigned long)RHT_NULLS_MARKER(bkt));
}

/*
 * Where 'bkt' is a bucket and might be locked:
 *   rht_ptr_rcu() dereferences that pointer and clears the lock bit.
 *   rht_ptr() dereferences in a context where the bucket is locked.
 *   rht_ptr_exclusive() dereferences in a context where exclusive
 *            access is guaranteed, such as when destroying the table.
 */
static inline struct rhash_head *rht_ptr_rcu(
	struct rhash_lock_head *const *bkt)
{
	struct rhash_head __rcu *p = __rht_ptr(bkt);

	return rcu_dereference(p);
}

static inline struct rhash_head *rht_ptr(
	struct rhash_lock_head *const *bkt,
	struct bucket_table *tbl,
	unsigned int hash)
{
	return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash);
}

static inline struct rhash_head *rht_ptr_exclusive(
	struct rhash_lock_head *const *bkt)
{
	return rcu_dereference_protected(__rht_ptr(bkt), 1);
}

static inline void rht_assign_locked(struct rhash_lock_head **bkt,
				     struct rhash_head *obj)
{
	struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;

	if (rht_is_a_nulls(obj))
		obj = NULL;
	rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0)));
}

static inline void rht_assign_unlock(struct bucket_table *tbl,
				     struct rhash_lock_head **bkt,
				     struct rhash_head *obj)
{
	struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt;

	if (rht_is_a_nulls(obj))
		obj = NULL;
	lock_map_release(&tbl->dep_map);
	rcu_assign_pointer(*p, obj);
	preempt_enable();
	__release(bitlock);
	local_bh_enable();
}

/**
 * rht_for_each_from - iterate over hash chain from given head
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 */
#define rht_for_each_from(pos, head, tbl, hash) \
	for (pos = head;			\
	     !rht_is_a_nulls(pos);		\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))

/**
 * rht_for_each - iterate over hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 */
#define rht_for_each(pos, tbl, hash) \
	rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash),  \
			  tbl, hash)

/**
 * rht_for_each_entry_from - iterate over hash chain from given head
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 */
#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member)	\
	for (pos = head;						\
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))

/**
 * rht_for_each_entry - iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 */
#define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
	rht_for_each_entry_from(tpos, pos,				\
				rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
				tbl, hash, member)

/**
 * rht_for_each_entry_safe - safely iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @next:	the &struct rhash_head to use as next in loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive allows for the looped code to
 * remove the loop cursor from the list.
 */
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
	for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash),		      \
	     next = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
	     pos = next,						      \
	     next = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)

/**
 * rht_for_each_rcu_from - iterate over rcu hash chain from given head
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_rcu_from(pos, head, tbl, hash)			\
	for (({barrier(); }),						\
	     pos = head;						\
	     !rht_is_a_nulls(pos);					\
	     pos = rcu_dereference_raw(pos->next))

/**
 * rht_for_each_rcu - iterate over rcu hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_rcu(pos, tbl, hash)			\
	for (({barrier(); }),					\
	     pos = rht_ptr_rcu(rht_bucket(tbl, hash));		\
	     !rht_is_a_nulls(pos);				\
	     pos = rcu_dereference_raw(pos->next))

/**
 * rht_for_each_entry_rcu_from - iterate over rcu hash chain from given head
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
	for (({barrier(); }),						    \
	     pos = head;						    \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
	     pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))

/**
 * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
	rht_for_each_entry_rcu_from(tpos, pos,				   \
				    rht_ptr_rcu(rht_bucket(tbl, hash)),	   \
				    tbl, hash, member)

/**
 * rhl_for_each_rcu - iterate over rcu hash table list
 * @pos:	the &struct rhlist_head to use as a loop cursor.
 * @list:	the head of the list
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.
 */
#define rhl_for_each_rcu(pos, list)					\
	for (pos = list; pos; pos = rcu_dereference_raw(pos->next))

/**
 * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhlist_head to use as a loop cursor.
 * @list:	the head of the list
 * @member:	name of the &struct rhlist_head within the hashable struct.
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.
 */
#define rhl_for_each_entry_rcu(tpos, pos, list, member)			\
	for (pos = list; pos && rht_entry(tpos, pos, member);		\
	     pos = rcu_dereference_raw(pos->next))
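
/*
 * Example (illustrative; struct test_obj2, its "link" member, hlt and
 * test_rhl_params are all hypothetical, not part of this header):
 * iterating over all entries sharing one key in an rhltable, using
 * rhltable_lookup() declared below:
 *
 *	struct test_obj2 {
 *		u32 key;
 *		struct rhlist_head link;
 *	};
 *
 *	struct rhlist_head *list, *pos;
 *	struct test_obj2 *obj;
 *
 *	rcu_read_lock();
 *	list = rhltable_lookup(&hlt, &key, test_rhl_params);
 *	rhl_for_each_entry_rcu(obj, pos, list, link)
 *		pr_info("match with key %u\n", obj->key);
 *	rcu_read_unlock();
 */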

static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
				     const void *obj)
{
	struct rhashtable *ht = arg->ht;
	const char *ptr = obj;

	return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
}

/* Internal function, do not use. */
static inline struct rhash_head *__rhashtable_lookup(
	struct rhashtable *ht, const void *key,
	const struct rhashtable_params params)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_lock_head *const *bkt;
	struct bucket_table *tbl;
	struct rhash_head *he;
	unsigned int hash;

	tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
	hash = rht_key_hashfn(ht, tbl, key, params);
	bkt = rht_bucket(tbl, hash);
	do {
		rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
			if (params.obj_cmpfn ?
			    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
			    rhashtable_compare(&arg, rht_obj(ht, he)))
				continue;
			return he;
		}
		/* An object might have been moved to a different hash chain,
		 * while we walk along it - better check and retry.
		 */
	} while (he != RHT_NULLS_MARKER(bkt));

	/* Ensure we see any new tables. */
	smp_rmb();

	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (unlikely(tbl))
		goto restart;

	return NULL;
}

/**
 * rhashtable_lookup - search hash table
 * @ht:		hash table
 * @key:	the pointer to the key
 * @params:	hash table parameters
 *
 * Computes the hash value for the key and traverses the bucket chain looking
 * for an entry with an identical key. The first matching entry is returned.
 *
 * This must only be called under the RCU read lock.
 *
 * Returns the first entry on which the compare function returned true.
 */
static inline void *rhashtable_lookup(
	struct rhashtable *ht, const void *key,
	const struct rhashtable_params params)
{
	struct rhash_head *he = __rhashtable_lookup(ht, key, params);

	return he ? rht_obj(ht, he) : NULL;
}

/**
 * rhashtable_lookup_fast - search hash table, without RCU read lock
 * @ht:		hash table
 * @key:	the pointer to the key
 * @params:	hash table parameters
 *
 * Computes the hash value for the key and traverses the bucket chain looking
 * for an entry with an identical key. The first matching entry is returned.
 *
 * Only use this function when you have other mechanisms guaranteeing
 * that the object won't go away after the RCU read lock is released.
 *
 * Returns the first entry on which the compare function returned true.
 */
static inline void *rhashtable_lookup_fast(
	struct rhashtable *ht, const void *key,
	const struct rhashtable_params params)
{
	void *obj;

	rcu_read_lock();
	obj = rhashtable_lookup(ht, key, params);
	rcu_read_unlock();

	return obj;
}
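
/*
 * Example (illustrative; ht, struct test_obj and test_params are the
 * hypothetical definitions sketched earlier, not part of this header):
 *
 *	u32 key = 42;
 *	struct test_obj *obj;
 *
 *	obj = rhashtable_lookup_fast(&ht, &key, test_params);
 *	if (obj)
 *		pr_info("found key %u\n", obj->key);
 */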

/**
 * rhltable_lookup - search hash list table
 * @hlt:	hash table
 * @key:	the pointer to the key
 * @params:	hash table parameters
 *
 * Computes the hash value for the key and traverses the bucket chain looking
 * for an entry with an identical key.  All matching entries are returned
 * in a list.
 *
 * This must only be called under the RCU read lock.
 *
 * Returns the list of entries that match the given key.
 */
static inline struct rhlist_head *rhltable_lookup(
	struct rhltable *hlt, const void *key,
	const struct rhashtable_params params)
{
	struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);

	return he ? container_of(he, struct rhlist_head, rhead) : NULL;
}

/* Internal function, please use rhashtable_insert_fast() instead. This
 * function returns the existing element already in the hash table if there
 * is a clash, otherwise it returns an error via ERR_PTR().
 */
static inline void *__rhashtable_insert_fast(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params, bool rhlist)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_lock_head **bkt;
	struct rhash_head __rcu **pprev;
	struct bucket_table *tbl;
	struct rhash_head *head;
	unsigned int hash;
	int elasticity;
	void *data;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);
	hash = rht_head_hashfn(ht, tbl, obj, params);
	elasticity = RHT_ELASTICITY;
	bkt = rht_bucket_insert(ht, tbl, hash);
	data = ERR_PTR(-ENOMEM);
	if (!bkt)
		goto out;
	pprev = NULL;
	rht_lock(tbl, bkt);

	if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
slow_path:
		rht_unlock(tbl, bkt);
		rcu_read_unlock();
		return rhashtable_insert_slow(ht, key, obj);
	}

	rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
		struct rhlist_head *plist;
		struct rhlist_head *list;

		elasticity--;
		if (!key ||
		    (params.obj_cmpfn ?
		     params.obj_cmpfn(&arg, rht_obj(ht, head)) :
		     rhashtable_compare(&arg, rht_obj(ht, head)))) {
			pprev = &head->next;
			continue;
		}

		data = rht_obj(ht, head);

		if (!rhlist)
			goto out_unlock;

		list = container_of(obj, struct rhlist_head, rhead);
		plist = container_of(head, struct rhlist_head, rhead);

		RCU_INIT_POINTER(list->next, plist);
		head = rht_dereference_bucket(head->next, tbl, hash);
		RCU_INIT_POINTER(list->rhead.next, head);
		if (pprev) {
			rcu_assign_pointer(*pprev, obj);
			rht_unlock(tbl, bkt);
		} else
			rht_assign_unlock(tbl, bkt, obj);
		data = NULL;
		goto out;
	}

	if (elasticity <= 0)
		goto slow_path;

	data = ERR_PTR(-E2BIG);
	if (unlikely(rht_grow_above_max(ht, tbl)))
		goto out_unlock;

	if (unlikely(rht_grow_above_100(ht, tbl)))
		goto slow_path;

	/* Inserting at head of list makes unlocking free. */
	head = rht_ptr(bkt, tbl, hash);

	RCU_INIT_POINTER(obj->next, head);
	if (rhlist) {
		struct rhlist_head *list;

		list = container_of(obj, struct rhlist_head, rhead);
		RCU_INIT_POINTER(list->next, NULL);
	}

	atomic_inc(&ht->nelems);
	rht_assign_unlock(tbl, bkt, obj);

	if (rht_grow_above_75(ht, tbl))
		schedule_work(&ht->run_work);

	data = NULL;
out:
	rcu_read_unlock();

	return data;

out_unlock:
	rht_unlock(tbl, bkt);
	goto out;
}

/**
 * rhashtable_insert_fast - insert object into hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Will take the per bucket bitlock to protect against mutual mutations
 * on the same bucket. Multiple insertions may occur in parallel unless
 * they map to the same bucket.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if residency in the
 * table grows beyond 75%.
 */
static inline int rhashtable_insert_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	void *ret;

	ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}
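
/*
 * Example (illustrative; ht, struct test_obj and test_params are the
 * hypothetical definitions sketched earlier): a table is typically set
 * up once with rhashtable_init() (declared in
 * <linux/rhashtable-types.h>) and then populated:
 *
 *	struct rhashtable ht;
 *	struct test_obj *obj;
 *	int err;
 *
 *	err = rhashtable_init(&ht, &test_params);
 *	if (err)
 *		return err;
 *
 *	obj = kmalloc(sizeof(*obj), GFP_KERNEL);
 *	if (!obj)
 *		return -ENOMEM;
 *	obj->key = 42;
 *
 *	err = rhashtable_insert_fast(&ht, &obj->node, test_params);
 *	if (err)
 *		kfree(obj);
 */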

/**
 * rhltable_insert_key - insert object into hash list table
 * @hlt:	hash list table
 * @key:	the pointer to the key
 * @list:	pointer to hash list head inside object
 * @params:	hash table parameters
 *
 * Will take the per bucket bitlock to protect against mutual mutations
 * on the same bucket. Multiple insertions may occur in parallel unless
 * they map to the same bucket.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if residency in the
 * table grows beyond 75%.
 */
static inline int rhltable_insert_key(
	struct rhltable *hlt, const void *key, struct rhlist_head *list,
	const struct rhashtable_params params)
{
	return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
						params, true));
}

/**
 * rhltable_insert - insert object into hash list table
 * @hlt:	hash list table
 * @list:	pointer to hash list head inside object
 * @params:	hash table parameters
 *
 * Will take the per bucket bitlock to protect against mutual mutations
 * on the same bucket. Multiple insertions may occur in parallel unless
 * they map to the same bucket.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if residency in the
 * table grows beyond 75%.
 */
static inline int rhltable_insert(
	struct rhltable *hlt, struct rhlist_head *list,
	const struct rhashtable_params params)
{
	const char *key = rht_obj(&hlt->ht, &list->rhead);

	key += params.key_offset;

	return rhltable_insert_key(hlt, key, list, params);
}

/**
 * rhashtable_lookup_insert_fast - lookup and insert object into hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * This lookup function may only be used for fixed key hash tables (key_len
 * parameter set). It will BUG() if used inappropriately.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if residency in the
 * table grows beyond 75%.
 */
static inline int rhashtable_lookup_insert_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	const char *key = rht_obj(ht, obj);
	void *ret;

	BUG_ON(ht->p.obj_hashfn);

	ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
				       false);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}
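
/*
 * Example (illustrative, using the hypothetical test_params): an
 * insert-if-absent pattern; losing the race to an existing entry
 * returns -EEXIST, so the duplicate is simply freed:
 *
 *	obj->key = 42;
 *	err = rhashtable_lookup_insert_fast(&ht, &obj->node, test_params);
 *	if (err == -EEXIST)
 *		kfree(obj);
 */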

/**
 * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Just like rhashtable_lookup_insert_fast(), but this function returns the
 * object if it exists, NULL if it did not exist and the insertion was
 * successful, and an ERR_PTR otherwise.
 */
static inline void *rhashtable_lookup_get_insert_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	const char *key = rht_obj(ht, obj);

	BUG_ON(ht->p.obj_hashfn);

	return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
					false);
}

/**
 * rhashtable_lookup_insert_key - search and insert object to hash table
 *				  with explicit key
 * @ht:		hash table
 * @key:	key
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Lookups may occur in parallel with hashtable mutations and resizing.
 *
 * Will trigger an automatic deferred table resizing if residency in the
 * table grows beyond 75%.
 *
 * Returns zero on success.
 */
static inline int rhashtable_lookup_insert_key(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	void *ret;

	BUG_ON(!ht->p.obj_hashfn || !key);

	ret = __rhashtable_insert_fast(ht, key, obj, params, false);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}

/**
 * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
 * @ht:		hash table
 * @key:	key
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Just like rhashtable_lookup_insert_key(), but this function returns the
 * object if it exists, NULL if it does not and the insertion was successful,
 * and an ERR_PTR otherwise.
 */
static inline void *rhashtable_lookup_get_insert_key(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	BUG_ON(!ht->p.obj_hashfn || !key);

	return __rhashtable_insert_fast(ht, key, obj, params, false);
}

/* Internal function, please use rhashtable_remove_fast() instead */
static inline int __rhashtable_remove_fast_one(
	struct rhashtable *ht, struct bucket_table *tbl,
	struct rhash_head *obj, const struct rhashtable_params params,
	bool rhlist)
{
	struct rhash_lock_head **bkt;
	struct rhash_head __rcu **pprev;
	struct rhash_head *he;
	unsigned int hash;
	int err = -ENOENT;

	hash = rht_head_hashfn(ht, tbl, obj, params);
	bkt = rht_bucket_var(tbl, hash);
	if (!bkt)
		return -ENOENT;
	pprev = NULL;
	rht_lock(tbl, bkt);

	rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
		struct rhlist_head *list;

		list = container_of(he, struct rhlist_head, rhead);

		if (he != obj) {
			struct rhlist_head __rcu **lpprev;

			pprev = &he->next;

			if (!rhlist)
				continue;

			do {
				lpprev = &list->next;
				list = rht_dereference_bucket(list->next,
							      tbl, hash);
			} while (list && obj != &list->rhead);

			if (!list)
				continue;

			list = rht_dereference_bucket(list->next, tbl, hash);
			RCU_INIT_POINTER(*lpprev, list);
			err = 0;
			break;
		}

		obj = rht_dereference_bucket(obj->next, tbl, hash);
		err = 1;

		if (rhlist) {
			list = rht_dereference_bucket(list->next, tbl, hash);
			if (list) {
				RCU_INIT_POINTER(list->rhead.next, obj);
				obj = &list->rhead;
				err = 0;
			}
		}

		if (pprev) {
			rcu_assign_pointer(*pprev, obj);
			rht_unlock(tbl, bkt);
		} else {
			rht_assign_unlock(tbl, bkt, obj);
		}
		goto unlocked;
	}

	rht_unlock(tbl, bkt);
unlocked:
	if (err > 0) {
		atomic_dec(&ht->nelems);
		if (unlikely(ht->p.automatic_shrinking &&
			     rht_shrink_below_30(ht, tbl)))
			schedule_work(&ht->run_work);
		err = 0;
	}

	return err;
}

/* Internal function, please use rhashtable_remove_fast() instead */
static inline int __rhashtable_remove_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params, bool rhlist)
{
	struct bucket_table *tbl;
	int err;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);

	/* Because we have already taken (and released) the bucket
	 * lock in old_tbl, if we find that future_tbl is not yet
	 * visible then that guarantees the entry to still be in
	 * the old tbl if it exists.
	 */
	while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
						   rhlist)) &&
	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
		;

	rcu_read_unlock();

	return err;
}

/**
 * rhashtable_remove_fast - remove object from hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Since the hash chain is singly linked, the removal operation needs to
 * walk the bucket chain upon removal. The removal operation is thus
 * considerably slower if the hash table is not correctly sized.
 *
 * Will automatically shrink the table if permitted when residency drops
 * below 30%.
 *
 * Returns zero on success, -ENOENT if the entry could not be found.
 */
static inline int rhashtable_remove_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	return __rhashtable_remove_fast(ht, obj, params, false);
}
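
/*
 * Example (illustrative; assumes the hypothetical test_obj also embeds
 * a struct rcu_head named "rcu", so the object can be freed after a
 * grace period once concurrent RCU readers are done with it):
 *
 *	err = rhashtable_remove_fast(&ht, &obj->node, test_params);
 *	if (!err)
 *		kfree_rcu(obj, rcu);
 */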

/**
 * rhltable_remove - remove object from hash list table
 * @hlt:	hash list table
 * @list:	pointer to hash list head inside object
 * @params:	hash table parameters
 *
 * Since the hash chain is singly linked, the removal operation needs to
 * walk the bucket chain upon removal. The removal operation is thus
 * considerably slower if the hash table is not correctly sized.
 *
 * Will automatically shrink the table if permitted when residency drops
 * below 30%.
 *
 * Returns zero on success, -ENOENT if the entry could not be found.
 */
static inline int rhltable_remove(
	struct rhltable *hlt, struct rhlist_head *list,
	const struct rhashtable_params params)
{
	return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
}

/* Internal function, please use rhashtable_replace_fast() instead */
static inline int __rhashtable_replace_fast(
	struct rhashtable *ht, struct bucket_table *tbl,
	struct rhash_head *obj_old, struct rhash_head *obj_new,
	const struct rhashtable_params params)
{
	struct rhash_lock_head **bkt;
	struct rhash_head __rcu **pprev;
	struct rhash_head *he;
	unsigned int hash;
	int err = -ENOENT;

	/* Minimally, the old and new objects must have same hash
	 * (which should mean identifiers are the same).
	 */
	hash = rht_head_hashfn(ht, tbl, obj_old, params);
	if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
		return -EINVAL;

	bkt = rht_bucket_var(tbl, hash);
	if (!bkt)
		return -ENOENT;

	pprev = NULL;
	rht_lock(tbl, bkt);

	rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
		if (he != obj_old) {
			pprev = &he->next;
			continue;
		}

		rcu_assign_pointer(obj_new->next, obj_old->next);
		if (pprev) {
			rcu_assign_pointer(*pprev, obj_new);
			rht_unlock(tbl, bkt);
		} else {
			rht_assign_unlock(tbl, bkt, obj_new);
		}
		err = 0;
		goto unlocked;
	}

	rht_unlock(tbl, bkt);

unlocked:
	return err;
}

/**
 * rhashtable_replace_fast - replace an object in hash table
 * @ht:		hash table
 * @obj_old:	pointer to hash head inside object being replaced
 * @obj_new:	pointer to hash head inside object which is new
 * @params:	hash table parameters
 *
 * Replacing an object doesn't affect the number of elements in the hash table
 * or bucket, so we don't need to worry about shrinking or expanding the
 * table here.
 *
 * Returns zero on success, -ENOENT if the entry could not be found,
 * -EINVAL if hash is not the same for the old and new objects.
 */
static inline int rhashtable_replace_fast(
	struct rhashtable *ht, struct rhash_head *obj_old,
	struct rhash_head *obj_new,
	const struct rhashtable_params params)
{
	struct bucket_table *tbl;
	int err;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);

	/* Because we have already taken (and released) the bucket
	 * lock in old_tbl, if we find that future_tbl is not yet
	 * visible then that guarantees the entry to still be in
	 * the old tbl if it exists.
	 */
	while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
						obj_new, params)) &&
	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
		;

	rcu_read_unlock();

	return err;
}
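
/*
 * Example (illustrative, using the hypothetical test_obj): the new
 * object must carry the same key so both hash to the same bucket, and
 * the replaced object is freed only after a grace period:
 *
 *	new_obj->key = old_obj->key;
 *	err = rhashtable_replace_fast(&ht, &old_obj->node,
 *				      &new_obj->node, test_params);
 *	if (!err)
 *		kfree_rcu(old_obj, rcu);
 */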

/**
 * rhltable_walk_enter - Initialise an iterator
 * @hlt:	Table to walk over
 * @iter:	Hash table Iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice.  Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may be called from any process context, including
 * non-preemptable context, but cannot be called from softirq or
 * hardirq context.
 *
 * You must call rhashtable_walk_exit after this function returns.
 */
static inline void rhltable_walk_enter(struct rhltable *hlt,
				       struct rhashtable_iter *iter)
{
	return rhashtable_walk_enter(&hlt->ht, iter);
}
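
/*
 * Example (illustrative, using the hypothetical ht and test_obj): a
 * full walk with the iterator API declared earlier in this header.
 * rhashtable_walk_next() returns ERR_PTR(-EAGAIN) when a resize
 * interrupts the walk, in which case iteration simply continues:
 *
 *	struct rhashtable_iter iter;
 *	struct test_obj *obj;
 *
 *	rhashtable_walk_enter(&ht, &iter);
 *	rhashtable_walk_start(&iter);
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;
 *			break;
 *		}
 *		pr_info("key %u\n", obj->key);
 *	}
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */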

/**
 * rhltable_free_and_destroy - free elements and destroy hash list table
 * @hlt:	the hash list table to destroy
 * @free_fn:	callback to release resources of element
 * @arg:	pointer passed to free_fn
 *
 * See documentation for rhashtable_free_and_destroy.
 */
static inline void rhltable_free_and_destroy(struct rhltable *hlt,
					     void (*free_fn)(void *ptr,
							     void *arg),
					     void *arg)
{
	return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
}

static inline void rhltable_destroy(struct rhltable *hlt)
{
	return rhltable_free_and_destroy(hlt, NULL, NULL);
}

#endif /* _LINUX_RHASHTABLE_H */