xref: /linux/include/linux/rhashtable.h (revision 45de28e31a6e250cd9e17c8b9f9db5a439fb39b2)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Resizable, Scalable, Concurrent Hash Table
4  *
5  * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
6  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
7  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
8  *
9  * Code partially derived from nft_hash
10  * Rewritten with rehash code from br_multicast plus single list
11  * pointer as suggested by Josh Triplett
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License version 2 as
15  * published by the Free Software Foundation.
16  */
17 
18 #ifndef _LINUX_RHASHTABLE_H
19 #define _LINUX_RHASHTABLE_H
20 
21 #include <linux/err.h>
22 #include <linux/errno.h>
23 #include <linux/jhash.h>
24 #include <linux/list_nulls.h>
25 #include <linux/workqueue.h>
26 #include <linux/rculist.h>
27 #include <linux/bit_spinlock.h>
28 
29 #include <linux/rhashtable-types.h>
/*
 * Objects in an rhashtable have an embedded struct rhash_head
 * which is linked into a hash chain from the hash table - or one
 * of two or more hash tables when the rhashtable is being resized.
 * The end of the chain is marked with a special nulls mark which has
 * the least significant bit set but otherwise stores the address of
 * the hash bucket.  This allows us to be sure we've found the end
 * of the right list.
 * The value stored in the hash bucket has BIT(0) used as a lock bit.
 * This bit must be atomically set before any changes are made to
 * the chain.  To avoid dereferencing this pointer without clearing
 * the bit first, we use an opaque 'struct rhash_lock_head *' for the
 * pointer stored in the bucket.  This struct needs to be defined so
 * that rcu_dereference() works on it, but it has no content so a
 * cast is needed for it to be useful.  This ensures it isn't
 * used by mistake without clearing the lock bit first.
 */
struct rhash_lock_head {};
48 
/* Maximum chain length before rehash
 *
 * The maximum (not average) chain length grows with the size of the hash
 * table, at a rate of (log N)/(log log N).
 *
 * The value of 16 is selected so that even if the hash table grew to
 * 2^32 you would not expect the maximum chain length to exceed it
 * unless we are under attack (or extremely unlucky).
 *
 * As this limit is only to detect attacks, we don't need to set it to a
 * lower value as you'd need the chain length to vastly exceed 16 to have
 * any real effect on the system.
 *
 * __rhashtable_insert_fast() starts each insert with this value as its
 * chain-walk budget and diverts to rhashtable_insert_slow() once the
 * budget is used up.
 */
#define RHT_ELASTICITY	16u
63 
/**
 * struct bucket_table - Table of hash buckets
 * @size: Number of hash buckets
 * @nest: Number of bits of first-level nested table; when non-zero the
 *	bucket accessors go through the rht_bucket_nested*() helpers instead
 *	of indexing @buckets directly.
 * @hash_rnd: Random seed to fold into hash
 * @walkers: List of active walkers
 * @rcu: RCU structure for freeing the table
 * @future_tbl: Table under construction during rehashing
 * @dep_map: lockdep map acquired and released alongside the per-bucket
 *	bit lock by rht_lock(), rht_lock_nested(), rht_unlock() and
 *	rht_assign_unlock().
 * @buckets: size * hash buckets
 */
struct bucket_table {
	unsigned int		size;
	unsigned int		nest;
	u32			hash_rnd;
	struct list_head	walkers;
	struct rcu_head		rcu;

	struct bucket_table __rcu *future_tbl;

	struct lockdep_map	dep_map;

	struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};
89 
/*
 * NULLS_MARKER() expects a hash value with the low
 * bits most likely to be significant, and it discards
 * the msb.
 * We give it an address, in which the bottom bit is
 * always 0, and the msb might be significant.
 * So we shift the address down one bit to align with
 * expectations and avoid losing a significant bit.
 *
 * We never store the NULLS_MARKER in the hash table
 * itself as we need the lsb for locking.
 * Instead we store a NULL, which is what INIT_RHT_NULLS_HEAD()
 * assigns.
 */
#define	RHT_NULLS_MARKER(ptr)	\
	((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
#define INIT_RHT_NULLS_HEAD(ptr)	\
	((ptr) = NULL)
107 
/* Tell whether @ptr is a nulls end-of-chain mark, i.e. has its lowest bit set. */
static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
	const unsigned long raw = (unsigned long)ptr;

	return raw & 1;
}
112 
113 static inline void *rht_obj(const struct rhashtable *ht,
114 			    const struct rhash_head *he)
115 {
116 	return (char *)he - ht->p.head_offset;
117 }
118 
119 static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
120 					    unsigned int hash)
121 {
122 	return hash & (tbl->size - 1);
123 }
124 
125 static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
126 	const void *key, const struct rhashtable_params params,
127 	unsigned int hash_rnd)
128 {
129 	unsigned int hash;
130 
131 	/* params must be equal to ht->p if it isn't constant. */
132 	if (!__builtin_constant_p(params.key_len))
133 		hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
134 	else if (params.key_len) {
135 		unsigned int key_len = params.key_len;
136 
137 		if (params.hashfn)
138 			hash = params.hashfn(key, key_len, hash_rnd);
139 		else if (key_len & (sizeof(u32) - 1))
140 			hash = jhash(key, key_len, hash_rnd);
141 		else
142 			hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
143 	} else {
144 		unsigned int key_len = ht->p.key_len;
145 
146 		if (params.hashfn)
147 			hash = params.hashfn(key, key_len, hash_rnd);
148 		else
149 			hash = jhash(key, key_len, hash_rnd);
150 	}
151 
152 	return hash;
153 }
154 
155 static inline unsigned int rht_key_hashfn(
156 	struct rhashtable *ht, const struct bucket_table *tbl,
157 	const void *key, const struct rhashtable_params params)
158 {
159 	unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);
160 
161 	return rht_bucket_index(tbl, hash);
162 }
163 
164 static inline unsigned int rht_head_hashfn(
165 	struct rhashtable *ht, const struct bucket_table *tbl,
166 	const struct rhash_head *he, const struct rhashtable_params params)
167 {
168 	const char *ptr = rht_obj(ht, he);
169 
170 	return likely(params.obj_hashfn) ?
171 	       rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
172 							    ht->p.key_len,
173 						       tbl->hash_rnd)) :
174 	       rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
175 }
176 
177 /**
178  * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
179  * @ht:		hash table
180  * @tbl:	current table
181  */
182 static inline bool rht_grow_above_75(const struct rhashtable *ht,
183 				     const struct bucket_table *tbl)
184 {
185 	/* Expand table when exceeding 75% load */
186 	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
187 	       (!ht->p.max_size || tbl->size < ht->p.max_size);
188 }
189 
190 /**
191  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
192  * @ht:		hash table
193  * @tbl:	current table
194  */
195 static inline bool rht_shrink_below_30(const struct rhashtable *ht,
196 				       const struct bucket_table *tbl)
197 {
198 	/* Shrink table beneath 30% load */
199 	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
200 	       tbl->size > ht->p.min_size;
201 }
202 
203 /**
204  * rht_grow_above_100 - returns true if nelems > table-size
205  * @ht:		hash table
206  * @tbl:	current table
207  */
208 static inline bool rht_grow_above_100(const struct rhashtable *ht,
209 				      const struct bucket_table *tbl)
210 {
211 	return atomic_read(&ht->nelems) > tbl->size &&
212 		(!ht->p.max_size || tbl->size < ht->p.max_size);
213 }
214 
215 /**
216  * rht_grow_above_max - returns true if table is above maximum
217  * @ht:		hash table
218  * @tbl:	current table
219  */
220 static inline bool rht_grow_above_max(const struct rhashtable *ht,
221 				      const struct bucket_table *tbl)
222 {
223 	return atomic_read(&ht->nelems) >= ht->max_elems;
224 }
225 
226 #ifdef CONFIG_PROVE_LOCKING
227 int lockdep_rht_mutex_is_held(struct rhashtable *ht);
228 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
229 #else
230 static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
231 {
232 	return 1;
233 }
234 
235 static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
236 					     u32 hash)
237 {
238 	return 1;
239 }
240 #endif /* CONFIG_PROVE_LOCKING */
241 
242 void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
243 			     struct rhash_head *obj);
244 
245 void rhashtable_walk_enter(struct rhashtable *ht,
246 			   struct rhashtable_iter *iter);
247 void rhashtable_walk_exit(struct rhashtable_iter *iter);
248 int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);
249 
/*
 * Begin a walk through rhashtable_walk_start_check(), deliberately
 * discarding its return value.
 */
static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
{
	(void) rhashtable_walk_start_check(iter);
}
254 
255 void *rhashtable_walk_next(struct rhashtable_iter *iter);
256 void *rhashtable_walk_peek(struct rhashtable_iter *iter);
257 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
258 
259 void rhashtable_free_and_destroy(struct rhashtable *ht,
260 				 void (*free_fn)(void *ptr, void *arg),
261 				 void *arg);
262 void rhashtable_destroy(struct rhashtable *ht);
263 
264 struct rhash_lock_head __rcu **rht_bucket_nested(
265 	const struct bucket_table *tbl, unsigned int hash);
266 struct rhash_lock_head __rcu **__rht_bucket_nested(
267 	const struct bucket_table *tbl, unsigned int hash);
268 struct rhash_lock_head __rcu **rht_bucket_nested_insert(
269 	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash);
270 
/*
 * RCU dereference helpers.  The "protected" forms expand to
 * rcu_dereference_protected(), the "_rcu" forms to rcu_dereference_check().
 * Each passes a lockdep predicate as the condition: the table mutex for the
 * (p, ht) pair, the bucket lock for the (p, tbl, hash) pair.
 */
#define rht_dereference(p, ht) \
	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_rcu(p, ht) \
	rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_bucket(p, tbl, hash) \
	rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))

#define rht_dereference_bucket_rcu(p, tbl, hash) \
	rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))
282 
/*
 * Point @tpos at the structure that embeds @pos as its @member.  The
 * expression always evaluates to 1 so that it can be chained with && in
 * the condition of the rht_for_each_entry*() loops.
 */
#define rht_entry(tpos, pos, member) \
	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
285 
286 static inline struct rhash_lock_head __rcu *const *rht_bucket(
287 	const struct bucket_table *tbl, unsigned int hash)
288 {
289 	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
290 				     &tbl->buckets[hash];
291 }
292 
293 static inline struct rhash_lock_head __rcu **rht_bucket_var(
294 	struct bucket_table *tbl, unsigned int hash)
295 {
296 	return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) :
297 				     &tbl->buckets[hash];
298 }
299 
300 static inline struct rhash_lock_head __rcu **rht_bucket_insert(
301 	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
302 {
303 	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
304 				     &tbl->buckets[hash];
305 }
306 
307 /*
308  * We lock a bucket by setting BIT(0) in the pointer - this is always
309  * zero in real pointers.  The NULLS mark is never stored in the bucket,
310  * rather we store NULL if the bucket is empty.
311  * bit_spin_locks do not handle contention well, but the whole point
312  * of the hashtable design is to achieve minimum per-bucket contention.
313  * A nested hash table might not have a bucket pointer.  In that case
314  * we cannot get a lock.  For remove and replace the bucket cannot be
315  * interesting and doesn't need locking.
316  * For insert we allocate the bucket if this is the last bucket_table,
317  * and then take the lock.
318  * Sometimes we unlock a bucket by writing a new pointer there.  In that
319  * case we don't need to unlock, but we do need to reset state such as
320  * local_bh. For that we have rht_assign_unlock().  As rcu_assign_pointer()
321  * provides the same release semantics that bit_spin_unlock() provides,
322  * this is safe.
323  * When we write to a bucket without unlocking, we use rht_assign_locked().
324  */
325 
326 static inline void rht_lock(struct bucket_table *tbl,
327 			    struct rhash_lock_head __rcu **bkt)
328 {
329 	local_bh_disable();
330 	bit_spin_lock(0, (unsigned long *)bkt);
331 	lock_map_acquire(&tbl->dep_map);
332 }
333 
334 static inline void rht_lock_nested(struct bucket_table *tbl,
335 				   struct rhash_lock_head __rcu **bucket,
336 				   unsigned int subclass)
337 {
338 	local_bh_disable();
339 	bit_spin_lock(0, (unsigned long *)bucket);
340 	lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_);
341 }
342 
343 static inline void rht_unlock(struct bucket_table *tbl,
344 			      struct rhash_lock_head __rcu **bkt)
345 {
346 	lock_map_release(&tbl->dep_map);
347 	bit_spin_unlock(0, (unsigned long *)bkt);
348 	local_bh_enable();
349 }
350 
351 static inline struct rhash_head *__rht_ptr(
352 	struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt)
353 {
354 	return (struct rhash_head *)
355 		((unsigned long)p & ~BIT(0) ?:
356 		 (unsigned long)RHT_NULLS_MARKER(bkt));
357 }
358 
359 /*
360  * Where 'bkt' is a bucket and might be locked:
361  *   rht_ptr_rcu() dereferences that pointer and clears the lock bit.
362  *   rht_ptr() dereferences in a context where the bucket is locked.
363  *   rht_ptr_exclusive() dereferences in a context where exclusive
364  *            access is guaranteed, such as when destroying the table.
365  */
366 static inline struct rhash_head *rht_ptr_rcu(
367 	struct rhash_lock_head __rcu *const *bkt)
368 {
369 	return __rht_ptr(rcu_dereference(*bkt), bkt);
370 }
371 
372 static inline struct rhash_head *rht_ptr(
373 	struct rhash_lock_head __rcu *const *bkt,
374 	struct bucket_table *tbl,
375 	unsigned int hash)
376 {
377 	return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt);
378 }
379 
380 static inline struct rhash_head *rht_ptr_exclusive(
381 	struct rhash_lock_head __rcu *const *bkt)
382 {
383 	return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt);
384 }
385 
386 static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt,
387 				     struct rhash_head *obj)
388 {
389 	if (rht_is_a_nulls(obj))
390 		obj = NULL;
391 	rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0)));
392 }
393 
394 static inline void rht_assign_unlock(struct bucket_table *tbl,
395 				     struct rhash_lock_head __rcu **bkt,
396 				     struct rhash_head *obj)
397 {
398 	if (rht_is_a_nulls(obj))
399 		obj = NULL;
400 	lock_map_release(&tbl->dep_map);
401 	rcu_assign_pointer(*bkt, (void *)obj);
402 	preempt_enable();
403 	__release(bitlock);
404 	local_bh_enable();
405 }
406 
/**
 * rht_for_each_from - iterate over hash chain from given head
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * The walk stops at the nulls marker.  Links are followed with
 * rht_dereference_bucket(), whose lockdep condition is that the bucket
 * for @tbl/@hash is held.
 */
#define rht_for_each_from(pos, head, tbl, hash) \
	for (pos = head;			\
	     !rht_is_a_nulls(pos);		\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
418 
/**
 * rht_for_each - iterate over hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * Starts at the head of the bucket selected by @hash in @tbl and otherwise
 * behaves like rht_for_each_from().
 */
#define rht_for_each(pos, tbl, hash) \
	rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash),  \
			  tbl, hash)
428 
/**
 * rht_for_each_entry_from - iterate over hash chain from given head
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * @tpos is refreshed from @pos by rht_entry() each time the loop condition
 * is evaluated on a non-nulls @pos.
 */
#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member)	\
	for (pos = head;						\
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
442 
/**
 * rht_for_each_entry - iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * Starts at the head of the bucket selected by @hash in @tbl and otherwise
 * behaves like rht_for_each_entry_from().
 */
#define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
	rht_for_each_entry_from(tpos, pos,				\
				rht_ptr(rht_bucket(tbl, hash), tbl, hash), \
				tbl, hash, member)
455 
/**
 * rht_for_each_entry_safe - safely iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @nxt:	the &struct rhash_head to use as next in loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive allows for the looped code to
 * remove the loop cursor from the list.
 *
 * The lookahead parameter is deliberately not called "next": a macro
 * parameter of that name would also be substituted into the ->next member
 * access, so the macro would only compile when the caller's variable is
 * itself named "next".
 */
#define rht_for_each_entry_safe(tpos, pos, nxt, tbl, hash, member)	      \
	for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash),		      \
	     nxt = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket((pos)->next, tbl, hash) : NULL; \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
	     pos = nxt,							      \
	     nxt = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket((pos)->next, tbl, hash) : NULL)
476 
/**
 * rht_for_each_rcu_from - iterate over rcu hash chain from given head
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * @tbl and @hash are accepted for symmetry with the other iterators but are
 * not used by the expansion, which follows links with rcu_dereference_raw().
 */
#define rht_for_each_rcu_from(pos, head, tbl, hash)			\
	for (({barrier(); }),						\
	     pos = (head);						\
	     !rht_is_a_nulls(pos);					\
	     pos = rcu_dereference_raw((pos)->next))
493 
/**
 * rht_for_each_rcu - iterate over rcu hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * The walk starts at the head of the bucket selected by @hash in @tbl and
 * stops at the nulls marker.
 */
#define rht_for_each_rcu(pos, tbl, hash)			\
	for (({barrier(); }),					\
	     pos = rht_ptr_rcu(rht_bucket(tbl, hash));		\
	     !rht_is_a_nulls(pos);				\
	     pos = rcu_dereference_raw((pos)->next))
509 
/**
 * rht_for_each_entry_rcu_from - iterate over rcu hash chain from given head
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the &struct rhash_head to start from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * Links are followed with rht_dereference_bucket_rcu(); @tpos is refreshed
 * from @pos by rht_entry() on every non-nulls step.
 */
#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \
	for (({barrier(); }),						    \
	     pos = (head);						    \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
	     pos = rht_dereference_bucket_rcu((pos)->next, tbl, hash))
528 
/**
 * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * Starts at the head of the bucket selected by @hash in @tbl and otherwise
 * behaves like rht_for_each_entry_rcu_from().
 */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
	rht_for_each_entry_rcu_from(tpos, pos,				   \
				    rht_ptr_rcu(rht_bucket(tbl, hash)),	   \
				    tbl, hash, member)
545 
/**
 * rhl_for_each_rcu - iterate over rcu hash table list
 * @pos:	the &struct rhlist_head to use as a loop cursor.
 * @list:	the head of the list
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.  The walk ends when the cursor
 * becomes NULL.
 */
#define rhl_for_each_rcu(pos, list)					\
	for (pos = (list); pos; pos = rcu_dereference_raw((pos)->next))
556 
/**
 * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhlist_head to use as a loop cursor.
 * @list:	the head of the list
 * @member:	name of the &struct rhlist_head within the hashable struct.
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.  The walk ends when the cursor
 * becomes NULL; @tpos is refreshed from @pos by rht_entry() on every step.
 */
#define rhl_for_each_entry_rcu(tpos, pos, list, member)			\
	for (pos = (list); pos && rht_entry(tpos, pos, member);		\
	     pos = rcu_dereference_raw((pos)->next))
570 
571 static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
572 				     const void *obj)
573 {
574 	struct rhashtable *ht = arg->ht;
575 	const char *ptr = obj;
576 
577 	return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
578 }
579 
580 /* Internal function, do not use. */
581 static inline struct rhash_head *__rhashtable_lookup(
582 	struct rhashtable *ht, const void *key,
583 	const struct rhashtable_params params)
584 {
585 	struct rhashtable_compare_arg arg = {
586 		.ht = ht,
587 		.key = key,
588 	};
589 	struct rhash_lock_head __rcu *const *bkt;
590 	struct bucket_table *tbl;
591 	struct rhash_head *he;
592 	unsigned int hash;
593 
594 	tbl = rht_dereference_rcu(ht->tbl, ht);
595 restart:
596 	hash = rht_key_hashfn(ht, tbl, key, params);
597 	bkt = rht_bucket(tbl, hash);
598 	do {
599 		rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
600 			if (params.obj_cmpfn ?
601 			    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
602 			    rhashtable_compare(&arg, rht_obj(ht, he)))
603 				continue;
604 			return he;
605 		}
606 		/* An object might have been moved to a different hash chain,
607 		 * while we walk along it - better check and retry.
608 		 */
609 	} while (he != RHT_NULLS_MARKER(bkt));
610 
611 	/* Ensure we see any new tables. */
612 	smp_rmb();
613 
614 	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
615 	if (unlikely(tbl))
616 		goto restart;
617 
618 	return NULL;
619 }
620 
621 /**
622  * rhashtable_lookup - search hash table
623  * @ht:		hash table
624  * @key:	the pointer to the key
625  * @params:	hash table parameters
626  *
627  * Computes the hash value for the key and traverses the bucket chain looking
628  * for a entry with an identical key. The first matching entry is returned.
629  *
630  * This must only be called under the RCU read lock.
631  *
632  * Returns the first entry on which the compare function returned true.
633  */
634 static inline void *rhashtable_lookup(
635 	struct rhashtable *ht, const void *key,
636 	const struct rhashtable_params params)
637 {
638 	struct rhash_head *he = __rhashtable_lookup(ht, key, params);
639 
640 	return he ? rht_obj(ht, he) : NULL;
641 }
642 
643 /**
644  * rhashtable_lookup_fast - search hash table, without RCU read lock
645  * @ht:		hash table
646  * @key:	the pointer to the key
647  * @params:	hash table parameters
648  *
649  * Computes the hash value for the key and traverses the bucket chain looking
650  * for a entry with an identical key. The first matching entry is returned.
651  *
652  * Only use this function when you have other mechanisms guaranteeing
653  * that the object won't go away after the RCU read lock is released.
654  *
655  * Returns the first entry on which the compare function returned true.
656  */
657 static inline void *rhashtable_lookup_fast(
658 	struct rhashtable *ht, const void *key,
659 	const struct rhashtable_params params)
660 {
661 	void *obj;
662 
663 	rcu_read_lock();
664 	obj = rhashtable_lookup(ht, key, params);
665 	rcu_read_unlock();
666 
667 	return obj;
668 }
669 
670 /**
671  * rhltable_lookup - search hash list table
672  * @hlt:	hash table
673  * @key:	the pointer to the key
674  * @params:	hash table parameters
675  *
676  * Computes the hash value for the key and traverses the bucket chain looking
677  * for a entry with an identical key.  All matching entries are returned
678  * in a list.
679  *
680  * This must only be called under the RCU read lock.
681  *
682  * Returns the list of entries that match the given key.
683  */
684 static inline struct rhlist_head *rhltable_lookup(
685 	struct rhltable *hlt, const void *key,
686 	const struct rhashtable_params params)
687 {
688 	struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);
689 
690 	return he ? container_of(he, struct rhlist_head, rhead) : NULL;
691 }
692 
693 /* Internal function, please use rhashtable_insert_fast() instead. This
694  * function returns the existing element already in hashes in there is a clash,
695  * otherwise it returns an error via ERR_PTR().
696  */
697 static inline void *__rhashtable_insert_fast(
698 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
699 	const struct rhashtable_params params, bool rhlist)
700 {
701 	struct rhashtable_compare_arg arg = {
702 		.ht = ht,
703 		.key = key,
704 	};
705 	struct rhash_lock_head __rcu **bkt;
706 	struct rhash_head __rcu **pprev;
707 	struct bucket_table *tbl;
708 	struct rhash_head *head;
709 	unsigned int hash;
710 	int elasticity;
711 	void *data;
712 
713 	rcu_read_lock();
714 
715 	tbl = rht_dereference_rcu(ht->tbl, ht);
716 	hash = rht_head_hashfn(ht, tbl, obj, params);
717 	elasticity = RHT_ELASTICITY;
718 	bkt = rht_bucket_insert(ht, tbl, hash);
719 	data = ERR_PTR(-ENOMEM);
720 	if (!bkt)
721 		goto out;
722 	pprev = NULL;
723 	rht_lock(tbl, bkt);
724 
725 	if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
726 slow_path:
727 		rht_unlock(tbl, bkt);
728 		rcu_read_unlock();
729 		return rhashtable_insert_slow(ht, key, obj);
730 	}
731 
732 	rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
733 		struct rhlist_head *plist;
734 		struct rhlist_head *list;
735 
736 		elasticity--;
737 		if (!key ||
738 		    (params.obj_cmpfn ?
739 		     params.obj_cmpfn(&arg, rht_obj(ht, head)) :
740 		     rhashtable_compare(&arg, rht_obj(ht, head)))) {
741 			pprev = &head->next;
742 			continue;
743 		}
744 
745 		data = rht_obj(ht, head);
746 
747 		if (!rhlist)
748 			goto out_unlock;
749 
750 
751 		list = container_of(obj, struct rhlist_head, rhead);
752 		plist = container_of(head, struct rhlist_head, rhead);
753 
754 		RCU_INIT_POINTER(list->next, plist);
755 		head = rht_dereference_bucket(head->next, tbl, hash);
756 		RCU_INIT_POINTER(list->rhead.next, head);
757 		if (pprev) {
758 			rcu_assign_pointer(*pprev, obj);
759 			rht_unlock(tbl, bkt);
760 		} else
761 			rht_assign_unlock(tbl, bkt, obj);
762 		data = NULL;
763 		goto out;
764 	}
765 
766 	if (elasticity <= 0)
767 		goto slow_path;
768 
769 	data = ERR_PTR(-E2BIG);
770 	if (unlikely(rht_grow_above_max(ht, tbl)))
771 		goto out_unlock;
772 
773 	if (unlikely(rht_grow_above_100(ht, tbl)))
774 		goto slow_path;
775 
776 	/* Inserting at head of list makes unlocking free. */
777 	head = rht_ptr(bkt, tbl, hash);
778 
779 	RCU_INIT_POINTER(obj->next, head);
780 	if (rhlist) {
781 		struct rhlist_head *list;
782 
783 		list = container_of(obj, struct rhlist_head, rhead);
784 		RCU_INIT_POINTER(list->next, NULL);
785 	}
786 
787 	atomic_inc(&ht->nelems);
788 	rht_assign_unlock(tbl, bkt, obj);
789 
790 	if (rht_grow_above_75(ht, tbl))
791 		schedule_work(&ht->run_work);
792 
793 	data = NULL;
794 out:
795 	rcu_read_unlock();
796 
797 	return data;
798 
799 out_unlock:
800 	rht_unlock(tbl, bkt);
801 	goto out;
802 }
803 
804 /**
805  * rhashtable_insert_fast - insert object into hash table
806  * @ht:		hash table
807  * @obj:	pointer to hash head inside object
808  * @params:	hash table parameters
809  *
810  * Will take the per bucket bitlock to protect against mutual mutations
811  * on the same bucket. Multiple insertions may occur in parallel unless
812  * they map to the same bucket.
813  *
814  * It is safe to call this function from atomic context.
815  *
816  * Will trigger an automatic deferred table resizing if residency in the
817  * table grows beyond 70%.
818  */
819 static inline int rhashtable_insert_fast(
820 	struct rhashtable *ht, struct rhash_head *obj,
821 	const struct rhashtable_params params)
822 {
823 	void *ret;
824 
825 	ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
826 	if (IS_ERR(ret))
827 		return PTR_ERR(ret);
828 
829 	return ret == NULL ? 0 : -EEXIST;
830 }
831 
832 /**
833  * rhltable_insert_key - insert object into hash list table
834  * @hlt:	hash list table
835  * @key:	the pointer to the key
836  * @list:	pointer to hash list head inside object
837  * @params:	hash table parameters
838  *
839  * Will take the per bucket bitlock to protect against mutual mutations
840  * on the same bucket. Multiple insertions may occur in parallel unless
841  * they map to the same bucket.
842  *
843  * It is safe to call this function from atomic context.
844  *
845  * Will trigger an automatic deferred table resizing if residency in the
846  * table grows beyond 70%.
847  */
848 static inline int rhltable_insert_key(
849 	struct rhltable *hlt, const void *key, struct rhlist_head *list,
850 	const struct rhashtable_params params)
851 {
852 	return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
853 						params, true));
854 }
855 
856 /**
857  * rhltable_insert - insert object into hash list table
858  * @hlt:	hash list table
859  * @list:	pointer to hash list head inside object
860  * @params:	hash table parameters
861  *
862  * Will take the per bucket bitlock to protect against mutual mutations
863  * on the same bucket. Multiple insertions may occur in parallel unless
864  * they map to the same bucket.
865  *
866  * It is safe to call this function from atomic context.
867  *
868  * Will trigger an automatic deferred table resizing if residency in the
869  * table grows beyond 70%.
870  */
871 static inline int rhltable_insert(
872 	struct rhltable *hlt, struct rhlist_head *list,
873 	const struct rhashtable_params params)
874 {
875 	const char *key = rht_obj(&hlt->ht, &list->rhead);
876 
877 	key += params.key_offset;
878 
879 	return rhltable_insert_key(hlt, key, list, params);
880 }
881 
882 /**
883  * rhashtable_lookup_insert_fast - lookup and insert object into hash table
884  * @ht:		hash table
885  * @obj:	pointer to hash head inside object
886  * @params:	hash table parameters
887  *
888  * This lookup function may only be used for fixed key hash table (key_len
889  * parameter set). It will BUG() if used inappropriately.
890  *
891  * It is safe to call this function from atomic context.
892  *
893  * Will trigger an automatic deferred table resizing if residency in the
894  * table grows beyond 70%.
895  */
896 static inline int rhashtable_lookup_insert_fast(
897 	struct rhashtable *ht, struct rhash_head *obj,
898 	const struct rhashtable_params params)
899 {
900 	const char *key = rht_obj(ht, obj);
901 	void *ret;
902 
903 	BUG_ON(ht->p.obj_hashfn);
904 
905 	ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
906 				       false);
907 	if (IS_ERR(ret))
908 		return PTR_ERR(ret);
909 
910 	return ret == NULL ? 0 : -EEXIST;
911 }
912 
913 /**
914  * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
915  * @ht:		hash table
916  * @obj:	pointer to hash head inside object
917  * @params:	hash table parameters
918  *
919  * Just like rhashtable_lookup_insert_fast(), but this function returns the
920  * object if it exists, NULL if it did not and the insertion was successful,
921  * and an ERR_PTR otherwise.
922  */
923 static inline void *rhashtable_lookup_get_insert_fast(
924 	struct rhashtable *ht, struct rhash_head *obj,
925 	const struct rhashtable_params params)
926 {
927 	const char *key = rht_obj(ht, obj);
928 
929 	BUG_ON(ht->p.obj_hashfn);
930 
931 	return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
932 					false);
933 }
934 
935 /**
936  * rhashtable_lookup_insert_key - search and insert object to hash table
937  *				  with explicit key
938  * @ht:		hash table
939  * @key:	key
940  * @obj:	pointer to hash head inside object
941  * @params:	hash table parameters
942  *
943  * Lookups may occur in parallel with hashtable mutations and resizing.
944  *
945  * Will trigger an automatic deferred table resizing if residency in the
946  * table grows beyond 70%.
947  *
948  * Returns zero on success.
949  */
950 static inline int rhashtable_lookup_insert_key(
951 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
952 	const struct rhashtable_params params)
953 {
954 	void *ret;
955 
956 	BUG_ON(!ht->p.obj_hashfn || !key);
957 
958 	ret = __rhashtable_insert_fast(ht, key, obj, params, false);
959 	if (IS_ERR(ret))
960 		return PTR_ERR(ret);
961 
962 	return ret == NULL ? 0 : -EEXIST;
963 }
964 
965 /**
966  * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
967  * @ht:		hash table
968  * @key:	key
969  * @obj:	pointer to hash head inside object
970  * @params:	hash table parameters
971  *
972  * Just like rhashtable_lookup_insert_key(), but this function returns the
973  * object if it exists, NULL if it does not and the insertion was successful,
974  * and an ERR_PTR otherwise.
975  */
976 static inline void *rhashtable_lookup_get_insert_key(
977 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
978 	const struct rhashtable_params params)
979 {
980 	BUG_ON(!ht->p.obj_hashfn || !key);
981 
982 	return __rhashtable_insert_fast(ht, key, obj, params, false);
983 }
984 
985 /* Internal function, please use rhashtable_remove_fast() instead */
986 static inline int __rhashtable_remove_fast_one(
987 	struct rhashtable *ht, struct bucket_table *tbl,
988 	struct rhash_head *obj, const struct rhashtable_params params,
989 	bool rhlist)
990 {
991 	struct rhash_lock_head __rcu **bkt;
992 	struct rhash_head __rcu **pprev;
993 	struct rhash_head *he;
994 	unsigned int hash;
995 	int err = -ENOENT;
996 
997 	hash = rht_head_hashfn(ht, tbl, obj, params);
998 	bkt = rht_bucket_var(tbl, hash);
999 	if (!bkt)
1000 		return -ENOENT;
1001 	pprev = NULL;
1002 	rht_lock(tbl, bkt);
1003 
1004 	rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
1005 		struct rhlist_head *list;
1006 
1007 		list = container_of(he, struct rhlist_head, rhead);
1008 
1009 		if (he != obj) {
1010 			struct rhlist_head __rcu **lpprev;
1011 
1012 			pprev = &he->next;
1013 
1014 			if (!rhlist)
1015 				continue;
1016 
1017 			do {
1018 				lpprev = &list->next;
1019 				list = rht_dereference_bucket(list->next,
1020 							      tbl, hash);
1021 			} while (list && obj != &list->rhead);
1022 
1023 			if (!list)
1024 				continue;
1025 
1026 			list = rht_dereference_bucket(list->next, tbl, hash);
1027 			RCU_INIT_POINTER(*lpprev, list);
1028 			err = 0;
1029 			break;
1030 		}
1031 
1032 		obj = rht_dereference_bucket(obj->next, tbl, hash);
1033 		err = 1;
1034 
1035 		if (rhlist) {
1036 			list = rht_dereference_bucket(list->next, tbl, hash);
1037 			if (list) {
1038 				RCU_INIT_POINTER(list->rhead.next, obj);
1039 				obj = &list->rhead;
1040 				err = 0;
1041 			}
1042 		}
1043 
1044 		if (pprev) {
1045 			rcu_assign_pointer(*pprev, obj);
1046 			rht_unlock(tbl, bkt);
1047 		} else {
1048 			rht_assign_unlock(tbl, bkt, obj);
1049 		}
1050 		goto unlocked;
1051 	}
1052 
1053 	rht_unlock(tbl, bkt);
1054 unlocked:
1055 	if (err > 0) {
1056 		atomic_dec(&ht->nelems);
1057 		if (unlikely(ht->p.automatic_shrinking &&
1058 			     rht_shrink_below_30(ht, tbl)))
1059 			schedule_work(&ht->run_work);
1060 		err = 0;
1061 	}
1062 
1063 	return err;
1064 }
1065 
1066 /* Internal function, please use rhashtable_remove_fast() instead */
1067 static inline int __rhashtable_remove_fast(
1068 	struct rhashtable *ht, struct rhash_head *obj,
1069 	const struct rhashtable_params params, bool rhlist)
1070 {
1071 	struct bucket_table *tbl;
1072 	int err;
1073 
1074 	rcu_read_lock();
1075 
1076 	tbl = rht_dereference_rcu(ht->tbl, ht);
1077 
1078 	/* Because we have already taken (and released) the bucket
1079 	 * lock in old_tbl, if we find that future_tbl is not yet
1080 	 * visible then that guarantees the entry to still be in
1081 	 * the old tbl if it exists.
1082 	 */
1083 	while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
1084 						   rhlist)) &&
1085 	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
1086 		;
1087 
1088 	rcu_read_unlock();
1089 
1090 	return err;
1091 }
1092 
1093 /**
1094  * rhashtable_remove_fast - remove object from hash table
1095  * @ht:		hash table
1096  * @obj:	pointer to hash head inside object
1097  * @params:	hash table parameters
1098  *
1099  * Since the hash chain is single linked, the removal operation needs to
1100  * walk the bucket chain upon removal. The removal operation is thus
1101  * considerable slow if the hash table is not correctly sized.
1102  *
1103  * Will automatically shrink the table if permitted when residency drops
1104  * below 30%.
1105  *
1106  * Returns zero on success, -ENOENT if the entry could not be found.
1107  */
1108 static inline int rhashtable_remove_fast(
1109 	struct rhashtable *ht, struct rhash_head *obj,
1110 	const struct rhashtable_params params)
1111 {
1112 	return __rhashtable_remove_fast(ht, obj, params, false);
1113 }
1114 
1115 /**
1116  * rhltable_remove - remove object from hash list table
1117  * @hlt:	hash list table
1118  * @list:	pointer to hash list head inside object
1119  * @params:	hash table parameters
1120  *
1121  * Since the hash chain is single linked, the removal operation needs to
1122  * walk the bucket chain upon removal. The removal operation is thus
1123  * considerable slow if the hash table is not correctly sized.
1124  *
1125  * Will automatically shrink the table if permitted when residency drops
1126  * below 30%
1127  *
1128  * Returns zero on success, -ENOENT if the entry could not be found.
1129  */
1130 static inline int rhltable_remove(
1131 	struct rhltable *hlt, struct rhlist_head *list,
1132 	const struct rhashtable_params params)
1133 {
1134 	return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
1135 }
1136 
1137 /* Internal function, please use rhashtable_replace_fast() instead */
1138 static inline int __rhashtable_replace_fast(
1139 	struct rhashtable *ht, struct bucket_table *tbl,
1140 	struct rhash_head *obj_old, struct rhash_head *obj_new,
1141 	const struct rhashtable_params params)
1142 {
1143 	struct rhash_lock_head __rcu **bkt;
1144 	struct rhash_head __rcu **pprev;
1145 	struct rhash_head *he;
1146 	unsigned int hash;
1147 	int err = -ENOENT;
1148 
1149 	/* Minimally, the old and new objects must have same hash
1150 	 * (which should mean identifiers are the same).
1151 	 */
1152 	hash = rht_head_hashfn(ht, tbl, obj_old, params);
1153 	if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
1154 		return -EINVAL;
1155 
1156 	bkt = rht_bucket_var(tbl, hash);
1157 	if (!bkt)
1158 		return -ENOENT;
1159 
1160 	pprev = NULL;
1161 	rht_lock(tbl, bkt);
1162 
1163 	rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
1164 		if (he != obj_old) {
1165 			pprev = &he->next;
1166 			continue;
1167 		}
1168 
1169 		rcu_assign_pointer(obj_new->next, obj_old->next);
1170 		if (pprev) {
1171 			rcu_assign_pointer(*pprev, obj_new);
1172 			rht_unlock(tbl, bkt);
1173 		} else {
1174 			rht_assign_unlock(tbl, bkt, obj_new);
1175 		}
1176 		err = 0;
1177 		goto unlocked;
1178 	}
1179 
1180 	rht_unlock(tbl, bkt);
1181 
1182 unlocked:
1183 	return err;
1184 }
1185 
1186 /**
1187  * rhashtable_replace_fast - replace an object in hash table
1188  * @ht:		hash table
1189  * @obj_old:	pointer to hash head inside object being replaced
1190  * @obj_new:	pointer to hash head inside object which is new
1191  * @params:	hash table parameters
1192  *
1193  * Replacing an object doesn't affect the number of elements in the hash table
1194  * or bucket, so we don't need to worry about shrinking or expanding the
1195  * table here.
1196  *
1197  * Returns zero on success, -ENOENT if the entry could not be found,
1198  * -EINVAL if hash is not the same for the old and new objects.
1199  */
1200 static inline int rhashtable_replace_fast(
1201 	struct rhashtable *ht, struct rhash_head *obj_old,
1202 	struct rhash_head *obj_new,
1203 	const struct rhashtable_params params)
1204 {
1205 	struct bucket_table *tbl;
1206 	int err;
1207 
1208 	rcu_read_lock();
1209 
1210 	tbl = rht_dereference_rcu(ht->tbl, ht);
1211 
1212 	/* Because we have already taken (and released) the bucket
1213 	 * lock in old_tbl, if we find that future_tbl is not yet
1214 	 * visible then that guarantees the entry to still be in
1215 	 * the old tbl if it exists.
1216 	 */
1217 	while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
1218 						obj_new, params)) &&
1219 	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
1220 		;
1221 
1222 	rcu_read_unlock();
1223 
1224 	return err;
1225 }
1226 
1227 /**
1228  * rhltable_walk_enter - Initialise an iterator
1229  * @hlt:	Table to walk over
1230  * @iter:	Hash table Iterator
1231  *
1232  * This function prepares a hash table walk.
1233  *
1234  * Note that if you restart a walk after rhashtable_walk_stop you
1235  * may see the same object twice.  Also, you may miss objects if
1236  * there are removals in between rhashtable_walk_stop and the next
1237  * call to rhashtable_walk_start.
1238  *
1239  * For a completely stable walk you should construct your own data
1240  * structure outside the hash table.
1241  *
1242  * This function may be called from any process context, including
1243  * non-preemptable context, but cannot be called from softirq or
1244  * hardirq context.
1245  *
1246  * You must call rhashtable_walk_exit after this function returns.
1247  */
1248 static inline void rhltable_walk_enter(struct rhltable *hlt,
1249 				       struct rhashtable_iter *iter)
1250 {
1251 	return rhashtable_walk_enter(&hlt->ht, iter);
1252 }
1253 
1254 /**
1255  * rhltable_free_and_destroy - free elements and destroy hash list table
1256  * @hlt:	the hash list table to destroy
1257  * @free_fn:	callback to release resources of element
1258  * @arg:	pointer passed to free_fn
1259  *
1260  * See documentation for rhashtable_free_and_destroy.
1261  */
1262 static inline void rhltable_free_and_destroy(struct rhltable *hlt,
1263 					     void (*free_fn)(void *ptr,
1264 							     void *arg),
1265 					     void *arg)
1266 {
1267 	return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
1268 }
1269 
/**
 * rhltable_destroy - destroy hash list table
 * @hlt:	the hash list table to destroy
 *
 * Calls rhltable_free_and_destroy() with neither an element callback nor
 * a callback argument (both NULL).
 */
static inline void rhltable_destroy(struct rhltable *hlt)
{
	/* Plain call: a void function must not 'return' an expression. */
	rhltable_free_and_destroy(hlt, NULL, NULL);
}
1274 
1275 #endif /* _LINUX_RHASHTABLE_H */
1276