xref: /linux/include/linux/rhashtable.h (revision 1f2367a39f17bd553a75e179a747f9b257bc9478)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Resizable, Scalable, Concurrent Hash Table
4  *
5  * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
6  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
7  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
8  *
9  * Code partially derived from nft_hash
10  * Rewritten with rehash code from br_multicast plus single list
11  * pointer as suggested by Josh Triplett
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License version 2 as
15  * published by the Free Software Foundation.
16  */
17 
18 #ifndef _LINUX_RHASHTABLE_H
19 #define _LINUX_RHASHTABLE_H
20 
21 #include <linux/err.h>
22 #include <linux/errno.h>
23 #include <linux/jhash.h>
24 #include <linux/list_nulls.h>
25 #include <linux/workqueue.h>
26 #include <linux/rculist.h>
27 
28 #include <linux/rhashtable-types.h>
29 /*
30  * The end of the chain is marked with a special nulls marks which has
31  * the least significant bit set.
32  */
33 
34 /* Maximum chain length before rehash
35  *
36  * The maximum (not average) chain length grows with the size of the hash
37  * table, at a rate of (log N)/(log log N).
38  *
39  * The value of 16 is selected so that even if the hash table grew to
40  * 2^32 you would not expect the maximum chain length to exceed it
41  * unless we are under attack (or extremely unlucky).
42  *
43  * As this limit is only to detect attacks, we don't need to set it to a
44  * lower value as you'd need the chain length to vastly exceed 16 to have
45  * any real effect on the system.
46  */
47 #define RHT_ELASTICITY	16u
48 
49 /**
50  * struct bucket_table - Table of hash buckets
51  * @size: Number of hash buckets
52  * @nest: Number of bits of first-level nested table.
53  * @rehash: Current bucket being rehashed
54  * @hash_rnd: Random seed to fold into hash
55  * @locks_mask: Mask to apply before accessing locks[]
56  * @locks: Array of spinlocks protecting individual buckets
57  * @walkers: List of active walkers
58  * @rcu: RCU structure for freeing the table
59  * @future_tbl: Table under construction during rehashing
60  * @ntbl: Nested table used when out of memory.
61  * @buckets: size * hash buckets
62  */
63 struct bucket_table {
64 	unsigned int		size;
65 	unsigned int		nest;
66 	unsigned int		rehash;
67 	u32			hash_rnd;
68 	unsigned int		locks_mask;
69 	spinlock_t		*locks;
70 	struct list_head	walkers;
71 	struct rcu_head		rcu;
72 
73 	struct bucket_table __rcu *future_tbl;
74 
75 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
76 };
77 
78 /*
79  * NULLS_MARKER() expects a hash value with the low
80  * bits mostly likely to be significant, and it discards
81  * the msb.
82  * We git it an address, in which the bottom 2 bits are
83  * always 0, and the msb might be significant.
84  * So we shift the address down one bit to align with
85  * expectations and avoid losing a significant bit.
86  */
87 #define	RHT_NULLS_MARKER(ptr)	\
88 	((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
89 #define INIT_RHT_NULLS_HEAD(ptr)	\
90 	((ptr) = RHT_NULLS_MARKER(&(ptr)))
91 
92 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
93 {
94 	return ((unsigned long) ptr & 1);
95 }
96 
97 static inline void *rht_obj(const struct rhashtable *ht,
98 			    const struct rhash_head *he)
99 {
100 	return (char *)he - ht->p.head_offset;
101 }
102 
103 static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
104 					    unsigned int hash)
105 {
106 	return hash & (tbl->size - 1);
107 }
108 
109 static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
110 	const void *key, const struct rhashtable_params params,
111 	unsigned int hash_rnd)
112 {
113 	unsigned int hash;
114 
115 	/* params must be equal to ht->p if it isn't constant. */
116 	if (!__builtin_constant_p(params.key_len))
117 		hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
118 	else if (params.key_len) {
119 		unsigned int key_len = params.key_len;
120 
121 		if (params.hashfn)
122 			hash = params.hashfn(key, key_len, hash_rnd);
123 		else if (key_len & (sizeof(u32) - 1))
124 			hash = jhash(key, key_len, hash_rnd);
125 		else
126 			hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
127 	} else {
128 		unsigned int key_len = ht->p.key_len;
129 
130 		if (params.hashfn)
131 			hash = params.hashfn(key, key_len, hash_rnd);
132 		else
133 			hash = jhash(key, key_len, hash_rnd);
134 	}
135 
136 	return hash;
137 }
138 
139 static inline unsigned int rht_key_hashfn(
140 	struct rhashtable *ht, const struct bucket_table *tbl,
141 	const void *key, const struct rhashtable_params params)
142 {
143 	unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);
144 
145 	return rht_bucket_index(tbl, hash);
146 }
147 
148 static inline unsigned int rht_head_hashfn(
149 	struct rhashtable *ht, const struct bucket_table *tbl,
150 	const struct rhash_head *he, const struct rhashtable_params params)
151 {
152 	const char *ptr = rht_obj(ht, he);
153 
154 	return likely(params.obj_hashfn) ?
155 	       rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
156 							    ht->p.key_len,
157 						       tbl->hash_rnd)) :
158 	       rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
159 }
160 
161 /**
162  * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
163  * @ht:		hash table
164  * @tbl:	current table
165  */
166 static inline bool rht_grow_above_75(const struct rhashtable *ht,
167 				     const struct bucket_table *tbl)
168 {
169 	/* Expand table when exceeding 75% load */
170 	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
171 	       (!ht->p.max_size || tbl->size < ht->p.max_size);
172 }
173 
174 /**
175  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
176  * @ht:		hash table
177  * @tbl:	current table
178  */
179 static inline bool rht_shrink_below_30(const struct rhashtable *ht,
180 				       const struct bucket_table *tbl)
181 {
182 	/* Shrink table beneath 30% load */
183 	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
184 	       tbl->size > ht->p.min_size;
185 }
186 
187 /**
188  * rht_grow_above_100 - returns true if nelems > table-size
189  * @ht:		hash table
190  * @tbl:	current table
191  */
192 static inline bool rht_grow_above_100(const struct rhashtable *ht,
193 				      const struct bucket_table *tbl)
194 {
195 	return atomic_read(&ht->nelems) > tbl->size &&
196 		(!ht->p.max_size || tbl->size < ht->p.max_size);
197 }
198 
199 /**
200  * rht_grow_above_max - returns true if table is above maximum
201  * @ht:		hash table
202  * @tbl:	current table
203  */
204 static inline bool rht_grow_above_max(const struct rhashtable *ht,
205 				      const struct bucket_table *tbl)
206 {
207 	return atomic_read(&ht->nelems) >= ht->max_elems;
208 }
209 
210 /* The bucket lock is selected based on the hash and protects mutations
211  * on a group of hash buckets.
212  *
213  * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
214  * a single lock always covers both buckets which may both contains
215  * entries which link to the same bucket of the old table during resizing.
216  * This allows to simplify the locking as locking the bucket in both
217  * tables during resize always guarantee protection.
218  *
219  * IMPORTANT: When holding the bucket lock of both the old and new table
220  * during expansions and shrinking, the old bucket lock must always be
221  * acquired first.
222  */
223 static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
224 					  unsigned int hash)
225 {
226 	return &tbl->locks[hash & tbl->locks_mask];
227 }
228 
229 #ifdef CONFIG_PROVE_LOCKING
230 int lockdep_rht_mutex_is_held(struct rhashtable *ht);
231 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
232 #else
233 static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
234 {
235 	return 1;
236 }
237 
238 static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
239 					     u32 hash)
240 {
241 	return 1;
242 }
243 #endif /* CONFIG_PROVE_LOCKING */
244 
245 void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
246 			     struct rhash_head *obj);
247 
248 void rhashtable_walk_enter(struct rhashtable *ht,
249 			   struct rhashtable_iter *iter);
250 void rhashtable_walk_exit(struct rhashtable_iter *iter);
251 int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);
252 
253 static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
254 {
255 	(void)rhashtable_walk_start_check(iter);
256 }
257 
258 void *rhashtable_walk_next(struct rhashtable_iter *iter);
259 void *rhashtable_walk_peek(struct rhashtable_iter *iter);
260 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
261 
262 void rhashtable_free_and_destroy(struct rhashtable *ht,
263 				 void (*free_fn)(void *ptr, void *arg),
264 				 void *arg);
265 void rhashtable_destroy(struct rhashtable *ht);
266 
267 struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
268 					    unsigned int hash);
269 struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
270 						   struct bucket_table *tbl,
271 						   unsigned int hash);
272 
273 #define rht_dereference(p, ht) \
274 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
275 
276 #define rht_dereference_rcu(p, ht) \
277 	rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))
278 
279 #define rht_dereference_bucket(p, tbl, hash) \
280 	rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))
281 
282 #define rht_dereference_bucket_rcu(p, tbl, hash) \
283 	rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))
284 
285 #define rht_entry(tpos, pos, member) \
286 	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
287 
288 static inline struct rhash_head __rcu *const *rht_bucket(
289 	const struct bucket_table *tbl, unsigned int hash)
290 {
291 	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
292 				     &tbl->buckets[hash];
293 }
294 
295 static inline struct rhash_head __rcu **rht_bucket_var(
296 	struct bucket_table *tbl, unsigned int hash)
297 {
298 	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
299 				     &tbl->buckets[hash];
300 }
301 
302 static inline struct rhash_head __rcu **rht_bucket_insert(
303 	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
304 {
305 	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
306 				     &tbl->buckets[hash];
307 }
308 
309 /**
310  * rht_for_each_continue - continue iterating over hash chain
311  * @pos:	the &struct rhash_head to use as a loop cursor.
312  * @head:	the previous &struct rhash_head to continue from
313  * @tbl:	the &struct bucket_table
314  * @hash:	the hash value / bucket index
315  */
316 #define rht_for_each_continue(pos, head, tbl, hash) \
317 	for (pos = rht_dereference_bucket(head, tbl, hash); \
318 	     !rht_is_a_nulls(pos); \
319 	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
320 
321 /**
322  * rht_for_each - iterate over hash chain
323  * @pos:	the &struct rhash_head to use as a loop cursor.
324  * @tbl:	the &struct bucket_table
325  * @hash:	the hash value / bucket index
326  */
327 #define rht_for_each(pos, tbl, hash) \
328 	rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
329 
330 /**
331  * rht_for_each_entry_continue - continue iterating over hash chain
332  * @tpos:	the type * to use as a loop cursor.
333  * @pos:	the &struct rhash_head to use as a loop cursor.
334  * @head:	the previous &struct rhash_head to continue from
335  * @tbl:	the &struct bucket_table
336  * @hash:	the hash value / bucket index
337  * @member:	name of the &struct rhash_head within the hashable struct.
338  */
339 #define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member)	\
340 	for (pos = rht_dereference_bucket(head, tbl, hash);		\
341 	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	\
342 	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
343 
344 /**
345  * rht_for_each_entry - iterate over hash chain of given type
346  * @tpos:	the type * to use as a loop cursor.
347  * @pos:	the &struct rhash_head to use as a loop cursor.
348  * @tbl:	the &struct bucket_table
349  * @hash:	the hash value / bucket index
350  * @member:	name of the &struct rhash_head within the hashable struct.
351  */
352 #define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
353 	rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash),	\
354 				    tbl, hash, member)
355 
356 /**
357  * rht_for_each_entry_safe - safely iterate over hash chain of given type
358  * @tpos:	the type * to use as a loop cursor.
359  * @pos:	the &struct rhash_head to use as a loop cursor.
360  * @next:	the &struct rhash_head to use as next in loop cursor.
361  * @tbl:	the &struct bucket_table
362  * @hash:	the hash value / bucket index
363  * @member:	name of the &struct rhash_head within the hashable struct.
364  *
365  * This hash chain list-traversal primitive allows for the looped code to
366  * remove the loop cursor from the list.
367  */
368 #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
369 	for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
370 	     next = !rht_is_a_nulls(pos) ?				      \
371 		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
372 	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
373 	     pos = next,						      \
374 	     next = !rht_is_a_nulls(pos) ?				      \
375 		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
376 
377 /**
378  * rht_for_each_rcu_continue - continue iterating over rcu hash chain
379  * @pos:	the &struct rhash_head to use as a loop cursor.
380  * @head:	the previous &struct rhash_head to continue from
381  * @tbl:	the &struct bucket_table
382  * @hash:	the hash value / bucket index
383  *
384  * This hash chain list-traversal primitive may safely run concurrently with
385  * the _rcu mutation primitives such as rhashtable_insert() as long as the
386  * traversal is guarded by rcu_read_lock().
387  */
388 #define rht_for_each_rcu_continue(pos, head, tbl, hash)			\
389 	for (({barrier(); }),						\
390 	     pos = rht_dereference_bucket_rcu(head, tbl, hash);		\
391 	     !rht_is_a_nulls(pos);					\
392 	     pos = rcu_dereference_raw(pos->next))
393 
394 /**
395  * rht_for_each_rcu - iterate over rcu hash chain
396  * @pos:	the &struct rhash_head to use as a loop cursor.
397  * @tbl:	the &struct bucket_table
398  * @hash:	the hash value / bucket index
399  *
400  * This hash chain list-traversal primitive may safely run concurrently with
401  * the _rcu mutation primitives such as rhashtable_insert() as long as the
402  * traversal is guarded by rcu_read_lock().
403  */
404 #define rht_for_each_rcu(pos, tbl, hash)				\
405 	rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
406 
407 /**
408  * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
409  * @tpos:	the type * to use as a loop cursor.
410  * @pos:	the &struct rhash_head to use as a loop cursor.
411  * @head:	the previous &struct rhash_head to continue from
412  * @tbl:	the &struct bucket_table
413  * @hash:	the hash value / bucket index
414  * @member:	name of the &struct rhash_head within the hashable struct.
415  *
416  * This hash chain list-traversal primitive may safely run concurrently with
417  * the _rcu mutation primitives such as rhashtable_insert() as long as the
418  * traversal is guarded by rcu_read_lock().
419  */
420 #define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
421 	for (({barrier(); }),						    \
422 	     pos = rht_dereference_bucket_rcu(head, tbl, hash);		    \
423 	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
424 	     pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
425 
426 /**
427  * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
428  * @tpos:	the type * to use as a loop cursor.
429  * @pos:	the &struct rhash_head to use as a loop cursor.
430  * @tbl:	the &struct bucket_table
431  * @hash:	the hash value / bucket index
432  * @member:	name of the &struct rhash_head within the hashable struct.
433  *
434  * This hash chain list-traversal primitive may safely run concurrently with
435  * the _rcu mutation primitives such as rhashtable_insert() as long as the
436  * traversal is guarded by rcu_read_lock().
437  */
438 #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
439 	rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
440 					tbl, hash, member)
441 
442 /**
443  * rhl_for_each_rcu - iterate over rcu hash table list
444  * @pos:	the &struct rlist_head to use as a loop cursor.
445  * @list:	the head of the list
446  *
447  * This hash chain list-traversal primitive should be used on the
448  * list returned by rhltable_lookup.
449  */
450 #define rhl_for_each_rcu(pos, list)					\
451 	for (pos = list; pos; pos = rcu_dereference_raw(pos->next))
452 
453 /**
454  * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
455  * @tpos:	the type * to use as a loop cursor.
456  * @pos:	the &struct rlist_head to use as a loop cursor.
457  * @list:	the head of the list
458  * @member:	name of the &struct rlist_head within the hashable struct.
459  *
460  * This hash chain list-traversal primitive should be used on the
461  * list returned by rhltable_lookup.
462  */
463 #define rhl_for_each_entry_rcu(tpos, pos, list, member)			\
464 	for (pos = list; pos && rht_entry(tpos, pos, member);		\
465 	     pos = rcu_dereference_raw(pos->next))
466 
467 static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
468 				     const void *obj)
469 {
470 	struct rhashtable *ht = arg->ht;
471 	const char *ptr = obj;
472 
473 	return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
474 }
475 
476 /* Internal function, do not use. */
477 static inline struct rhash_head *__rhashtable_lookup(
478 	struct rhashtable *ht, const void *key,
479 	const struct rhashtable_params params)
480 {
481 	struct rhashtable_compare_arg arg = {
482 		.ht = ht,
483 		.key = key,
484 	};
485 	struct rhash_head __rcu * const *head;
486 	struct bucket_table *tbl;
487 	struct rhash_head *he;
488 	unsigned int hash;
489 
490 	tbl = rht_dereference_rcu(ht->tbl, ht);
491 restart:
492 	hash = rht_key_hashfn(ht, tbl, key, params);
493 	head = rht_bucket(tbl, hash);
494 	do {
495 		rht_for_each_rcu_continue(he, *head, tbl, hash) {
496 			if (params.obj_cmpfn ?
497 			    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
498 			    rhashtable_compare(&arg, rht_obj(ht, he)))
499 				continue;
500 			return he;
501 		}
502 		/* An object might have been moved to a different hash chain,
503 		 * while we walk along it - better check and retry.
504 		 */
505 	} while (he != RHT_NULLS_MARKER(head));
506 
507 	/* Ensure we see any new tables. */
508 	smp_rmb();
509 
510 	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
511 	if (unlikely(tbl))
512 		goto restart;
513 
514 	return NULL;
515 }
516 
517 /**
518  * rhashtable_lookup - search hash table
519  * @ht:		hash table
520  * @key:	the pointer to the key
521  * @params:	hash table parameters
522  *
523  * Computes the hash value for the key and traverses the bucket chain looking
524  * for a entry with an identical key. The first matching entry is returned.
525  *
526  * This must only be called under the RCU read lock.
527  *
528  * Returns the first entry on which the compare function returned true.
529  */
530 static inline void *rhashtable_lookup(
531 	struct rhashtable *ht, const void *key,
532 	const struct rhashtable_params params)
533 {
534 	struct rhash_head *he = __rhashtable_lookup(ht, key, params);
535 
536 	return he ? rht_obj(ht, he) : NULL;
537 }
538 
539 /**
540  * rhashtable_lookup_fast - search hash table, without RCU read lock
541  * @ht:		hash table
542  * @key:	the pointer to the key
543  * @params:	hash table parameters
544  *
545  * Computes the hash value for the key and traverses the bucket chain looking
546  * for a entry with an identical key. The first matching entry is returned.
547  *
548  * Only use this function when you have other mechanisms guaranteeing
549  * that the object won't go away after the RCU read lock is released.
550  *
551  * Returns the first entry on which the compare function returned true.
552  */
553 static inline void *rhashtable_lookup_fast(
554 	struct rhashtable *ht, const void *key,
555 	const struct rhashtable_params params)
556 {
557 	void *obj;
558 
559 	rcu_read_lock();
560 	obj = rhashtable_lookup(ht, key, params);
561 	rcu_read_unlock();
562 
563 	return obj;
564 }
565 
566 /**
567  * rhltable_lookup - search hash list table
568  * @hlt:	hash table
569  * @key:	the pointer to the key
570  * @params:	hash table parameters
571  *
572  * Computes the hash value for the key and traverses the bucket chain looking
573  * for a entry with an identical key.  All matching entries are returned
574  * in a list.
575  *
576  * This must only be called under the RCU read lock.
577  *
578  * Returns the list of entries that match the given key.
579  */
580 static inline struct rhlist_head *rhltable_lookup(
581 	struct rhltable *hlt, const void *key,
582 	const struct rhashtable_params params)
583 {
584 	struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);
585 
586 	return he ? container_of(he, struct rhlist_head, rhead) : NULL;
587 }
588 
589 /* Internal function, please use rhashtable_insert_fast() instead. This
590  * function returns the existing element already in hashes in there is a clash,
591  * otherwise it returns an error via ERR_PTR().
592  */
593 static inline void *__rhashtable_insert_fast(
594 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
595 	const struct rhashtable_params params, bool rhlist)
596 {
597 	struct rhashtable_compare_arg arg = {
598 		.ht = ht,
599 		.key = key,
600 	};
601 	struct rhash_head __rcu **pprev;
602 	struct bucket_table *tbl;
603 	struct rhash_head *head;
604 	spinlock_t *lock;
605 	unsigned int hash;
606 	int elasticity;
607 	void *data;
608 
609 	rcu_read_lock();
610 
611 	tbl = rht_dereference_rcu(ht->tbl, ht);
612 	hash = rht_head_hashfn(ht, tbl, obj, params);
613 	lock = rht_bucket_lock(tbl, hash);
614 	spin_lock_bh(lock);
615 
616 	if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
617 slow_path:
618 		spin_unlock_bh(lock);
619 		rcu_read_unlock();
620 		return rhashtable_insert_slow(ht, key, obj);
621 	}
622 
623 	elasticity = RHT_ELASTICITY;
624 	pprev = rht_bucket_insert(ht, tbl, hash);
625 	data = ERR_PTR(-ENOMEM);
626 	if (!pprev)
627 		goto out;
628 
629 	rht_for_each_continue(head, *pprev, tbl, hash) {
630 		struct rhlist_head *plist;
631 		struct rhlist_head *list;
632 
633 		elasticity--;
634 		if (!key ||
635 		    (params.obj_cmpfn ?
636 		     params.obj_cmpfn(&arg, rht_obj(ht, head)) :
637 		     rhashtable_compare(&arg, rht_obj(ht, head)))) {
638 			pprev = &head->next;
639 			continue;
640 		}
641 
642 		data = rht_obj(ht, head);
643 
644 		if (!rhlist)
645 			goto out;
646 
647 
648 		list = container_of(obj, struct rhlist_head, rhead);
649 		plist = container_of(head, struct rhlist_head, rhead);
650 
651 		RCU_INIT_POINTER(list->next, plist);
652 		head = rht_dereference_bucket(head->next, tbl, hash);
653 		RCU_INIT_POINTER(list->rhead.next, head);
654 		rcu_assign_pointer(*pprev, obj);
655 
656 		goto good;
657 	}
658 
659 	if (elasticity <= 0)
660 		goto slow_path;
661 
662 	data = ERR_PTR(-E2BIG);
663 	if (unlikely(rht_grow_above_max(ht, tbl)))
664 		goto out;
665 
666 	if (unlikely(rht_grow_above_100(ht, tbl)))
667 		goto slow_path;
668 
669 	head = rht_dereference_bucket(*pprev, tbl, hash);
670 
671 	RCU_INIT_POINTER(obj->next, head);
672 	if (rhlist) {
673 		struct rhlist_head *list;
674 
675 		list = container_of(obj, struct rhlist_head, rhead);
676 		RCU_INIT_POINTER(list->next, NULL);
677 	}
678 
679 	rcu_assign_pointer(*pprev, obj);
680 
681 	atomic_inc(&ht->nelems);
682 	if (rht_grow_above_75(ht, tbl))
683 		schedule_work(&ht->run_work);
684 
685 good:
686 	data = NULL;
687 
688 out:
689 	spin_unlock_bh(lock);
690 	rcu_read_unlock();
691 
692 	return data;
693 }
694 
695 /**
696  * rhashtable_insert_fast - insert object into hash table
697  * @ht:		hash table
698  * @obj:	pointer to hash head inside object
699  * @params:	hash table parameters
700  *
701  * Will take a per bucket spinlock to protect against mutual mutations
702  * on the same bucket. Multiple insertions may occur in parallel unless
703  * they map to the same bucket lock.
704  *
705  * It is safe to call this function from atomic context.
706  *
707  * Will trigger an automatic deferred table resizing if residency in the
708  * table grows beyond 70%.
709  */
710 static inline int rhashtable_insert_fast(
711 	struct rhashtable *ht, struct rhash_head *obj,
712 	const struct rhashtable_params params)
713 {
714 	void *ret;
715 
716 	ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
717 	if (IS_ERR(ret))
718 		return PTR_ERR(ret);
719 
720 	return ret == NULL ? 0 : -EEXIST;
721 }
722 
723 /**
724  * rhltable_insert_key - insert object into hash list table
725  * @hlt:	hash list table
726  * @key:	the pointer to the key
727  * @list:	pointer to hash list head inside object
728  * @params:	hash table parameters
729  *
730  * Will take a per bucket spinlock to protect against mutual mutations
731  * on the same bucket. Multiple insertions may occur in parallel unless
732  * they map to the same bucket lock.
733  *
734  * It is safe to call this function from atomic context.
735  *
736  * Will trigger an automatic deferred table resizing if residency in the
737  * table grows beyond 70%.
738  */
739 static inline int rhltable_insert_key(
740 	struct rhltable *hlt, const void *key, struct rhlist_head *list,
741 	const struct rhashtable_params params)
742 {
743 	return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
744 						params, true));
745 }
746 
747 /**
748  * rhltable_insert - insert object into hash list table
749  * @hlt:	hash list table
750  * @list:	pointer to hash list head inside object
751  * @params:	hash table parameters
752  *
753  * Will take a per bucket spinlock to protect against mutual mutations
754  * on the same bucket. Multiple insertions may occur in parallel unless
755  * they map to the same bucket lock.
756  *
757  * It is safe to call this function from atomic context.
758  *
759  * Will trigger an automatic deferred table resizing if residency in the
760  * table grows beyond 70%.
761  */
762 static inline int rhltable_insert(
763 	struct rhltable *hlt, struct rhlist_head *list,
764 	const struct rhashtable_params params)
765 {
766 	const char *key = rht_obj(&hlt->ht, &list->rhead);
767 
768 	key += params.key_offset;
769 
770 	return rhltable_insert_key(hlt, key, list, params);
771 }
772 
773 /**
774  * rhashtable_lookup_insert_fast - lookup and insert object into hash table
775  * @ht:		hash table
776  * @obj:	pointer to hash head inside object
777  * @params:	hash table parameters
778  *
779  * Locks down the bucket chain in both the old and new table if a resize
780  * is in progress to ensure that writers can't remove from the old table
781  * and can't insert to the new table during the atomic operation of search
782  * and insertion. Searches for duplicates in both the old and new table if
783  * a resize is in progress.
784  *
785  * This lookup function may only be used for fixed key hash table (key_len
786  * parameter set). It will BUG() if used inappropriately.
787  *
788  * It is safe to call this function from atomic context.
789  *
790  * Will trigger an automatic deferred table resizing if residency in the
791  * table grows beyond 70%.
792  */
793 static inline int rhashtable_lookup_insert_fast(
794 	struct rhashtable *ht, struct rhash_head *obj,
795 	const struct rhashtable_params params)
796 {
797 	const char *key = rht_obj(ht, obj);
798 	void *ret;
799 
800 	BUG_ON(ht->p.obj_hashfn);
801 
802 	ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
803 				       false);
804 	if (IS_ERR(ret))
805 		return PTR_ERR(ret);
806 
807 	return ret == NULL ? 0 : -EEXIST;
808 }
809 
810 /**
811  * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
812  * @ht:		hash table
813  * @obj:	pointer to hash head inside object
814  * @params:	hash table parameters
815  *
816  * Just like rhashtable_lookup_insert_fast(), but this function returns the
817  * object if it exists, NULL if it did not and the insertion was successful,
818  * and an ERR_PTR otherwise.
819  */
820 static inline void *rhashtable_lookup_get_insert_fast(
821 	struct rhashtable *ht, struct rhash_head *obj,
822 	const struct rhashtable_params params)
823 {
824 	const char *key = rht_obj(ht, obj);
825 
826 	BUG_ON(ht->p.obj_hashfn);
827 
828 	return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
829 					false);
830 }
831 
832 /**
833  * rhashtable_lookup_insert_key - search and insert object to hash table
834  *				  with explicit key
835  * @ht:		hash table
836  * @key:	key
837  * @obj:	pointer to hash head inside object
838  * @params:	hash table parameters
839  *
840  * Locks down the bucket chain in both the old and new table if a resize
841  * is in progress to ensure that writers can't remove from the old table
842  * and can't insert to the new table during the atomic operation of search
843  * and insertion. Searches for duplicates in both the old and new table if
844  * a resize is in progress.
845  *
846  * Lookups may occur in parallel with hashtable mutations and resizing.
847  *
848  * Will trigger an automatic deferred table resizing if residency in the
849  * table grows beyond 70%.
850  *
851  * Returns zero on success.
852  */
853 static inline int rhashtable_lookup_insert_key(
854 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
855 	const struct rhashtable_params params)
856 {
857 	void *ret;
858 
859 	BUG_ON(!ht->p.obj_hashfn || !key);
860 
861 	ret = __rhashtable_insert_fast(ht, key, obj, params, false);
862 	if (IS_ERR(ret))
863 		return PTR_ERR(ret);
864 
865 	return ret == NULL ? 0 : -EEXIST;
866 }
867 
868 /**
869  * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
870  * @ht:		hash table
871  * @obj:	pointer to hash head inside object
872  * @params:	hash table parameters
873  * @data:	pointer to element data already in hashes
874  *
875  * Just like rhashtable_lookup_insert_key(), but this function returns the
876  * object if it exists, NULL if it does not and the insertion was successful,
877  * and an ERR_PTR otherwise.
878  */
879 static inline void *rhashtable_lookup_get_insert_key(
880 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
881 	const struct rhashtable_params params)
882 {
883 	BUG_ON(!ht->p.obj_hashfn || !key);
884 
885 	return __rhashtable_insert_fast(ht, key, obj, params, false);
886 }
887 
888 /* Internal function, please use rhashtable_remove_fast() instead */
889 static inline int __rhashtable_remove_fast_one(
890 	struct rhashtable *ht, struct bucket_table *tbl,
891 	struct rhash_head *obj, const struct rhashtable_params params,
892 	bool rhlist)
893 {
894 	struct rhash_head __rcu **pprev;
895 	struct rhash_head *he;
896 	spinlock_t * lock;
897 	unsigned int hash;
898 	int err = -ENOENT;
899 
900 	hash = rht_head_hashfn(ht, tbl, obj, params);
901 	lock = rht_bucket_lock(tbl, hash);
902 
903 	spin_lock_bh(lock);
904 
905 	pprev = rht_bucket_var(tbl, hash);
906 	rht_for_each_continue(he, *pprev, tbl, hash) {
907 		struct rhlist_head *list;
908 
909 		list = container_of(he, struct rhlist_head, rhead);
910 
911 		if (he != obj) {
912 			struct rhlist_head __rcu **lpprev;
913 
914 			pprev = &he->next;
915 
916 			if (!rhlist)
917 				continue;
918 
919 			do {
920 				lpprev = &list->next;
921 				list = rht_dereference_bucket(list->next,
922 							      tbl, hash);
923 			} while (list && obj != &list->rhead);
924 
925 			if (!list)
926 				continue;
927 
928 			list = rht_dereference_bucket(list->next, tbl, hash);
929 			RCU_INIT_POINTER(*lpprev, list);
930 			err = 0;
931 			break;
932 		}
933 
934 		obj = rht_dereference_bucket(obj->next, tbl, hash);
935 		err = 1;
936 
937 		if (rhlist) {
938 			list = rht_dereference_bucket(list->next, tbl, hash);
939 			if (list) {
940 				RCU_INIT_POINTER(list->rhead.next, obj);
941 				obj = &list->rhead;
942 				err = 0;
943 			}
944 		}
945 
946 		rcu_assign_pointer(*pprev, obj);
947 		break;
948 	}
949 
950 	spin_unlock_bh(lock);
951 
952 	if (err > 0) {
953 		atomic_dec(&ht->nelems);
954 		if (unlikely(ht->p.automatic_shrinking &&
955 			     rht_shrink_below_30(ht, tbl)))
956 			schedule_work(&ht->run_work);
957 		err = 0;
958 	}
959 
960 	return err;
961 }
962 
963 /* Internal function, please use rhashtable_remove_fast() instead */
964 static inline int __rhashtable_remove_fast(
965 	struct rhashtable *ht, struct rhash_head *obj,
966 	const struct rhashtable_params params, bool rhlist)
967 {
968 	struct bucket_table *tbl;
969 	int err;
970 
971 	rcu_read_lock();
972 
973 	tbl = rht_dereference_rcu(ht->tbl, ht);
974 
975 	/* Because we have already taken (and released) the bucket
976 	 * lock in old_tbl, if we find that future_tbl is not yet
977 	 * visible then that guarantees the entry to still be in
978 	 * the old tbl if it exists.
979 	 */
980 	while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
981 						   rhlist)) &&
982 	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
983 		;
984 
985 	rcu_read_unlock();
986 
987 	return err;
988 }
989 
990 /**
991  * rhashtable_remove_fast - remove object from hash table
992  * @ht:		hash table
993  * @obj:	pointer to hash head inside object
994  * @params:	hash table parameters
995  *
996  * Since the hash chain is single linked, the removal operation needs to
997  * walk the bucket chain upon removal. The removal operation is thus
998  * considerable slow if the hash table is not correctly sized.
999  *
1000  * Will automatically shrink the table if permitted when residency drops
1001  * below 30%.
1002  *
1003  * Returns zero on success, -ENOENT if the entry could not be found.
1004  */
1005 static inline int rhashtable_remove_fast(
1006 	struct rhashtable *ht, struct rhash_head *obj,
1007 	const struct rhashtable_params params)
1008 {
1009 	return __rhashtable_remove_fast(ht, obj, params, false);
1010 }
1011 
1012 /**
1013  * rhltable_remove - remove object from hash list table
1014  * @hlt:	hash list table
1015  * @list:	pointer to hash list head inside object
1016  * @params:	hash table parameters
1017  *
1018  * Since the hash chain is single linked, the removal operation needs to
1019  * walk the bucket chain upon removal. The removal operation is thus
1020  * considerable slow if the hash table is not correctly sized.
1021  *
1022  * Will automatically shrink the table if permitted when residency drops
1023  * below 30%
1024  *
1025  * Returns zero on success, -ENOENT if the entry could not be found.
1026  */
1027 static inline int rhltable_remove(
1028 	struct rhltable *hlt, struct rhlist_head *list,
1029 	const struct rhashtable_params params)
1030 {
1031 	return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
1032 }
1033 
1034 /* Internal function, please use rhashtable_replace_fast() instead */
1035 static inline int __rhashtable_replace_fast(
1036 	struct rhashtable *ht, struct bucket_table *tbl,
1037 	struct rhash_head *obj_old, struct rhash_head *obj_new,
1038 	const struct rhashtable_params params)
1039 {
1040 	struct rhash_head __rcu **pprev;
1041 	struct rhash_head *he;
1042 	spinlock_t *lock;
1043 	unsigned int hash;
1044 	int err = -ENOENT;
1045 
1046 	/* Minimally, the old and new objects must have same hash
1047 	 * (which should mean identifiers are the same).
1048 	 */
1049 	hash = rht_head_hashfn(ht, tbl, obj_old, params);
1050 	if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
1051 		return -EINVAL;
1052 
1053 	lock = rht_bucket_lock(tbl, hash);
1054 
1055 	spin_lock_bh(lock);
1056 
1057 	pprev = rht_bucket_var(tbl, hash);
1058 	rht_for_each_continue(he, *pprev, tbl, hash) {
1059 		if (he != obj_old) {
1060 			pprev = &he->next;
1061 			continue;
1062 		}
1063 
1064 		rcu_assign_pointer(obj_new->next, obj_old->next);
1065 		rcu_assign_pointer(*pprev, obj_new);
1066 		err = 0;
1067 		break;
1068 	}
1069 
1070 	spin_unlock_bh(lock);
1071 
1072 	return err;
1073 }
1074 
1075 /**
1076  * rhashtable_replace_fast - replace an object in hash table
1077  * @ht:		hash table
1078  * @obj_old:	pointer to hash head inside object being replaced
1079  * @obj_new:	pointer to hash head inside object which is new
1080  * @params:	hash table parameters
1081  *
1082  * Replacing an object doesn't affect the number of elements in the hash table
1083  * or bucket, so we don't need to worry about shrinking or expanding the
1084  * table here.
1085  *
1086  * Returns zero on success, -ENOENT if the entry could not be found,
1087  * -EINVAL if hash is not the same for the old and new objects.
1088  */
1089 static inline int rhashtable_replace_fast(
1090 	struct rhashtable *ht, struct rhash_head *obj_old,
1091 	struct rhash_head *obj_new,
1092 	const struct rhashtable_params params)
1093 {
1094 	struct bucket_table *tbl;
1095 	int err;
1096 
1097 	rcu_read_lock();
1098 
1099 	tbl = rht_dereference_rcu(ht->tbl, ht);
1100 
1101 	/* Because we have already taken (and released) the bucket
1102 	 * lock in old_tbl, if we find that future_tbl is not yet
1103 	 * visible then that guarantees the entry to still be in
1104 	 * the old tbl if it exists.
1105 	 */
1106 	while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
1107 						obj_new, params)) &&
1108 	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
1109 		;
1110 
1111 	rcu_read_unlock();
1112 
1113 	return err;
1114 }
1115 
1116 /**
1117  * rhltable_walk_enter - Initialise an iterator
1118  * @hlt:	Table to walk over
1119  * @iter:	Hash table Iterator
1120  *
1121  * This function prepares a hash table walk.
1122  *
1123  * Note that if you restart a walk after rhashtable_walk_stop you
1124  * may see the same object twice.  Also, you may miss objects if
1125  * there are removals in between rhashtable_walk_stop and the next
1126  * call to rhashtable_walk_start.
1127  *
1128  * For a completely stable walk you should construct your own data
1129  * structure outside the hash table.
1130  *
1131  * This function may be called from any process context, including
1132  * non-preemptable context, but cannot be called from softirq or
1133  * hardirq context.
1134  *
1135  * You must call rhashtable_walk_exit after this function returns.
1136  */
1137 static inline void rhltable_walk_enter(struct rhltable *hlt,
1138 				       struct rhashtable_iter *iter)
1139 {
1140 	return rhashtable_walk_enter(&hlt->ht, iter);
1141 }
1142 
1143 /**
1144  * rhltable_free_and_destroy - free elements and destroy hash list table
1145  * @hlt:	the hash list table to destroy
1146  * @free_fn:	callback to release resources of element
1147  * @arg:	pointer passed to free_fn
1148  *
1149  * See documentation for rhashtable_free_and_destroy.
1150  */
1151 static inline void rhltable_free_and_destroy(struct rhltable *hlt,
1152 					     void (*free_fn)(void *ptr,
1153 							     void *arg),
1154 					     void *arg)
1155 {
1156 	return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
1157 }
1158 
1159 static inline void rhltable_destroy(struct rhltable *hlt)
1160 {
1161 	return rhltable_free_and_destroy(hlt, NULL, NULL);
1162 }
1163 
1164 #endif /* _LINUX_RHASHTABLE_H */
1165