/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef _LINUX_RHASHTABLE_H
#define _LINUX_RHASHTABLE_H

#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/jhash.h>
#include <linux/list_nulls.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/rculist.h>

/*
 * The end of the chain is marked with a special nulls marker which has
 * the following format:
 *
 * +-------+-----------------------------------------------------+-+
 * | Base  |                      Hash                           |1|
 * +-------+-----------------------------------------------------+-+
 *
 * Base (4 bits) : Reserved to distinguish between multiple tables.
 *                 Specified via &struct rhashtable_params.nulls_base.
 * Hash (27 bits): Full hash (unmasked) of first element added to bucket
 * 1 (1 bit)     : Nulls marker (always set)
 *
 * The remaining bits of the next pointer remain unused for now.
 */
#define RHT_BASE_BITS		4
#define RHT_HASH_BITS		27
#define RHT_BASE_SHIFT		RHT_HASH_BITS

/* Base bits plus 1 bit for nulls marker */
#define RHT_HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
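
/* Example (illustrative): with nulls_base set to, say, (1UL << RHT_BASE_SHIFT)
 * and a bucket whose first element hashed to h, the chain terminates in
 *
 *	NULLS_MARKER(nulls_base + h) == (((nulls_base + h) << 1) | 1)
 *
 * The low bit is always set, so the marker can never be confused with a
 * real (at least word-aligned) struct rhash_head pointer; see
 * rht_is_a_nulls() and rht_get_nulls_value() below.
 */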

/* Maximum chain length before rehash
 *
 * The maximum (not average) chain length grows with the size of the hash
 * table, at a rate of (log N)/(log log N).
 *
 * The value of 16 is selected so that even if the hash table grew to
 * 2^32 you would not expect the maximum chain length to exceed it
 * unless we are under attack (or extremely unlucky).
 *
 * As this limit is only to detect attacks, we don't need to set it to a
 * lower value: the chain length would need to vastly exceed 16 to have
 * any real effect on the system.
 */
#define RHT_ELASTICITY	16u

struct rhash_head {
	struct rhash_head __rcu		*next;
};

struct rhlist_head {
	struct rhash_head		rhead;
	struct rhlist_head __rcu	*next;
};

/**
 * struct bucket_table - Table of hash buckets
 * @size: Number of hash buckets
 * @nest: Number of bits of first-level nested table.
 * @rehash: Current bucket being rehashed
 * @hash_rnd: Random seed to fold into hash
 * @locks_mask: Mask to apply before accessing locks[]
 * @locks: Array of spinlocks protecting individual buckets
 * @walkers: List of active walkers
 * @rcu: RCU structure for freeing the table
 * @future_tbl: Table under construction during rehashing
 * @buckets: size * hash buckets
 */
struct bucket_table {
	unsigned int		size;
	unsigned int		nest;
	unsigned int		rehash;
	u32			hash_rnd;
	unsigned int		locks_mask;
	spinlock_t		*locks;
	struct list_head	walkers;
	struct rcu_head		rcu;

	struct bucket_table __rcu *future_tbl;

	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
};

/**
 * struct rhashtable_compare_arg - Key for the function rhashtable_compare
 * @ht: Hash table
 * @key: Key to compare against
 */
struct rhashtable_compare_arg {
	struct rhashtable *ht;
	const void *key;
};

typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
			       const void *obj);

struct rhashtable;

/**
 * struct rhashtable_params - Hash table construction parameters
 * @nelem_hint: Hint on number of elements, should be 75% of desired size
 * @key_len: Length of key
 * @key_offset: Offset of key in struct to be hashed
 * @head_offset: Offset of rhash_head in struct to be hashed
 * @max_size: Maximum size while expanding
 * @min_size: Minimum size while shrinking
 * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
 * @automatic_shrinking: Enable automatic shrinking of tables
 * @nulls_base: Base value to generate nulls marker
 * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
 * @obj_hashfn: Function to hash object
 * @obj_cmpfn: Function to compare key with object
 */
struct rhashtable_params {
	u16			nelem_hint;
	u16			key_len;
	u16			key_offset;
	u16			head_offset;
	unsigned int		max_size;
	u16			min_size;
	bool			automatic_shrinking;
	u8			locks_mul;
	u32			nulls_base;
	rht_hashfn_t		hashfn;
	rht_obj_hashfn_t	obj_hashfn;
	rht_obj_cmpfn_t		obj_cmpfn;
};
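
/* Example (illustrative, not part of this API): a minimal hashable object
 * keyed by a u32 id, and the parameters a caller might hand to
 * rhashtable_init(). The names test_obj/test_params are made up.
 *
 *	struct test_obj {
 *		u32			id;
 *		struct rhash_head	node;
 *	};
 *
 *	static const struct rhashtable_params test_params = {
 *		.key_len		= sizeof(u32),
 *		.key_offset		= offsetof(struct test_obj, id),
 *		.head_offset		= offsetof(struct test_obj, node),
 *		.automatic_shrinking	= true,
 *	};
 *
 * With key_len a multiple of 4 and no explicit .hashfn, hashing falls
 * back to jhash2() as documented above.
 */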

/**
 * struct rhashtable - Hash table handle
 * @tbl: Bucket table
 * @nelems: Number of elements in table
 * @key_len: Key length for hashfn
 * @p: Configuration parameters
 * @max_elems: Maximum number of elements in table
 * @rhlist: True if this is an rhltable
 * @run_work: Deferred worker to expand/shrink asynchronously
 * @mutex: Mutex to protect current/future table swapping
 * @lock: Spin lock to protect walker list
 */
struct rhashtable {
	struct bucket_table __rcu	*tbl;
	atomic_t			nelems;
	unsigned int			key_len;
	struct rhashtable_params	p;
	unsigned int			max_elems;
	bool				rhlist;
	struct work_struct		run_work;
	struct mutex			mutex;
	spinlock_t			lock;
};

/**
 * struct rhltable - Hash table with duplicate objects in a list
 * @ht: Underlying rhashtable
 */
struct rhltable {
	struct rhashtable ht;
};

/**
 * struct rhashtable_walker - Hash table walker
 * @list: List entry on list of walkers
 * @tbl: The table that we were walking over
 */
struct rhashtable_walker {
	struct list_head list;
	struct bucket_table *tbl;
};

/**
 * struct rhashtable_iter - Hash table iterator
 * @ht: Table to iterate through
 * @p: Current pointer
 * @list: Current hash list pointer
 * @walker: Associated rhashtable walker
 * @slot: Current slot
 * @skip: Number of entries to skip in slot
 */
struct rhashtable_iter {
	struct rhashtable *ht;
	struct rhash_head *p;
	struct rhlist_head *list;
	struct rhashtable_walker walker;
	unsigned int slot;
	unsigned int skip;
};

static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
{
	return NULLS_MARKER(ht->p.nulls_base + hash);
}

#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
	((ptr) = (typeof(ptr)) rht_marker(ht, hash))

static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
	return ((unsigned long) ptr & 1);
}

static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr)
{
	return ((unsigned long) ptr) >> 1;
}

static inline void *rht_obj(const struct rhashtable *ht,
			    const struct rhash_head *he)
{
	return (char *)he - ht->p.head_offset;
}

static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
					    unsigned int hash)
{
	return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
}

static inline unsigned int rht_key_hashfn(
	struct rhashtable *ht, const struct bucket_table *tbl,
	const void *key, const struct rhashtable_params params)
{
	unsigned int hash;

	/* params must be equal to ht->p if it isn't constant. */
	if (!__builtin_constant_p(params.key_len))
		hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
	else if (params.key_len) {
		unsigned int key_len = params.key_len;

		if (params.hashfn)
			hash = params.hashfn(key, key_len, tbl->hash_rnd);
		else if (key_len & (sizeof(u32) - 1))
			hash = jhash(key, key_len, tbl->hash_rnd);
		else
			hash = jhash2(key, key_len / sizeof(u32),
				      tbl->hash_rnd);
	} else {
		unsigned int key_len = ht->p.key_len;

		if (params.hashfn)
			hash = params.hashfn(key, key_len, tbl->hash_rnd);
		else
			hash = jhash(key, key_len, tbl->hash_rnd);
	}

	return rht_bucket_index(tbl, hash);
}

static inline unsigned int rht_head_hashfn(
	struct rhashtable *ht, const struct bucket_table *tbl,
	const struct rhash_head *he, const struct rhashtable_params params)
{
	const char *ptr = rht_obj(ht, he);

	return likely(params.obj_hashfn) ?
	       rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
							    ht->p.key_len,
						       tbl->hash_rnd)) :
	       rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
}

/**
 * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
 * @ht:		hash table
 * @tbl:	current table
 */
static inline bool rht_grow_above_75(const struct rhashtable *ht,
				     const struct bucket_table *tbl)
{
	/* Expand table when exceeding 75% load */
	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
	       (!ht->p.max_size || tbl->size < ht->p.max_size);
}

/**
 * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
 * @ht:		hash table
 * @tbl:	current table
 */
static inline bool rht_shrink_below_30(const struct rhashtable *ht,
				       const struct bucket_table *tbl)
{
	/* Shrink table beneath 30% load */
	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
	       tbl->size > ht->p.min_size;
}

/**
 * rht_grow_above_100 - returns true if nelems > table-size
 * @ht:		hash table
 * @tbl:	current table
 */
static inline bool rht_grow_above_100(const struct rhashtable *ht,
				      const struct bucket_table *tbl)
{
	return atomic_read(&ht->nelems) > tbl->size &&
		(!ht->p.max_size || tbl->size < ht->p.max_size);
}

/**
 * rht_grow_above_max - returns true if table is above maximum
 * @ht:		hash table
 * @tbl:	current table
 */
static inline bool rht_grow_above_max(const struct rhashtable *ht,
				      const struct bucket_table *tbl)
{
	return atomic_read(&ht->nelems) >= ht->max_elems;
}
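
/* Worked example: with tbl->size == 1024, rht_grow_above_75() fires once
 * nelems exceeds 768 (1024 / 4 * 3), rht_shrink_below_30() once nelems
 * drops below 307 (1024 * 3 / 10), and rht_grow_above_100() once nelems
 * exceeds 1024; all subject to the min_size/max_size bounds above.
 */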

/* The bucket lock is selected based on the hash and protects mutations
 * on a group of hash buckets.
 *
 * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
 * a single lock always covers both buckets which may both contain
 * entries which link to the same bucket of the old table during resizing.
 * This simplifies the locking, as taking the bucket lock in both
 * tables during a resize always guarantees protection.
 *
 * IMPORTANT: When holding the bucket lock of both the old and new table
 * during expansions and shrinking, the old bucket lock must always be
 * acquired first.
 */
static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
					  unsigned int hash)
{
	return &tbl->locks[hash & tbl->locks_mask];
}
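
/* Illustrative sketch of the rule above, modelled on the resize path in
 * lib/rhashtable.c: when both tables must be locked, the old bucket lock
 * is taken first and the new one is nested inside it.
 *
 *	spin_lock_bh(rht_bucket_lock(old_tbl, hash));
 *	spin_lock_nested(rht_bucket_lock(new_tbl, new_hash),
 *			 SINGLE_DEPTH_NESTING);
 *	... relink entries ...
 *	spin_unlock(rht_bucket_lock(new_tbl, new_hash));
 *	spin_unlock_bh(rht_bucket_lock(old_tbl, hash));
 */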

#ifdef CONFIG_PROVE_LOCKING
int lockdep_rht_mutex_is_held(struct rhashtable *ht);
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
#else
static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return 1;
}

static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
					     u32 hash)
{
	return 1;
}
#endif /* CONFIG_PROVE_LOCKING */

int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params);
int rhltable_init(struct rhltable *hlt,
		  const struct rhashtable_params *params);
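
/* Usage sketch (illustrative, reusing the hypothetical test_params from
 * the example above):
 *
 *	struct rhashtable ht;
 *	int err;
 *
 *	err = rhashtable_init(&ht, &test_params);
 *	if (err)
 *		return err;
 *	...
 *	rhashtable_destroy(&ht);
 */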

void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
			     struct rhash_head *obj);

void rhashtable_walk_enter(struct rhashtable *ht,
			   struct rhashtable_iter *iter);
void rhashtable_walk_exit(struct rhashtable_iter *iter);
int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
void *rhashtable_walk_next(struct rhashtable_iter *iter);
void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
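
/* Walker usage sketch (illustrative): rhashtable_walk_next() returns
 * ERR_PTR(-EAGAIN) when a resize forces the walk to restart, so callers
 * typically loop like this:
 *
 *	struct rhashtable_iter iter;
 *	void *obj;
 *
 *	rhashtable_walk_enter(&ht, &iter);
 *	rhashtable_walk_start(&iter);	(a -EAGAIN return here may be ignored)
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;
 *			break;
 *		}
 *		... process obj ...
 *	}
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */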

void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg);
void rhashtable_destroy(struct rhashtable *ht);

struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
					    unsigned int hash);
struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
						   struct bucket_table *tbl,
						   unsigned int hash);

#define rht_dereference(p, ht) \
	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_rcu(p, ht) \
	rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))

#define rht_dereference_bucket(p, tbl, hash) \
	rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))

#define rht_dereference_bucket_rcu(p, tbl, hash) \
	rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))

#define rht_entry(tpos, pos, member) \
	({ tpos = container_of(pos, typeof(*tpos), member); 1; })

static inline struct rhash_head __rcu *const *rht_bucket(
	const struct bucket_table *tbl, unsigned int hash)
{
	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
				     &tbl->buckets[hash];
}

static inline struct rhash_head __rcu **rht_bucket_var(
	struct bucket_table *tbl, unsigned int hash)
{
	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
				     &tbl->buckets[hash];
}

static inline struct rhash_head __rcu **rht_bucket_insert(
	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
{
	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
				     &tbl->buckets[hash];
}

/**
 * rht_for_each_continue - continue iterating over hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the previous &struct rhash_head to continue from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 */
#define rht_for_each_continue(pos, head, tbl, hash) \
	for (pos = rht_dereference_bucket(head, tbl, hash); \
	     !rht_is_a_nulls(pos); \
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))

/**
 * rht_for_each - iterate over hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 */
#define rht_for_each(pos, tbl, hash) \
	rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)

/**
 * rht_for_each_entry_continue - continue iterating over hash chain
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the previous &struct rhash_head to continue from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 */
#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member)	\
	for (pos = rht_dereference_bucket(head, tbl, hash);		\
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))

/**
 * rht_for_each_entry - iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 */
#define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
	rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash),	\
				    tbl, hash, member)
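
/* Example (illustrative): walking one bucket under its bucket lock, with
 * the hypothetical struct test_obj from above.
 *
 *	struct test_obj *obj;
 *	struct rhash_head *pos;
 *
 *	rht_for_each_entry(obj, pos, tbl, hash, node)
 *		pr_info("id %u\n", obj->id);
 */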

/**
 * rht_for_each_entry_safe - safely iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @next:	the &struct rhash_head to use as next in loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive allows for the looped code to
 * remove the loop cursor from the list.
 */
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
	for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
	     next = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
	     pos = next,						      \
	     next = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)

/**
 * rht_for_each_rcu_continue - continue iterating over rcu hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the previous &struct rhash_head to continue from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert_fast() as long as
 * the traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_rcu_continue(pos, head, tbl, hash)			\
	for (({barrier(); }),						\
	     pos = rht_dereference_bucket_rcu(head, tbl, hash);		\
	     !rht_is_a_nulls(pos);					\
	     pos = rcu_dereference_raw(pos->next))

/**
 * rht_for_each_rcu - iterate over rcu hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert_fast() as long as
 * the traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_rcu(pos, tbl, hash)				\
	rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)

/**
 * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the previous &struct rhash_head to continue from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert_fast() as long as
 * the traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
	for (({barrier(); }),						    \
	     pos = rht_dereference_bucket_rcu(head, tbl, hash);		    \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
	     pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))

/**
 * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert_fast() as long as
 * the traversal is guarded by rcu_read_lock().
 */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
	rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
					tbl, hash, member)
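
/* Example (illustrative): the RCU variants need only rcu_read_lock()
 * rather than the bucket lock:
 *
 *	rcu_read_lock();
 *	rht_for_each_entry_rcu(obj, pos, tbl, hash, node)
 *		pr_info("id %u\n", obj->id);
 *	rcu_read_unlock();
 */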

/**
 * rhl_for_each_rcu - iterate over rcu hash table list
 * @pos:	the &struct rhlist_head to use as a loop cursor.
 * @list:	the head of the list
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.
 */
#define rhl_for_each_rcu(pos, list)					\
	for (pos = list; pos; pos = rcu_dereference_raw(pos->next))

/**
 * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhlist_head to use as a loop cursor.
 * @list:	the head of the list
 * @member:	name of the &struct rhlist_head within the hashable struct.
 *
 * This hash chain list-traversal primitive should be used on the
 * list returned by rhltable_lookup.
 */
#define rhl_for_each_entry_rcu(tpos, pos, list, member)			\
	for (pos = list; pos && rht_entry(tpos, pos, member);		\
	     pos = rcu_dereference_raw(pos->next))

static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
				     const void *obj)
{
	struct rhashtable *ht = arg->ht;
	const char *ptr = obj;

	return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
}

/* Internal function, do not use. */
static inline struct rhash_head *__rhashtable_lookup(
	struct rhashtable *ht, const void *key,
	const struct rhashtable_params params)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct bucket_table *tbl;
	struct rhash_head *he;
	unsigned int hash;

	tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
	hash = rht_key_hashfn(ht, tbl, key, params);
	rht_for_each_rcu(he, tbl, hash) {
		if (params.obj_cmpfn ?
		    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
		    rhashtable_compare(&arg, rht_obj(ht, he)))
			continue;
		return he;
	}

	/* Ensure we see any new tables. */
	smp_rmb();

	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (unlikely(tbl))
		goto restart;

	return NULL;
}

/**
 * rhashtable_lookup - search hash table
 * @ht:		hash table
 * @key:	the pointer to the key
 * @params:	hash table parameters
 *
 * Computes the hash value for the key and traverses the bucket chain looking
 * for an entry with an identical key. The first matching entry is returned.
 *
 * This must only be called under the RCU read lock.
 *
 * Returns the first entry on which the compare function returned true.
 */
static inline void *rhashtable_lookup(
	struct rhashtable *ht, const void *key,
	const struct rhashtable_params params)
{
	struct rhash_head *he = __rhashtable_lookup(ht, key, params);

	return he ? rht_obj(ht, he) : NULL;
}
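
/* Example (illustrative, using the hypothetical test_obj/test_params):
 *
 *	u32 id = 42;
 *	struct test_obj *obj;
 *
 *	rcu_read_lock();
 *	obj = rhashtable_lookup(&ht, &id, test_params);
 *	if (obj)
 *		... use obj; it is only guaranteed to stay around while
 *		    the RCU read lock is held ...
 *	rcu_read_unlock();
 */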

/**
 * rhashtable_lookup_fast - search hash table, without RCU read lock
 * @ht:		hash table
 * @key:	the pointer to the key
 * @params:	hash table parameters
 *
 * Computes the hash value for the key and traverses the bucket chain looking
 * for an entry with an identical key. The first matching entry is returned.
 *
 * Only use this function when you have other mechanisms guaranteeing
 * that the object won't go away after the RCU read lock is released.
 *
 * Returns the first entry on which the compare function returned true.
 */
static inline void *rhashtable_lookup_fast(
	struct rhashtable *ht, const void *key,
	const struct rhashtable_params params)
{
	void *obj;

	rcu_read_lock();
	obj = rhashtable_lookup(ht, key, params);
	rcu_read_unlock();

	return obj;
}

/**
 * rhltable_lookup - search hash list table
 * @hlt:	hash table
 * @key:	the pointer to the key
 * @params:	hash table parameters
 *
 * Computes the hash value for the key and traverses the bucket chain looking
 * for an entry with an identical key.  All matching entries are returned
 * in a list.
 *
 * This must only be called under the RCU read lock.
 *
 * Returns the list of entries that match the given key.
 */
static inline struct rhlist_head *rhltable_lookup(
	struct rhltable *hlt, const void *key,
	const struct rhashtable_params params)
{
	struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);

	return he ? container_of(he, struct rhlist_head, rhead) : NULL;
}
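
/* Example (illustrative): iterating over all duplicates for one key,
 * assuming the object embeds a struct rhlist_head member named "lnode".
 *
 *	struct rhlist_head *list, *pos;
 *	struct test_obj *obj;
 *
 *	rcu_read_lock();
 *	list = rhltable_lookup(&hlt, &id, test_params);
 *	rhl_for_each_entry_rcu(obj, pos, list, lnode)
 *		pr_info("id %u\n", obj->id);
 *	rcu_read_unlock();
 */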

/* Internal function, please use rhashtable_insert_fast() instead. This
 * function returns the existing element already in the hash table if there
 * is a clash, otherwise it returns an error via ERR_PTR().
 */
static inline void *__rhashtable_insert_fast(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params, bool rhlist)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_head __rcu **pprev;
	struct bucket_table *tbl;
	struct rhash_head *head;
	spinlock_t *lock;
	unsigned int hash;
	int elasticity;
	void *data;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);
	hash = rht_head_hashfn(ht, tbl, obj, params);
	lock = rht_bucket_lock(tbl, hash);
	spin_lock_bh(lock);

	if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) {
slow_path:
		spin_unlock_bh(lock);
		rcu_read_unlock();
		return rhashtable_insert_slow(ht, key, obj);
	}

	elasticity = RHT_ELASTICITY;
	pprev = rht_bucket_insert(ht, tbl, hash);
	data = ERR_PTR(-ENOMEM);
	if (!pprev)
		goto out;

	rht_for_each_continue(head, *pprev, tbl, hash) {
		struct rhlist_head *plist;
		struct rhlist_head *list;

		elasticity--;
		if (!key ||
		    (params.obj_cmpfn ?
		     params.obj_cmpfn(&arg, rht_obj(ht, head)) :
		     rhashtable_compare(&arg, rht_obj(ht, head))))
			continue;

		data = rht_obj(ht, head);

		if (!rhlist)
			goto out;

		list = container_of(obj, struct rhlist_head, rhead);
		plist = container_of(head, struct rhlist_head, rhead);

		RCU_INIT_POINTER(list->next, plist);
		head = rht_dereference_bucket(head->next, tbl, hash);
		RCU_INIT_POINTER(list->rhead.next, head);
		rcu_assign_pointer(*pprev, obj);

		goto good;
	}

	if (elasticity <= 0)
		goto slow_path;

	data = ERR_PTR(-E2BIG);
	if (unlikely(rht_grow_above_max(ht, tbl)))
		goto out;

	if (unlikely(rht_grow_above_100(ht, tbl)))
		goto slow_path;

	head = rht_dereference_bucket(*pprev, tbl, hash);

	RCU_INIT_POINTER(obj->next, head);
	if (rhlist) {
		struct rhlist_head *list;

		list = container_of(obj, struct rhlist_head, rhead);
		RCU_INIT_POINTER(list->next, NULL);
	}

	rcu_assign_pointer(*pprev, obj);

	atomic_inc(&ht->nelems);
	if (rht_grow_above_75(ht, tbl))
		schedule_work(&ht->run_work);

good:
	data = NULL;

out:
	spin_unlock_bh(lock);
	rcu_read_unlock();

	return data;
}

/**
 * rhashtable_insert_fast - insert object into hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Will take a per bucket spinlock to protect against mutual mutations
 * on the same bucket. Multiple insertions may occur in parallel unless
 * they map to the same bucket lock.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if the table grows
 * beyond the 75% load watermark (see rht_grow_above_75()).
 */
static inline int rhashtable_insert_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	void *ret;

	ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}
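
/* Example (illustrative):
 *
 *	err = rhashtable_insert_fast(&ht, &obj->node, test_params);
 *	if (err)
 *		kfree(obj);	(e.g. -E2BIG or -ENOMEM; obj was not inserted)
 */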

/**
 * rhltable_insert_key - insert object into hash list table
 * @hlt:	hash list table
 * @key:	the pointer to the key
 * @list:	pointer to hash list head inside object
 * @params:	hash table parameters
 *
 * Will take a per bucket spinlock to protect against mutual mutations
 * on the same bucket. Multiple insertions may occur in parallel unless
 * they map to the same bucket lock.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if the table grows
 * beyond the 75% load watermark (see rht_grow_above_75()).
 */
static inline int rhltable_insert_key(
	struct rhltable *hlt, const void *key, struct rhlist_head *list,
	const struct rhashtable_params params)
{
	return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
						params, true));
}

/**
 * rhltable_insert - insert object into hash list table
 * @hlt:	hash list table
 * @list:	pointer to hash list head inside object
 * @params:	hash table parameters
 *
 * Will take a per bucket spinlock to protect against mutual mutations
 * on the same bucket. Multiple insertions may occur in parallel unless
 * they map to the same bucket lock.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if the table grows
 * beyond the 75% load watermark (see rht_grow_above_75()).
 */
static inline int rhltable_insert(
	struct rhltable *hlt, struct rhlist_head *list,
	const struct rhashtable_params params)
{
	const char *key = rht_obj(&hlt->ht, &list->rhead);

	key += params.key_offset;

	return rhltable_insert_key(hlt, key, list, params);
}

/**
 * rhashtable_lookup_insert_fast - lookup and insert object into hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Locks down the bucket chain in both the old and new table if a resize
 * is in progress to ensure that writers can't remove from the old table
 * and can't insert to the new table during the atomic operation of search
 * and insertion. Searches for duplicates in both the old and new table if
 * a resize is in progress.
 *
 * This lookup function may only be used for a fixed-key hash table (key_len
 * parameter set). It will BUG() if used inappropriately.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if the table grows
 * beyond the 75% load watermark (see rht_grow_above_75()).
 */
static inline int rhashtable_lookup_insert_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	const char *key = rht_obj(ht, obj);
	void *ret;

	BUG_ON(ht->p.obj_hashfn);

	ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
				       false);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}

/**
 * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Just like rhashtable_lookup_insert_fast(), but this function returns the
 * object if it exists, NULL if it did not exist and the insertion was
 * successful, and an ERR_PTR otherwise.
 */
static inline void *rhashtable_lookup_get_insert_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	const char *key = rht_obj(ht, obj);

	BUG_ON(ht->p.obj_hashfn);

	return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
					false);
}
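
/* Example (illustrative): insert-if-absent that hands back the incumbent.
 *
 *	struct test_obj *old;
 *
 *	old = rhashtable_lookup_get_insert_fast(&ht, &new_obj->node,
 *						test_params);
 *	if (IS_ERR(old))
 *		return PTR_ERR(old);
 *	if (old)
 *		... new_obj was not inserted; old is the existing object ...
 */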

/**
 * rhashtable_lookup_insert_key - search and insert object to hash table
 *				  with explicit key
 * @ht:		hash table
 * @key:	key
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Locks down the bucket chain in both the old and new table if a resize
 * is in progress to ensure that writers can't remove from the old table
 * and can't insert to the new table during the atomic operation of search
 * and insertion. Searches for duplicates in both the old and new table if
 * a resize is in progress.
 *
 * Lookups may occur in parallel with hashtable mutations and resizing.
 *
 * Will trigger an automatic deferred table resizing if the table grows
 * beyond the 75% load watermark (see rht_grow_above_75()).
 *
 * Returns zero on success.
 */
static inline int rhashtable_lookup_insert_key(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	void *ret;

	BUG_ON(!ht->p.obj_hashfn || !key);

	ret = __rhashtable_insert_fast(ht, key, obj, params, false);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}

/**
 * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
 * @ht:		hash table
 * @key:	key
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Just like rhashtable_lookup_insert_key(), but this function returns the
 * object if it exists, NULL if it does not and the insertion was successful,
 * and an ERR_PTR otherwise.
 */
static inline void *rhashtable_lookup_get_insert_key(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	BUG_ON(!ht->p.obj_hashfn || !key);

	return __rhashtable_insert_fast(ht, key, obj, params, false);
}

/* Internal function, please use rhashtable_remove_fast() instead */
static inline int __rhashtable_remove_fast_one(
	struct rhashtable *ht, struct bucket_table *tbl,
	struct rhash_head *obj, const struct rhashtable_params params,
	bool rhlist)
{
	struct rhash_head __rcu **pprev;
	struct rhash_head *he;
	spinlock_t *lock;
	unsigned int hash;
	int err = -ENOENT;

	hash = rht_head_hashfn(ht, tbl, obj, params);
	lock = rht_bucket_lock(tbl, hash);

	spin_lock_bh(lock);

	pprev = rht_bucket_var(tbl, hash);
	rht_for_each_continue(he, *pprev, tbl, hash) {
		struct rhlist_head *list;

		list = container_of(he, struct rhlist_head, rhead);

		if (he != obj) {
			struct rhlist_head __rcu **lpprev;

			pprev = &he->next;

			if (!rhlist)
				continue;

			do {
				lpprev = &list->next;
				list = rht_dereference_bucket(list->next,
							      tbl, hash);
			} while (list && obj != &list->rhead);

			if (!list)
				continue;

			list = rht_dereference_bucket(list->next, tbl, hash);
			RCU_INIT_POINTER(*lpprev, list);
			err = 0;
			break;
		}

		obj = rht_dereference_bucket(obj->next, tbl, hash);
		err = 1;

		if (rhlist) {
			list = rht_dereference_bucket(list->next, tbl, hash);
			if (list) {
				RCU_INIT_POINTER(list->rhead.next, obj);
				obj = &list->rhead;
				err = 0;
			}
		}

		rcu_assign_pointer(*pprev, obj);
		break;
	}

	spin_unlock_bh(lock);

	if (err > 0) {
		atomic_dec(&ht->nelems);
		if (unlikely(ht->p.automatic_shrinking &&
			     rht_shrink_below_30(ht, tbl)))
			schedule_work(&ht->run_work);
		err = 0;
	}

	return err;
}

/* Internal function, please use rhashtable_remove_fast() instead */
static inline int __rhashtable_remove_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params, bool rhlist)
{
	struct bucket_table *tbl;
	int err;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);

	/* Because we have already taken (and released) the bucket
	 * lock in old_tbl, if we find that future_tbl is not yet
	 * visible then that guarantees that the entry, if it exists,
	 * is still in the old tbl.
	 */
	while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
						   rhlist)) &&
	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
		;

	rcu_read_unlock();

	return err;
}

/**
 * rhashtable_remove_fast - remove object from hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 *
 * Since the hash chain is singly linked, the removal operation needs to
 * walk the bucket chain upon removal. The removal operation is thus
 * considerably slower if the hash table is not correctly sized.
 *
 * Will trigger an automatic deferred table shrink if automatic shrinking
 * is enabled and the table drops below 30% load (see rht_shrink_below_30()).
 *
 * Returns zero on success, -ENOENT if the entry could not be found.
 */
static inline int rhashtable_remove_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	return __rhashtable_remove_fast(ht, obj, params, false);
}
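
/* Example (illustrative): removal followed by an RCU-deferred free, since
 * concurrent RCU readers may still be traversing the object. Assumes the
 * object embeds a struct rcu_head member named "rcu".
 *
 *	if (rhashtable_remove_fast(&ht, &obj->node, test_params) == 0)
 *		kfree_rcu(obj, rcu);
 */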

/**
 * rhltable_remove - remove object from hash list table
 * @hlt:	hash list table
 * @list:	pointer to hash list head inside object
 * @params:	hash table parameters
 *
 * Since the hash chain is singly linked, the removal operation needs to
 * walk the bucket chain upon removal. The removal operation is thus
 * considerably slower if the hash table is not correctly sized.
 *
 * Will trigger an automatic deferred table shrink if automatic shrinking
 * is enabled and the table drops below 30% load (see rht_shrink_below_30()).
 *
 * Returns zero on success, -ENOENT if the entry could not be found.
 */
static inline int rhltable_remove(
	struct rhltable *hlt, struct rhlist_head *list,
	const struct rhashtable_params params)
{
	return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
}

/* Internal function, please use rhashtable_replace_fast() instead */
static inline int __rhashtable_replace_fast(
	struct rhashtable *ht, struct bucket_table *tbl,
	struct rhash_head *obj_old, struct rhash_head *obj_new,
	const struct rhashtable_params params)
{
	struct rhash_head __rcu **pprev;
	struct rhash_head *he;
	spinlock_t *lock;
	unsigned int hash;
	int err = -ENOENT;

	/* Minimally, the old and new objects must have the same hash
	 * (which should mean their keys are identical).
	 */
	hash = rht_head_hashfn(ht, tbl, obj_old, params);
	if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
		return -EINVAL;

	lock = rht_bucket_lock(tbl, hash);

	spin_lock_bh(lock);

	pprev = rht_bucket_var(tbl, hash);
	rht_for_each_continue(he, *pprev, tbl, hash) {
		if (he != obj_old) {
			pprev = &he->next;
			continue;
		}

		rcu_assign_pointer(obj_new->next, obj_old->next);
		rcu_assign_pointer(*pprev, obj_new);
		err = 0;
		break;
	}

	spin_unlock_bh(lock);

	return err;
}

/**
 * rhashtable_replace_fast - replace an object in hash table
 * @ht:		hash table
 * @obj_old:	pointer to hash head inside object being replaced
 * @obj_new:	pointer to hash head inside object which is new
 * @params:	hash table parameters
 *
 * Replacing an object doesn't affect the number of elements in the hash table
 * or bucket, so we don't need to worry about shrinking or expanding the
 * table here.
 *
 * Returns zero on success, -ENOENT if the entry could not be found,
 * -EINVAL if hash is not the same for the old and new objects.
 */
static inline int rhashtable_replace_fast(
	struct rhashtable *ht, struct rhash_head *obj_old,
	struct rhash_head *obj_new,
	const struct rhashtable_params params)
{
	struct bucket_table *tbl;
	int err;

	rcu_read_lock();

	tbl = rht_dereference_rcu(ht->tbl, ht);

	/* Because we have already taken (and released) the bucket
	 * lock in old_tbl, if we find that future_tbl is not yet
	 * visible then that guarantees that the entry, if it exists,
	 * is still in the old tbl.
	 */
	while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
						obj_new, params)) &&
	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
		;

	rcu_read_unlock();

	return err;
}

/* Obsolete function, do not use in new code. */
static inline int rhashtable_walk_init(struct rhashtable *ht,
				       struct rhashtable_iter *iter, gfp_t gfp)
{
	rhashtable_walk_enter(ht, iter);
	return 0;
}

/**
 * rhltable_walk_enter - Initialise an iterator
 * @hlt:	Table to walk over
 * @iter:	Hash table iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice.  Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may sleep so you must not call it from interrupt
 * context or with spin locks held.
 *
 * You must call rhashtable_walk_exit after this function returns.
 */
static inline void rhltable_walk_enter(struct rhltable *hlt,
				       struct rhashtable_iter *iter)
{
	return rhashtable_walk_enter(&hlt->ht, iter);
}

/**
 * rhltable_free_and_destroy - free elements and destroy hash list table
 * @hlt:	the hash list table to destroy
 * @free_fn:	callback to release resources of element
 * @arg:	pointer passed to free_fn
 *
 * See documentation for rhashtable_free_and_destroy.
 */
static inline void rhltable_free_and_destroy(struct rhltable *hlt,
					     void (*free_fn)(void *ptr,
							     void *arg),
					     void *arg)
{
	return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
}

static inline void rhltable_destroy(struct rhltable *hlt)
{
	return rhltable_free_and_destroy(hlt, NULL, NULL);
}

#endif /* _LINUX_RHASHTABLE_H */