17e1e7763SThomas Graf /* 27e1e7763SThomas Graf * Resizable, Scalable, Concurrent Hash Table 37e1e7763SThomas Graf * 47e1e7763SThomas Graf * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> 57e1e7763SThomas Graf * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> 67e1e7763SThomas Graf * 77e1e7763SThomas Graf * Based on the following paper: 87e1e7763SThomas Graf * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf 97e1e7763SThomas Graf * 107e1e7763SThomas Graf * Code partially derived from nft_hash 117e1e7763SThomas Graf * 127e1e7763SThomas Graf * This program is free software; you can redistribute it and/or modify 137e1e7763SThomas Graf * it under the terms of the GNU General Public License version 2 as 147e1e7763SThomas Graf * published by the Free Software Foundation. 157e1e7763SThomas Graf */ 167e1e7763SThomas Graf 177e1e7763SThomas Graf #include <linux/kernel.h> 187e1e7763SThomas Graf #include <linux/init.h> 197e1e7763SThomas Graf #include <linux/log2.h> 207e1e7763SThomas Graf #include <linux/slab.h> 217e1e7763SThomas Graf #include <linux/vmalloc.h> 227e1e7763SThomas Graf #include <linux/mm.h> 2387545899SDaniel Borkmann #include <linux/jhash.h> 247e1e7763SThomas Graf #include <linux/random.h> 257e1e7763SThomas Graf #include <linux/rhashtable.h> 267e1e7763SThomas Graf 277e1e7763SThomas Graf #define HASH_DEFAULT_SIZE 64UL 287e1e7763SThomas Graf #define HASH_MIN_SIZE 4UL 297e1e7763SThomas Graf 307e1e7763SThomas Graf #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) 317e1e7763SThomas Graf 327e1e7763SThomas Graf #ifdef CONFIG_PROVE_LOCKING 337e1e7763SThomas Graf int lockdep_rht_mutex_is_held(const struct rhashtable *ht) 347e1e7763SThomas Graf { 357b4ce235SHerbert Xu return ht->p.mutex_is_held(ht->p.parent); 367e1e7763SThomas Graf } 377e1e7763SThomas Graf EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); 387e1e7763SThomas Graf #endif 397e1e7763SThomas Graf 40c91eee56SThomas Graf static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) 417e1e7763SThomas Graf { 427e1e7763SThomas Graf return (void *) he - ht->p.head_offset; 437e1e7763SThomas Graf } 447e1e7763SThomas Graf 458d24c0b4SThomas Graf static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) 467e1e7763SThomas Graf { 478d24c0b4SThomas Graf return hash & (tbl->size - 1); 487e1e7763SThomas Graf } 497e1e7763SThomas Graf 508d24c0b4SThomas Graf static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) 518d24c0b4SThomas Graf { 528d24c0b4SThomas Graf u32 hash; 538d24c0b4SThomas Graf 548d24c0b4SThomas Graf if (unlikely(!ht->p.key_len)) 558d24c0b4SThomas Graf hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); 568d24c0b4SThomas Graf else 578d24c0b4SThomas Graf hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, 588d24c0b4SThomas Graf ht->p.hash_rnd); 598d24c0b4SThomas Graf 608d24c0b4SThomas Graf return hash; 618d24c0b4SThomas Graf } 628d24c0b4SThomas Graf 638d24c0b4SThomas Graf static u32 key_hashfn(const struct rhashtable *ht, const void *key, u32 len) 647e1e7763SThomas Graf { 657e1e7763SThomas Graf struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 668d24c0b4SThomas Graf u32 hash; 677e1e7763SThomas Graf 688d24c0b4SThomas Graf hash = ht->p.hashfn(key, len, ht->p.hash_rnd); 698d24c0b4SThomas Graf 708d24c0b4SThomas Graf return rht_bucket_index(tbl, hash); 717e1e7763SThomas Graf } 727e1e7763SThomas Graf 737e1e7763SThomas Graf static u32 head_hashfn(const struct rhashtable *ht, 748d24c0b4SThomas Graf const struct bucket_table *tbl, 758d24c0b4SThomas Graf const struct rhash_head *he) 767e1e7763SThomas Graf { 778d24c0b4SThomas Graf return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he))); 787e1e7763SThomas Graf } 797e1e7763SThomas Graf 806eba8224SThomas Graf static struct bucket_table *bucket_table_alloc(size_t nbuckets) 817e1e7763SThomas Graf { 827e1e7763SThomas Graf struct bucket_table *tbl; 837e1e7763SThomas Graf size_t size; 847e1e7763SThomas Graf 857e1e7763SThomas Graf size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); 866eba8224SThomas Graf tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 877e1e7763SThomas Graf if (tbl == NULL) 887e1e7763SThomas Graf tbl = vzalloc(size); 897e1e7763SThomas Graf 907e1e7763SThomas Graf if (tbl == NULL) 917e1e7763SThomas Graf return NULL; 927e1e7763SThomas Graf 937e1e7763SThomas Graf tbl->size = nbuckets; 947e1e7763SThomas Graf 957e1e7763SThomas Graf return tbl; 967e1e7763SThomas Graf } 977e1e7763SThomas Graf 987e1e7763SThomas Graf static void bucket_table_free(const struct bucket_table *tbl) 997e1e7763SThomas Graf { 1007e1e7763SThomas Graf kvfree(tbl); 1017e1e7763SThomas Graf } 1027e1e7763SThomas Graf 1037e1e7763SThomas Graf /** 1047e1e7763SThomas Graf * rht_grow_above_75 - returns true if nelems > 0.75 * table-size 1057e1e7763SThomas Graf * @ht: hash table 1067e1e7763SThomas Graf * @new_size: new table size 1077e1e7763SThomas Graf */ 1087e1e7763SThomas Graf bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) 1097e1e7763SThomas Graf { 1107e1e7763SThomas Graf /* Expand table when exceeding 75% load */ 1117e1e7763SThomas Graf return ht->nelems > (new_size / 4 * 3); 1127e1e7763SThomas Graf } 1137e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rht_grow_above_75); 1147e1e7763SThomas Graf 1157e1e7763SThomas Graf /** 1167e1e7763SThomas Graf * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size 1177e1e7763SThomas Graf * @ht: hash table 1187e1e7763SThomas Graf * @new_size: new table size 1197e1e7763SThomas Graf */ 1207e1e7763SThomas Graf bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) 1217e1e7763SThomas Graf { 1227e1e7763SThomas Graf /* Shrink table beneath 30% load */ 1237e1e7763SThomas Graf return ht->nelems < (new_size * 3 / 10); 1247e1e7763SThomas Graf } 1257e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rht_shrink_below_30); 1267e1e7763SThomas Graf 1277e1e7763SThomas Graf static void hashtable_chain_unzip(const struct rhashtable *ht, 1287e1e7763SThomas Graf const struct bucket_table *new_tbl, 1297e1e7763SThomas Graf struct bucket_table *old_tbl, size_t n) 1307e1e7763SThomas Graf { 1317e1e7763SThomas Graf struct rhash_head *he, *p, *next; 1327e1e7763SThomas Graf unsigned int h; 1337e1e7763SThomas Graf 1347e1e7763SThomas Graf /* Old bucket empty, no work needed. */ 1357e1e7763SThomas Graf p = rht_dereference(old_tbl->buckets[n], ht); 1367e1e7763SThomas Graf if (!p) 1377e1e7763SThomas Graf return; 1387e1e7763SThomas Graf 1397e1e7763SThomas Graf /* Advance the old bucket pointer one or more times until it 1407e1e7763SThomas Graf * reaches a node that doesn't hash to the same bucket as the 1417e1e7763SThomas Graf * previous node p. Call the previous node p; 1427e1e7763SThomas Graf */ 1438d24c0b4SThomas Graf h = head_hashfn(ht, new_tbl, p); 1447e1e7763SThomas Graf rht_for_each(he, p->next, ht) { 1458d24c0b4SThomas Graf if (head_hashfn(ht, new_tbl, he) != h) 1467e1e7763SThomas Graf break; 1477e1e7763SThomas Graf p = he; 1487e1e7763SThomas Graf } 1497e1e7763SThomas Graf RCU_INIT_POINTER(old_tbl->buckets[n], p->next); 1507e1e7763SThomas Graf 1517e1e7763SThomas Graf /* Find the subsequent node which does hash to the same 1527e1e7763SThomas Graf * bucket as node P, or NULL if no such node exists. 1537e1e7763SThomas Graf */ 1547e1e7763SThomas Graf next = NULL; 1557e1e7763SThomas Graf if (he) { 1567e1e7763SThomas Graf rht_for_each(he, he->next, ht) { 1578d24c0b4SThomas Graf if (head_hashfn(ht, new_tbl, he) == h) { 1587e1e7763SThomas Graf next = he; 1597e1e7763SThomas Graf break; 1607e1e7763SThomas Graf } 1617e1e7763SThomas Graf } 1627e1e7763SThomas Graf } 1637e1e7763SThomas Graf 1647e1e7763SThomas Graf /* Set p's next pointer to that subsequent node pointer, 1657e1e7763SThomas Graf * bypassing the nodes which do not hash to p's bucket 1667e1e7763SThomas Graf */ 1677e1e7763SThomas Graf RCU_INIT_POINTER(p->next, next); 1687e1e7763SThomas Graf } 1697e1e7763SThomas Graf 1707e1e7763SThomas Graf /** 1717e1e7763SThomas Graf * rhashtable_expand - Expand hash table while allowing concurrent lookups 1727e1e7763SThomas Graf * @ht: the hash table to expand 1737e1e7763SThomas Graf * 1747e1e7763SThomas Graf * A secondary bucket array is allocated and the hash entries are migrated 1757e1e7763SThomas Graf * while keeping them on both lists until the end of the RCU grace period. 1767e1e7763SThomas Graf * 1777e1e7763SThomas Graf * This function may only be called in a context where it is safe to call 1787e1e7763SThomas Graf * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 1797e1e7763SThomas Graf * 1807e1e7763SThomas Graf * The caller must ensure that no concurrent table mutations take place. 1817e1e7763SThomas Graf * It is however valid to have concurrent lookups if they are RCU protected. 1827e1e7763SThomas Graf */ 1836eba8224SThomas Graf int rhashtable_expand(struct rhashtable *ht) 1847e1e7763SThomas Graf { 1857e1e7763SThomas Graf struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); 1867e1e7763SThomas Graf struct rhash_head *he; 1877e1e7763SThomas Graf unsigned int i, h; 1887e1e7763SThomas Graf bool complete; 1897e1e7763SThomas Graf 1907e1e7763SThomas Graf ASSERT_RHT_MUTEX(ht); 1917e1e7763SThomas Graf 1927e1e7763SThomas Graf if (ht->p.max_shift && ht->shift >= ht->p.max_shift) 1937e1e7763SThomas Graf return 0; 1947e1e7763SThomas Graf 1956eba8224SThomas Graf new_tbl = bucket_table_alloc(old_tbl->size * 2); 1967e1e7763SThomas Graf if (new_tbl == NULL) 1977e1e7763SThomas Graf return -ENOMEM; 1987e1e7763SThomas Graf 1997e1e7763SThomas Graf ht->shift++; 2007e1e7763SThomas Graf 2017e1e7763SThomas Graf /* For each new bucket, search the corresponding old bucket 2020c828f2fSHerbert Xu * for the first entry that hashes to the new bucket, and 2037e1e7763SThomas Graf * link the new bucket to that entry. Since all the entries 2047e1e7763SThomas Graf * which will end up in the new bucket appear in the same 2057e1e7763SThomas Graf * old bucket, this constructs an entirely valid new hash 2067e1e7763SThomas Graf * table, but with multiple buckets "zipped" together into a 2077e1e7763SThomas Graf * single imprecise chain. 2087e1e7763SThomas Graf */ 2097e1e7763SThomas Graf for (i = 0; i < new_tbl->size; i++) { 2108d24c0b4SThomas Graf h = rht_bucket_index(old_tbl, i); 2117e1e7763SThomas Graf rht_for_each(he, old_tbl->buckets[h], ht) { 2128d24c0b4SThomas Graf if (head_hashfn(ht, new_tbl, he) == i) { 2137e1e7763SThomas Graf RCU_INIT_POINTER(new_tbl->buckets[i], he); 2147e1e7763SThomas Graf break; 2157e1e7763SThomas Graf } 2167e1e7763SThomas Graf } 2177e1e7763SThomas Graf } 2187e1e7763SThomas Graf 2197e1e7763SThomas Graf /* Publish the new table pointer. Lookups may now traverse 2200c828f2fSHerbert Xu * the new table, but they will not benefit from any 2210c828f2fSHerbert Xu * additional efficiency until later steps unzip the buckets. 2227e1e7763SThomas Graf */ 2237e1e7763SThomas Graf rcu_assign_pointer(ht->tbl, new_tbl); 2247e1e7763SThomas Graf 2257e1e7763SThomas Graf /* Unzip interleaved hash chains */ 2267e1e7763SThomas Graf do { 2277e1e7763SThomas Graf /* Wait for readers. All new readers will see the new 2287e1e7763SThomas Graf * table, and thus no references to the old table will 2297e1e7763SThomas Graf * remain. 2307e1e7763SThomas Graf */ 2317e1e7763SThomas Graf synchronize_rcu(); 2327e1e7763SThomas Graf 2337e1e7763SThomas Graf /* For each bucket in the old table (each of which 2347e1e7763SThomas Graf * contains items from multiple buckets of the new 2357e1e7763SThomas Graf * table): ... 2367e1e7763SThomas Graf */ 2377e1e7763SThomas Graf complete = true; 2387e1e7763SThomas Graf for (i = 0; i < old_tbl->size; i++) { 2397e1e7763SThomas Graf hashtable_chain_unzip(ht, new_tbl, old_tbl, i); 2407e1e7763SThomas Graf if (old_tbl->buckets[i] != NULL) 2417e1e7763SThomas Graf complete = false; 2427e1e7763SThomas Graf } 2437e1e7763SThomas Graf } while (!complete); 2447e1e7763SThomas Graf 2457e1e7763SThomas Graf bucket_table_free(old_tbl); 2467e1e7763SThomas Graf return 0; 2477e1e7763SThomas Graf } 2487e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_expand); 2497e1e7763SThomas Graf 2507e1e7763SThomas Graf /** 2517e1e7763SThomas Graf * rhashtable_shrink - Shrink hash table while allowing concurrent lookups 2527e1e7763SThomas Graf * @ht: the hash table to shrink 2537e1e7763SThomas Graf * 2547e1e7763SThomas Graf * This function may only be called in a context where it is safe to call 2557e1e7763SThomas Graf * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 2567e1e7763SThomas Graf * 2577e1e7763SThomas Graf * The caller must ensure that no concurrent table mutations take place. 2587e1e7763SThomas Graf * It is however valid to have concurrent lookups if they are RCU protected. 2597e1e7763SThomas Graf */ 2606eba8224SThomas Graf int rhashtable_shrink(struct rhashtable *ht) 2617e1e7763SThomas Graf { 2627e1e7763SThomas Graf struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); 2637e1e7763SThomas Graf struct rhash_head __rcu **pprev; 2647e1e7763SThomas Graf unsigned int i; 2657e1e7763SThomas Graf 2667e1e7763SThomas Graf ASSERT_RHT_MUTEX(ht); 2677e1e7763SThomas Graf 26894000176SYing Xue if (ht->shift <= ht->p.min_shift) 2697e1e7763SThomas Graf return 0; 2707e1e7763SThomas Graf 2716eba8224SThomas Graf ntbl = bucket_table_alloc(tbl->size / 2); 2727e1e7763SThomas Graf if (ntbl == NULL) 2737e1e7763SThomas Graf return -ENOMEM; 2747e1e7763SThomas Graf 2757e1e7763SThomas Graf ht->shift--; 2767e1e7763SThomas Graf 2770c828f2fSHerbert Xu /* Link each bucket in the new table to the first bucket 2787e1e7763SThomas Graf * in the old table that contains entries which will hash 2797e1e7763SThomas Graf * to the new bucket. 2807e1e7763SThomas Graf */ 2817e1e7763SThomas Graf for (i = 0; i < ntbl->size; i++) { 2827e1e7763SThomas Graf ntbl->buckets[i] = tbl->buckets[i]; 2837e1e7763SThomas Graf 2840c828f2fSHerbert Xu /* Link each bucket in the new table to the first bucket 2857e1e7763SThomas Graf * in the old table that contains entries which will hash 2867e1e7763SThomas Graf * to the new bucket. 2877e1e7763SThomas Graf */ 2887e1e7763SThomas Graf for (pprev = &ntbl->buckets[i]; *pprev != NULL; 2897e1e7763SThomas Graf pprev = &rht_dereference(*pprev, ht)->next) 2907e1e7763SThomas Graf ; 2917e1e7763SThomas Graf RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); 2927e1e7763SThomas Graf } 2937e1e7763SThomas Graf 2947e1e7763SThomas Graf /* Publish the new, valid hash table */ 2957e1e7763SThomas Graf rcu_assign_pointer(ht->tbl, ntbl); 2967e1e7763SThomas Graf 2977e1e7763SThomas Graf /* Wait for readers. No new readers will have references to the 2987e1e7763SThomas Graf * old hash table. 2997e1e7763SThomas Graf */ 3007e1e7763SThomas Graf synchronize_rcu(); 3017e1e7763SThomas Graf 3027e1e7763SThomas Graf bucket_table_free(tbl); 3037e1e7763SThomas Graf 3047e1e7763SThomas Graf return 0; 3057e1e7763SThomas Graf } 3067e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_shrink); 3077e1e7763SThomas Graf 3087e1e7763SThomas Graf /** 3097e1e7763SThomas Graf * rhashtable_insert - insert object into hash hash table 3107e1e7763SThomas Graf * @ht: hash table 3117e1e7763SThomas Graf * @obj: pointer to hash head inside object 3127e1e7763SThomas Graf * 3137e1e7763SThomas Graf * Will automatically grow the table via rhashtable_expand() if the the 3147e1e7763SThomas Graf * grow_decision function specified at rhashtable_init() returns true. 3157e1e7763SThomas Graf * 3167e1e7763SThomas Graf * The caller must ensure that no concurrent table mutations occur. It is 3177e1e7763SThomas Graf * however valid to have concurrent lookups if they are RCU protected. 3187e1e7763SThomas Graf */ 3196eba8224SThomas Graf void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) 3207e1e7763SThomas Graf { 3217e1e7763SThomas Graf struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 3227e1e7763SThomas Graf u32 hash; 3237e1e7763SThomas Graf 3247e1e7763SThomas Graf ASSERT_RHT_MUTEX(ht); 3257e1e7763SThomas Graf 3268d24c0b4SThomas Graf hash = head_hashfn(ht, tbl, obj); 3277e1e7763SThomas Graf RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); 3287e1e7763SThomas Graf rcu_assign_pointer(tbl->buckets[hash], obj); 3297e1e7763SThomas Graf ht->nelems++; 3307e1e7763SThomas Graf 3317e1e7763SThomas Graf if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) 3326eba8224SThomas Graf rhashtable_expand(ht); 3337e1e7763SThomas Graf } 3347e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_insert); 3357e1e7763SThomas Graf 3367e1e7763SThomas Graf /** 3377e1e7763SThomas Graf * rhashtable_remove_pprev - remove object from hash table given previous element 3387e1e7763SThomas Graf * @ht: hash table 3397e1e7763SThomas Graf * @obj: pointer to hash head inside object 3407e1e7763SThomas Graf * @pprev: pointer to previous element 3417e1e7763SThomas Graf * 3427e1e7763SThomas Graf * Identical to rhashtable_remove() but caller is alreayd aware of the element 3437e1e7763SThomas Graf * in front of the element to be deleted. This is in particular useful for 3447e1e7763SThomas Graf * deletion when combined with walking or lookup. 3457e1e7763SThomas Graf */ 3467e1e7763SThomas Graf void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, 3476eba8224SThomas Graf struct rhash_head __rcu **pprev) 3487e1e7763SThomas Graf { 3497e1e7763SThomas Graf struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 3507e1e7763SThomas Graf 3517e1e7763SThomas Graf ASSERT_RHT_MUTEX(ht); 3527e1e7763SThomas Graf 3537e1e7763SThomas Graf RCU_INIT_POINTER(*pprev, obj->next); 3547e1e7763SThomas Graf ht->nelems--; 3557e1e7763SThomas Graf 3567e1e7763SThomas Graf if (ht->p.shrink_decision && 3577e1e7763SThomas Graf ht->p.shrink_decision(ht, tbl->size)) 3586eba8224SThomas Graf rhashtable_shrink(ht); 3597e1e7763SThomas Graf } 3607e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); 3617e1e7763SThomas Graf 3627e1e7763SThomas Graf /** 3637e1e7763SThomas Graf * rhashtable_remove - remove object from hash table 3647e1e7763SThomas Graf * @ht: hash table 3657e1e7763SThomas Graf * @obj: pointer to hash head inside object 3667e1e7763SThomas Graf * 3677e1e7763SThomas Graf * Since the hash chain is single linked, the removal operation needs to 3687e1e7763SThomas Graf * walk the bucket chain upon removal. The removal operation is thus 3697e1e7763SThomas Graf * considerable slow if the hash table is not correctly sized. 3707e1e7763SThomas Graf * 3717e1e7763SThomas Graf * Will automatically shrink the table via rhashtable_expand() if the the 3727e1e7763SThomas Graf * shrink_decision function specified at rhashtable_init() returns true. 3737e1e7763SThomas Graf * 3747e1e7763SThomas Graf * The caller must ensure that no concurrent table mutations occur. It is 3757e1e7763SThomas Graf * however valid to have concurrent lookups if they are RCU protected. 3767e1e7763SThomas Graf */ 3776eba8224SThomas Graf bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) 3787e1e7763SThomas Graf { 3797e1e7763SThomas Graf struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 3807e1e7763SThomas Graf struct rhash_head __rcu **pprev; 3817e1e7763SThomas Graf struct rhash_head *he; 3827e1e7763SThomas Graf u32 h; 3837e1e7763SThomas Graf 3847e1e7763SThomas Graf ASSERT_RHT_MUTEX(ht); 3857e1e7763SThomas Graf 3868d24c0b4SThomas Graf h = head_hashfn(ht, tbl, obj); 3877e1e7763SThomas Graf 3887e1e7763SThomas Graf pprev = &tbl->buckets[h]; 3897e1e7763SThomas Graf rht_for_each(he, tbl->buckets[h], ht) { 3907e1e7763SThomas Graf if (he != obj) { 3917e1e7763SThomas Graf pprev = &he->next; 3927e1e7763SThomas Graf continue; 3937e1e7763SThomas Graf } 3947e1e7763SThomas Graf 3956eba8224SThomas Graf rhashtable_remove_pprev(ht, he, pprev); 3967e1e7763SThomas Graf return true; 3977e1e7763SThomas Graf } 3987e1e7763SThomas Graf 3997e1e7763SThomas Graf return false; 4007e1e7763SThomas Graf } 4017e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_remove); 4027e1e7763SThomas Graf 4037e1e7763SThomas Graf /** 4047e1e7763SThomas Graf * rhashtable_lookup - lookup key in hash table 4057e1e7763SThomas Graf * @ht: hash table 4067e1e7763SThomas Graf * @key: pointer to key 4077e1e7763SThomas Graf * 4087e1e7763SThomas Graf * Computes the hash value for the key and traverses the bucket chain looking 4097e1e7763SThomas Graf * for a entry with an identical key. The first matching entry is returned. 4107e1e7763SThomas Graf * 4117e1e7763SThomas Graf * This lookup function may only be used for fixed key hash table (key_len 4127e1e7763SThomas Graf * paramter set). It will BUG() if used inappropriately. 4137e1e7763SThomas Graf * 4147e1e7763SThomas Graf * Lookups may occur in parallel with hash mutations as long as the lookup is 4157e1e7763SThomas Graf * guarded by rcu_read_lock(). The caller must take care of this. 4167e1e7763SThomas Graf */ 4177e1e7763SThomas Graf void *rhashtable_lookup(const struct rhashtable *ht, const void *key) 4187e1e7763SThomas Graf { 4197e1e7763SThomas Graf const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 4207e1e7763SThomas Graf struct rhash_head *he; 4217e1e7763SThomas Graf u32 h; 4227e1e7763SThomas Graf 4237e1e7763SThomas Graf BUG_ON(!ht->p.key_len); 4247e1e7763SThomas Graf 4258d24c0b4SThomas Graf h = key_hashfn(ht, key, ht->p.key_len); 4267e1e7763SThomas Graf rht_for_each_rcu(he, tbl->buckets[h], ht) { 4277e1e7763SThomas Graf if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, 4287e1e7763SThomas Graf ht->p.key_len)) 4297e1e7763SThomas Graf continue; 430*a4b18cdaSThomas Graf return rht_obj(ht, he); 4317e1e7763SThomas Graf } 4327e1e7763SThomas Graf 4337e1e7763SThomas Graf return NULL; 4347e1e7763SThomas Graf } 4357e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_lookup); 4367e1e7763SThomas Graf 4377e1e7763SThomas Graf /** 4387e1e7763SThomas Graf * rhashtable_lookup_compare - search hash table with compare function 4397e1e7763SThomas Graf * @ht: hash table 4408d24c0b4SThomas Graf * @key: the pointer to the key 4417e1e7763SThomas Graf * @compare: compare function, must return true on match 4427e1e7763SThomas Graf * @arg: argument passed on to compare function 4437e1e7763SThomas Graf * 4447e1e7763SThomas Graf * Traverses the bucket chain behind the provided hash value and calls the 4457e1e7763SThomas Graf * specified compare function for each entry. 4467e1e7763SThomas Graf * 4477e1e7763SThomas Graf * Lookups may occur in parallel with hash mutations as long as the lookup is 4487e1e7763SThomas Graf * guarded by rcu_read_lock(). The caller must take care of this. 4497e1e7763SThomas Graf * 4507e1e7763SThomas Graf * Returns the first entry on which the compare function returned true. 4517e1e7763SThomas Graf */ 4528d24c0b4SThomas Graf void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, 4537e1e7763SThomas Graf bool (*compare)(void *, void *), void *arg) 4547e1e7763SThomas Graf { 4557e1e7763SThomas Graf const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); 4567e1e7763SThomas Graf struct rhash_head *he; 4578d24c0b4SThomas Graf u32 hash; 4587e1e7763SThomas Graf 4598d24c0b4SThomas Graf hash = key_hashfn(ht, key, ht->p.key_len); 4607e1e7763SThomas Graf rht_for_each_rcu(he, tbl->buckets[hash], ht) { 4617e1e7763SThomas Graf if (!compare(rht_obj(ht, he), arg)) 4627e1e7763SThomas Graf continue; 463*a4b18cdaSThomas Graf return rht_obj(ht, he); 4647e1e7763SThomas Graf } 4657e1e7763SThomas Graf 4667e1e7763SThomas Graf return NULL; 4677e1e7763SThomas Graf } 4687e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); 4697e1e7763SThomas Graf 47094000176SYing Xue static size_t rounded_hashtable_size(struct rhashtable_params *params) 4717e1e7763SThomas Graf { 47294000176SYing Xue return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), 47394000176SYing Xue 1UL << params->min_shift); 4747e1e7763SThomas Graf } 4757e1e7763SThomas Graf 4767e1e7763SThomas Graf /** 4777e1e7763SThomas Graf * rhashtable_init - initialize a new hash table 4787e1e7763SThomas Graf * @ht: hash table to be initialized 4797e1e7763SThomas Graf * @params: configuration parameters 4807e1e7763SThomas Graf * 4817e1e7763SThomas Graf * Initializes a new hash table based on the provided configuration 4827e1e7763SThomas Graf * parameters. A table can be configured either with a variable or 4837e1e7763SThomas Graf * fixed length key: 4847e1e7763SThomas Graf * 4857e1e7763SThomas Graf * Configuration Example 1: Fixed length keys 4867e1e7763SThomas Graf * struct test_obj { 4877e1e7763SThomas Graf * int key; 4887e1e7763SThomas Graf * void * my_member; 4897e1e7763SThomas Graf * struct rhash_head node; 4907e1e7763SThomas Graf * }; 4917e1e7763SThomas Graf * 4927e1e7763SThomas Graf * struct rhashtable_params params = { 4937e1e7763SThomas Graf * .head_offset = offsetof(struct test_obj, node), 4947e1e7763SThomas Graf * .key_offset = offsetof(struct test_obj, key), 4957e1e7763SThomas Graf * .key_len = sizeof(int), 49687545899SDaniel Borkmann * .hashfn = jhash, 4971b2f309dSHerbert Xu * #ifdef CONFIG_PROVE_LOCKING 4987e1e7763SThomas Graf * .mutex_is_held = &my_mutex_is_held, 4991b2f309dSHerbert Xu * #endif 5007e1e7763SThomas Graf * }; 5017e1e7763SThomas Graf * 5027e1e7763SThomas Graf * Configuration Example 2: Variable length keys 5037e1e7763SThomas Graf * struct test_obj { 5047e1e7763SThomas Graf * [...] 5057e1e7763SThomas Graf * struct rhash_head node; 5067e1e7763SThomas Graf * }; 5077e1e7763SThomas Graf * 5087e1e7763SThomas Graf * u32 my_hash_fn(const void *data, u32 seed) 5097e1e7763SThomas Graf * { 5107e1e7763SThomas Graf * struct test_obj *obj = data; 5117e1e7763SThomas Graf * 5127e1e7763SThomas Graf * return [... hash ...]; 5137e1e7763SThomas Graf * } 5147e1e7763SThomas Graf * 5157e1e7763SThomas Graf * struct rhashtable_params params = { 5167e1e7763SThomas Graf * .head_offset = offsetof(struct test_obj, node), 51787545899SDaniel Borkmann * .hashfn = jhash, 5187e1e7763SThomas Graf * .obj_hashfn = my_hash_fn, 5191b2f309dSHerbert Xu * #ifdef CONFIG_PROVE_LOCKING 5207e1e7763SThomas Graf * .mutex_is_held = &my_mutex_is_held, 5211b2f309dSHerbert Xu * #endif 5227e1e7763SThomas Graf * }; 5237e1e7763SThomas Graf */ 5247e1e7763SThomas Graf int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) 5257e1e7763SThomas Graf { 5267e1e7763SThomas Graf struct bucket_table *tbl; 5277e1e7763SThomas Graf size_t size; 5287e1e7763SThomas Graf 5297e1e7763SThomas Graf size = HASH_DEFAULT_SIZE; 5307e1e7763SThomas Graf 5317e1e7763SThomas Graf if ((params->key_len && !params->hashfn) || 5327e1e7763SThomas Graf (!params->key_len && !params->obj_hashfn)) 5337e1e7763SThomas Graf return -EINVAL; 5347e1e7763SThomas Graf 53594000176SYing Xue params->min_shift = max_t(size_t, params->min_shift, 53694000176SYing Xue ilog2(HASH_MIN_SIZE)); 53794000176SYing Xue 5387e1e7763SThomas Graf if (params->nelem_hint) 53994000176SYing Xue size = rounded_hashtable_size(params); 5407e1e7763SThomas Graf 5416eba8224SThomas Graf tbl = bucket_table_alloc(size); 5427e1e7763SThomas Graf if (tbl == NULL) 5437e1e7763SThomas Graf return -ENOMEM; 5447e1e7763SThomas Graf 5457e1e7763SThomas Graf memset(ht, 0, sizeof(*ht)); 5467e1e7763SThomas Graf ht->shift = ilog2(tbl->size); 5477e1e7763SThomas Graf memcpy(&ht->p, params, sizeof(*params)); 5487e1e7763SThomas Graf RCU_INIT_POINTER(ht->tbl, tbl); 5497e1e7763SThomas Graf 5507e1e7763SThomas Graf if (!ht->p.hash_rnd) 5517e1e7763SThomas Graf get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); 5527e1e7763SThomas Graf 5537e1e7763SThomas Graf return 0; 5547e1e7763SThomas Graf } 5557e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_init); 5567e1e7763SThomas Graf 5577e1e7763SThomas Graf /** 5587e1e7763SThomas Graf * rhashtable_destroy - destroy hash table 5597e1e7763SThomas Graf * @ht: the hash table to destroy 5607e1e7763SThomas Graf * 561ae82ddcfSPablo Neira Ayuso * Frees the bucket array. This function is not rcu safe, therefore the caller 562ae82ddcfSPablo Neira Ayuso * has to make sure that no resizing may happen by unpublishing the hashtable 563ae82ddcfSPablo Neira Ayuso * and waiting for the quiescent cycle before releasing the bucket array. 5647e1e7763SThomas Graf */ 5657e1e7763SThomas Graf void rhashtable_destroy(const struct rhashtable *ht) 5667e1e7763SThomas Graf { 567ae82ddcfSPablo Neira Ayuso bucket_table_free(ht->tbl); 5687e1e7763SThomas Graf } 5697e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_destroy); 5707e1e7763SThomas Graf 5717e1e7763SThomas Graf /************************************************************************** 5727e1e7763SThomas Graf * Self Test 5737e1e7763SThomas Graf **************************************************************************/ 5747e1e7763SThomas Graf 5757e1e7763SThomas Graf #ifdef CONFIG_TEST_RHASHTABLE 5767e1e7763SThomas Graf 5777e1e7763SThomas Graf #define TEST_HT_SIZE 8 5787e1e7763SThomas Graf #define TEST_ENTRIES 2048 5797e1e7763SThomas Graf #define TEST_PTR ((void *) 0xdeadbeef) 5807e1e7763SThomas Graf #define TEST_NEXPANDS 4 5817e1e7763SThomas Graf 5821b2f309dSHerbert Xu #ifdef CONFIG_PROVE_LOCKING 5837b4ce235SHerbert Xu static int test_mutex_is_held(void *parent) 5847e1e7763SThomas Graf { 5857e1e7763SThomas Graf return 1; 5867e1e7763SThomas Graf } 5871b2f309dSHerbert Xu #endif 5887e1e7763SThomas Graf 5897e1e7763SThomas Graf struct test_obj { 5907e1e7763SThomas Graf void *ptr; 5917e1e7763SThomas Graf int value; 5927e1e7763SThomas Graf struct rhash_head node; 5937e1e7763SThomas Graf }; 5947e1e7763SThomas Graf 5957e1e7763SThomas Graf static int __init test_rht_lookup(struct rhashtable *ht) 5967e1e7763SThomas Graf { 5977e1e7763SThomas Graf unsigned int i; 5987e1e7763SThomas Graf 5997e1e7763SThomas Graf for (i = 0; i < TEST_ENTRIES * 2; i++) { 6007e1e7763SThomas Graf struct test_obj *obj; 6017e1e7763SThomas Graf bool expected = !(i % 2); 6027e1e7763SThomas Graf u32 key = i; 6037e1e7763SThomas Graf 6047e1e7763SThomas Graf obj = rhashtable_lookup(ht, &key); 6057e1e7763SThomas Graf 6067e1e7763SThomas Graf if (expected && !obj) { 6077e1e7763SThomas Graf pr_warn("Test failed: Could not find key %u\n", key); 6087e1e7763SThomas Graf return -ENOENT; 6097e1e7763SThomas Graf } else if (!expected && obj) { 6107e1e7763SThomas Graf pr_warn("Test failed: Unexpected entry found for key %u\n", 6117e1e7763SThomas Graf key); 6127e1e7763SThomas Graf return -EEXIST; 6137e1e7763SThomas Graf } else if (expected && obj) { 6147e1e7763SThomas Graf if (obj->ptr != TEST_PTR || obj->value != i) { 6157e1e7763SThomas Graf pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", 6167e1e7763SThomas Graf obj->ptr, TEST_PTR, obj->value, i); 6177e1e7763SThomas Graf return -EINVAL; 6187e1e7763SThomas Graf } 6197e1e7763SThomas Graf } 6207e1e7763SThomas Graf } 6217e1e7763SThomas Graf 6227e1e7763SThomas Graf return 0; 6237e1e7763SThomas Graf } 6247e1e7763SThomas Graf 6253e7b2ec4SThomas Graf static void test_bucket_stats(struct rhashtable *ht, bool quiet) 6267e1e7763SThomas Graf { 6273e7b2ec4SThomas Graf unsigned int cnt, rcu_cnt, i, total = 0; 6287e1e7763SThomas Graf struct test_obj *obj; 6293e7b2ec4SThomas Graf struct bucket_table *tbl; 6307e1e7763SThomas Graf 6313e7b2ec4SThomas Graf tbl = rht_dereference_rcu(ht->tbl, ht); 6327e1e7763SThomas Graf for (i = 0; i < tbl->size; i++) { 6333e7b2ec4SThomas Graf rcu_cnt = cnt = 0; 6347e1e7763SThomas Graf 6357e1e7763SThomas Graf if (!quiet) 6367e1e7763SThomas Graf pr_info(" [%#4x/%zu]", i, tbl->size); 6377e1e7763SThomas Graf 6387e1e7763SThomas Graf rht_for_each_entry_rcu(obj, tbl->buckets[i], node) { 6397e1e7763SThomas Graf cnt++; 6407e1e7763SThomas Graf total++; 6417e1e7763SThomas Graf if (!quiet) 6427e1e7763SThomas Graf pr_cont(" [%p],", obj); 6437e1e7763SThomas Graf } 6447e1e7763SThomas Graf 6453e7b2ec4SThomas Graf rht_for_each_entry_rcu(obj, tbl->buckets[i], node) 6463e7b2ec4SThomas Graf rcu_cnt++; 6473e7b2ec4SThomas Graf 6483e7b2ec4SThomas Graf if (rcu_cnt != cnt) 6493e7b2ec4SThomas Graf pr_warn("Test failed: Chain count mismach %d != %d", 6503e7b2ec4SThomas Graf cnt, rcu_cnt); 6513e7b2ec4SThomas Graf 6527e1e7763SThomas Graf if (!quiet) 6537e1e7763SThomas Graf pr_cont("\n [%#x] first element: %p, chain length: %u\n", 6547e1e7763SThomas Graf i, tbl->buckets[i], cnt); 6557e1e7763SThomas Graf } 6567e1e7763SThomas Graf 6577e1e7763SThomas Graf pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", 6587e1e7763SThomas Graf total, ht->nelems, TEST_ENTRIES); 6593e7b2ec4SThomas Graf 6603e7b2ec4SThomas Graf if (total != ht->nelems || total != TEST_ENTRIES) 6613e7b2ec4SThomas Graf pr_warn("Test failed: Total count mismatch ^^^"); 6627e1e7763SThomas Graf } 6637e1e7763SThomas Graf 6647e1e7763SThomas Graf static int __init test_rhashtable(struct rhashtable *ht) 6657e1e7763SThomas Graf { 6667e1e7763SThomas Graf struct bucket_table *tbl; 6677e1e7763SThomas Graf struct test_obj *obj, *next; 6687e1e7763SThomas Graf int err; 6697e1e7763SThomas Graf unsigned int i; 6707e1e7763SThomas Graf 6717e1e7763SThomas Graf /* 6727e1e7763SThomas Graf * Insertion Test: 6737e1e7763SThomas Graf * Insert TEST_ENTRIES into table with all keys even numbers 6747e1e7763SThomas Graf */ 6757e1e7763SThomas Graf pr_info(" Adding %d keys\n", TEST_ENTRIES); 6767e1e7763SThomas Graf for (i = 0; i < TEST_ENTRIES; i++) { 6777e1e7763SThomas Graf struct test_obj *obj; 6787e1e7763SThomas Graf 6797e1e7763SThomas Graf obj = kzalloc(sizeof(*obj), GFP_KERNEL); 6807e1e7763SThomas Graf if (!obj) { 6817e1e7763SThomas Graf err = -ENOMEM; 6827e1e7763SThomas Graf goto error; 6837e1e7763SThomas Graf } 6847e1e7763SThomas Graf 6857e1e7763SThomas Graf obj->ptr = TEST_PTR; 6867e1e7763SThomas Graf obj->value = i * 2; 6877e1e7763SThomas Graf 6886eba8224SThomas Graf rhashtable_insert(ht, &obj->node); 6897e1e7763SThomas Graf } 6907e1e7763SThomas Graf 6917e1e7763SThomas Graf rcu_read_lock(); 6923e7b2ec4SThomas Graf test_bucket_stats(ht, true); 6937e1e7763SThomas Graf test_rht_lookup(ht); 6947e1e7763SThomas Graf rcu_read_unlock(); 6957e1e7763SThomas Graf 6967e1e7763SThomas Graf for (i = 0; i < TEST_NEXPANDS; i++) { 6977e1e7763SThomas Graf pr_info(" Table expansion iteration %u...\n", i); 6986eba8224SThomas Graf rhashtable_expand(ht); 6997e1e7763SThomas Graf 7007e1e7763SThomas Graf rcu_read_lock(); 7017e1e7763SThomas Graf pr_info(" Verifying lookups...\n"); 7027e1e7763SThomas Graf test_rht_lookup(ht); 7037e1e7763SThomas Graf rcu_read_unlock(); 7047e1e7763SThomas Graf } 7057e1e7763SThomas Graf 7067e1e7763SThomas Graf for (i = 0; i < TEST_NEXPANDS; i++) { 7077e1e7763SThomas Graf pr_info(" Table shrinkage iteration %u...\n", i); 7086eba8224SThomas Graf rhashtable_shrink(ht); 7097e1e7763SThomas Graf 7107e1e7763SThomas Graf rcu_read_lock(); 7117e1e7763SThomas Graf pr_info(" Verifying lookups...\n"); 7127e1e7763SThomas Graf test_rht_lookup(ht); 7137e1e7763SThomas Graf rcu_read_unlock(); 7147e1e7763SThomas Graf } 7157e1e7763SThomas Graf 7163e7b2ec4SThomas Graf rcu_read_lock(); 7173e7b2ec4SThomas Graf test_bucket_stats(ht, true); 7183e7b2ec4SThomas Graf rcu_read_unlock(); 7193e7b2ec4SThomas Graf 7207e1e7763SThomas Graf pr_info(" Deleting %d keys\n", TEST_ENTRIES); 7217e1e7763SThomas Graf for (i = 0; i < TEST_ENTRIES; i++) { 7227e1e7763SThomas Graf u32 key = i * 2; 7237e1e7763SThomas Graf 7247e1e7763SThomas Graf obj = rhashtable_lookup(ht, &key); 7257e1e7763SThomas Graf BUG_ON(!obj); 7267e1e7763SThomas Graf 7276eba8224SThomas Graf rhashtable_remove(ht, &obj->node); 7287e1e7763SThomas Graf kfree(obj); 7297e1e7763SThomas Graf } 7307e1e7763SThomas Graf 7317e1e7763SThomas Graf return 0; 7327e1e7763SThomas Graf 7337e1e7763SThomas Graf error: 7347e1e7763SThomas Graf tbl = rht_dereference_rcu(ht->tbl, ht); 7357e1e7763SThomas Graf for (i = 0; i < tbl->size; i++) 7367e1e7763SThomas Graf rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node) 7377e1e7763SThomas Graf kfree(obj); 7387e1e7763SThomas Graf 7397e1e7763SThomas Graf return err; 7407e1e7763SThomas Graf } 7417e1e7763SThomas Graf 7427e1e7763SThomas Graf static int __init test_rht_init(void) 7437e1e7763SThomas Graf { 7447e1e7763SThomas Graf struct rhashtable ht; 7457e1e7763SThomas Graf struct rhashtable_params params = { 7467e1e7763SThomas Graf .nelem_hint = TEST_HT_SIZE, 7477e1e7763SThomas Graf .head_offset = offsetof(struct test_obj, node), 7487e1e7763SThomas Graf .key_offset = offsetof(struct test_obj, value), 7497e1e7763SThomas Graf .key_len = sizeof(int), 75087545899SDaniel Borkmann .hashfn = jhash, 7511b2f309dSHerbert Xu #ifdef CONFIG_PROVE_LOCKING 7527e1e7763SThomas Graf .mutex_is_held = &test_mutex_is_held, 7531b2f309dSHerbert Xu #endif 7547e1e7763SThomas Graf .grow_decision = rht_grow_above_75, 7557e1e7763SThomas Graf .shrink_decision = rht_shrink_below_30, 7567e1e7763SThomas Graf }; 7577e1e7763SThomas Graf int err; 7587e1e7763SThomas Graf 7597e1e7763SThomas Graf pr_info("Running resizable hashtable tests...\n"); 7607e1e7763SThomas Graf 7617e1e7763SThomas Graf err = rhashtable_init(&ht, ¶ms); 7627e1e7763SThomas Graf if (err < 0) { 7637e1e7763SThomas Graf pr_warn("Test failed: Unable to initialize hashtable: %d\n", 7647e1e7763SThomas Graf err); 7657e1e7763SThomas Graf return err; 7667e1e7763SThomas Graf } 7677e1e7763SThomas Graf 7687e1e7763SThomas Graf err = test_rhashtable(&ht); 7697e1e7763SThomas Graf 7707e1e7763SThomas Graf rhashtable_destroy(&ht); 7717e1e7763SThomas Graf 7727e1e7763SThomas Graf return err; 7737e1e7763SThomas Graf } 7747e1e7763SThomas Graf 7757e1e7763SThomas Graf subsys_initcall(test_rht_init); 7767e1e7763SThomas Graf 7777e1e7763SThomas Graf #endif /* CONFIG_TEST_RHASHTABLE */ 778