12cdb54c9SMauro Carvalho Chehab.. SPDX-License-Identifier: GPL-2.0 22cdb54c9SMauro Carvalho Chehab 32cdb54c9SMauro Carvalho Chehab================================================= 42cdb54c9SMauro Carvalho ChehabUsing RCU hlist_nulls to protect list and objects 52cdb54c9SMauro Carvalho Chehab================================================= 62cdb54c9SMauro Carvalho Chehab 72cdb54c9SMauro Carvalho ChehabThis section describes how to use hlist_nulls to 82cdb54c9SMauro Carvalho Chehabprotect read-mostly linked lists and 92cdb54c9SMauro Carvalho Chehabobjects using SLAB_TYPESAFE_BY_RCU allocations. 102cdb54c9SMauro Carvalho Chehab 11404147faSAkira YokosawaPlease read the basics in listRCU.rst. 122cdb54c9SMauro Carvalho Chehab 132d9c318bSMauro Carvalho ChehabUsing 'nulls' 142d9c318bSMauro Carvalho Chehab============= 152d9c318bSMauro Carvalho Chehab 162cdb54c9SMauro Carvalho ChehabUsing special makers (called 'nulls') is a convenient way 17da82af04SPaul E. McKenneyto solve following problem. 182cdb54c9SMauro Carvalho Chehab 19da82af04SPaul E. McKenneyWithout 'nulls', a typical RCU linked list managing objects which are 20da82af04SPaul E. McKenneyallocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following 215326caa7SSeongJae Parkalgorithms. Following examples assume 'obj' is a pointer to such 225326caa7SSeongJae Parkobjects, which is having below type. 235326caa7SSeongJae Park 245326caa7SSeongJae Park:: 255326caa7SSeongJae Park 265326caa7SSeongJae Park struct object { 275326caa7SSeongJae Park struct hlist_node obj_node; 285326caa7SSeongJae Park atomic_t refcnt; 295326caa7SSeongJae Park unsigned int key; 305326caa7SSeongJae Park }; 312cdb54c9SMauro Carvalho Chehab 32da82af04SPaul E. McKenney1) Lookup algorithm 33da82af04SPaul E. McKenney------------------- 342cdb54c9SMauro Carvalho Chehab 352cdb54c9SMauro Carvalho Chehab:: 362cdb54c9SMauro Carvalho Chehab 372cdb54c9SMauro Carvalho Chehab begin: 383f831e38SSeongJae Park rcu_read_lock(); 392cdb54c9SMauro Carvalho Chehab obj = lockless_lookup(key); 402cdb54c9SMauro Carvalho Chehab if (obj) { 41bc25e7c3SAlan Huang if (!try_get_ref(obj)) { // might fail for free objects 42bc25e7c3SAlan Huang rcu_read_unlock(); 432cdb54c9SMauro Carvalho Chehab goto begin; 44bc25e7c3SAlan Huang } 452cdb54c9SMauro Carvalho Chehab /* 462cdb54c9SMauro Carvalho Chehab * Because a writer could delete object, and a writer could 472cdb54c9SMauro Carvalho Chehab * reuse these object before the RCU grace period, we 482cdb54c9SMauro Carvalho Chehab * must check key after getting the reference on object 492cdb54c9SMauro Carvalho Chehab */ 502cdb54c9SMauro Carvalho Chehab if (obj->key != key) { // not the object we expected 512cdb54c9SMauro Carvalho Chehab put_ref(obj); 52da82af04SPaul E. McKenney rcu_read_unlock(); 532cdb54c9SMauro Carvalho Chehab goto begin; 542cdb54c9SMauro Carvalho Chehab } 552cdb54c9SMauro Carvalho Chehab } 562cdb54c9SMauro Carvalho Chehab rcu_read_unlock(); 572cdb54c9SMauro Carvalho Chehab 582cdb54c9SMauro Carvalho ChehabBeware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu() 592cdb54c9SMauro Carvalho Chehabbut a version with an additional memory barrier (smp_rmb()) 602cdb54c9SMauro Carvalho Chehab 612cdb54c9SMauro Carvalho Chehab:: 622cdb54c9SMauro Carvalho Chehab 632cdb54c9SMauro Carvalho Chehab lockless_lookup(key) 642cdb54c9SMauro Carvalho Chehab { 652cdb54c9SMauro Carvalho Chehab struct hlist_node *node, *next; 662cdb54c9SMauro Carvalho Chehab for (pos = rcu_dereference((head)->first); 672cdb54c9SMauro Carvalho Chehab pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && 68*d186204aSSeongJae Park ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; }); 692cdb54c9SMauro Carvalho Chehab pos = rcu_dereference(next)) 702cdb54c9SMauro Carvalho Chehab if (obj->key == key) 712cdb54c9SMauro Carvalho Chehab return obj; 722cdb54c9SMauro Carvalho Chehab return NULL; 732cdb54c9SMauro Carvalho Chehab } 742cdb54c9SMauro Carvalho Chehab 752cdb54c9SMauro Carvalho ChehabAnd note the traditional hlist_for_each_entry_rcu() misses this smp_rmb():: 762cdb54c9SMauro Carvalho Chehab 772cdb54c9SMauro Carvalho Chehab struct hlist_node *node; 782cdb54c9SMauro Carvalho Chehab for (pos = rcu_dereference((head)->first); 792cdb54c9SMauro Carvalho Chehab pos && ({ prefetch(pos->next); 1; }) && 80*d186204aSSeongJae Park ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; }); 812cdb54c9SMauro Carvalho Chehab pos = rcu_dereference(pos->next)) 822cdb54c9SMauro Carvalho Chehab if (obj->key == key) 832cdb54c9SMauro Carvalho Chehab return obj; 842cdb54c9SMauro Carvalho Chehab return NULL; 852cdb54c9SMauro Carvalho Chehab 862cdb54c9SMauro Carvalho ChehabQuoting Corey Minyard:: 872cdb54c9SMauro Carvalho Chehab 882cdb54c9SMauro Carvalho Chehab "If the object is moved from one list to another list in-between the 892cdb54c9SMauro Carvalho Chehab time the hash is calculated and the next field is accessed, and the 902cdb54c9SMauro Carvalho Chehab object has moved to the end of a new list, the traversal will not 912cdb54c9SMauro Carvalho Chehab complete properly on the list it should have, since the object will 922cdb54c9SMauro Carvalho Chehab be on the end of the new list and there's not a way to tell it's on a 932cdb54c9SMauro Carvalho Chehab new list and restart the list traversal. I think that this can be 942cdb54c9SMauro Carvalho Chehab solved by pre-fetching the "next" field (with proper barriers) before 952cdb54c9SMauro Carvalho Chehab checking the key." 962cdb54c9SMauro Carvalho Chehab 97da82af04SPaul E. McKenney2) Insertion algorithm 98da82af04SPaul E. McKenney---------------------- 992cdb54c9SMauro Carvalho Chehab 100*d186204aSSeongJae ParkWe need to make sure a reader cannot read the new 'obj->obj_node.next' value 101da82af04SPaul E. McKenneyand previous value of 'obj->key'. Otherwise, an item could be deleted 1022cdb54c9SMauro Carvalho Chehabfrom a chain, and inserted into another chain. If new chain was empty 103da82af04SPaul E. McKenneybefore the move, 'next' pointer is NULL, and lockless reader can not 104da82af04SPaul E. McKenneydetect the fact that it missed following items in original chain. 1052cdb54c9SMauro Carvalho Chehab 1062cdb54c9SMauro Carvalho Chehab:: 1072cdb54c9SMauro Carvalho Chehab 1082cdb54c9SMauro Carvalho Chehab /* 1092cdb54c9SMauro Carvalho Chehab * Please note that new inserts are done at the head of list, 1102cdb54c9SMauro Carvalho Chehab * not in the middle or end. 1112cdb54c9SMauro Carvalho Chehab */ 1122cdb54c9SMauro Carvalho Chehab obj = kmem_cache_alloc(...); 1132cdb54c9SMauro Carvalho Chehab lock_chain(); // typically a spin_lock() 1142cdb54c9SMauro Carvalho Chehab obj->key = key; 115da82af04SPaul E. McKenney atomic_set_release(&obj->refcnt, 1); // key before refcnt 1162cdb54c9SMauro Carvalho Chehab hlist_add_head_rcu(&obj->obj_node, list); 1172cdb54c9SMauro Carvalho Chehab unlock_chain(); // typically a spin_unlock() 1182cdb54c9SMauro Carvalho Chehab 1192cdb54c9SMauro Carvalho Chehab 120da82af04SPaul E. McKenney3) Removal algorithm 121da82af04SPaul E. McKenney-------------------- 122da82af04SPaul E. McKenney 1232cdb54c9SMauro Carvalho ChehabNothing special here, we can use a standard RCU hlist deletion. 1242cdb54c9SMauro Carvalho ChehabBut thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused 1252cdb54c9SMauro Carvalho Chehabvery very fast (before the end of RCU grace period) 1262cdb54c9SMauro Carvalho Chehab 1272cdb54c9SMauro Carvalho Chehab:: 1282cdb54c9SMauro Carvalho Chehab 1292cdb54c9SMauro Carvalho Chehab if (put_last_reference_on(obj) { 1302cdb54c9SMauro Carvalho Chehab lock_chain(); // typically a spin_lock() 1312cdb54c9SMauro Carvalho Chehab hlist_del_init_rcu(&obj->obj_node); 1322cdb54c9SMauro Carvalho Chehab unlock_chain(); // typically a spin_unlock() 1332cdb54c9SMauro Carvalho Chehab kmem_cache_free(cachep, obj); 1342cdb54c9SMauro Carvalho Chehab } 1352cdb54c9SMauro Carvalho Chehab 1362cdb54c9SMauro Carvalho Chehab 1372cdb54c9SMauro Carvalho Chehab 1382cdb54c9SMauro Carvalho Chehab-------------------------------------------------------------------------- 1392cdb54c9SMauro Carvalho Chehab 1402d9c318bSMauro Carvalho ChehabAvoiding extra smp_rmb() 1412d9c318bSMauro Carvalho Chehab======================== 1422d9c318bSMauro Carvalho Chehab 1432cdb54c9SMauro Carvalho ChehabWith hlist_nulls we can avoid extra smp_rmb() in lockless_lookup() 144da82af04SPaul E. McKenneyand extra _release() in insert function. 1452cdb54c9SMauro Carvalho Chehab 1462cdb54c9SMauro Carvalho ChehabFor example, if we choose to store the slot number as the 'nulls' 1472cdb54c9SMauro Carvalho Chehabend-of-list marker for each slot of the hash table, we can detect 1482cdb54c9SMauro Carvalho Chehaba race (some writer did a delete and/or a move of an object 1492cdb54c9SMauro Carvalho Chehabto another chain) checking the final 'nulls' value if 1502cdb54c9SMauro Carvalho Chehabthe lookup met the end of chain. If final 'nulls' value 1512cdb54c9SMauro Carvalho Chehabis not the slot number, then we must restart the lookup at 1522cdb54c9SMauro Carvalho Chehabthe beginning. If the object was moved to the same chain, 153da82af04SPaul E. McKenneythen the reader doesn't care: It might occasionally 1542cdb54c9SMauro Carvalho Chehabscan the list again without harm. 1552cdb54c9SMauro Carvalho Chehab 1565326caa7SSeongJae ParkNote that using hlist_nulls means the type of 'obj_node' field of 1575326caa7SSeongJae Park'struct object' becomes 'struct hlist_nulls_node'. 1585326caa7SSeongJae Park 1592cdb54c9SMauro Carvalho Chehab 160da82af04SPaul E. McKenney1) lookup algorithm 161da82af04SPaul E. McKenney------------------- 1622cdb54c9SMauro Carvalho Chehab 1632cdb54c9SMauro Carvalho Chehab:: 1642cdb54c9SMauro Carvalho Chehab 1652cdb54c9SMauro Carvalho Chehab head = &table[slot]; 1662cdb54c9SMauro Carvalho Chehab begin: 167da82af04SPaul E. McKenney rcu_read_lock(); 168*d186204aSSeongJae Park hlist_nulls_for_each_entry_rcu(obj, node, head, obj_node) { 1692cdb54c9SMauro Carvalho Chehab if (obj->key == key) { 170da82af04SPaul E. McKenney if (!try_get_ref(obj)) { // might fail for free objects 171da82af04SPaul E. McKenney rcu_read_unlock(); 1722cdb54c9SMauro Carvalho Chehab goto begin; 173da82af04SPaul E. McKenney } 1742cdb54c9SMauro Carvalho Chehab if (obj->key != key) { // not the object we expected 1752cdb54c9SMauro Carvalho Chehab put_ref(obj); 176da82af04SPaul E. McKenney rcu_read_unlock(); 1772cdb54c9SMauro Carvalho Chehab goto begin; 1782cdb54c9SMauro Carvalho Chehab } 1792cdb54c9SMauro Carvalho Chehab goto out; 1802cdb54c9SMauro Carvalho Chehab } 181da82af04SPaul E. McKenney } 182da82af04SPaul E. McKenney 183da82af04SPaul E. McKenney // If the nulls value we got at the end of this lookup is 184da82af04SPaul E. McKenney // not the expected one, we must restart lookup. 185da82af04SPaul E. McKenney // We probably met an item that was moved to another chain. 186da82af04SPaul E. McKenney if (get_nulls_value(node) != slot) { 187da82af04SPaul E. McKenney put_ref(obj); 188da82af04SPaul E. McKenney rcu_read_unlock(); 1892cdb54c9SMauro Carvalho Chehab goto begin; 190da82af04SPaul E. McKenney } 1912cdb54c9SMauro Carvalho Chehab obj = NULL; 1922cdb54c9SMauro Carvalho Chehab 1932cdb54c9SMauro Carvalho Chehab out: 1942cdb54c9SMauro Carvalho Chehab rcu_read_unlock(); 1952cdb54c9SMauro Carvalho Chehab 196da82af04SPaul E. McKenney2) Insert algorithm 197da82af04SPaul E. McKenney------------------- 1982cdb54c9SMauro Carvalho Chehab 1992cdb54c9SMauro Carvalho Chehab:: 2002cdb54c9SMauro Carvalho Chehab 2012cdb54c9SMauro Carvalho Chehab /* 2022cdb54c9SMauro Carvalho Chehab * Please note that new inserts are done at the head of list, 2032cdb54c9SMauro Carvalho Chehab * not in the middle or end. 2042cdb54c9SMauro Carvalho Chehab */ 2052cdb54c9SMauro Carvalho Chehab obj = kmem_cache_alloc(cachep); 2062cdb54c9SMauro Carvalho Chehab lock_chain(); // typically a spin_lock() 2072cdb54c9SMauro Carvalho Chehab obj->key = key; 208da82af04SPaul E. McKenney atomic_set_release(&obj->refcnt, 1); // key before refcnt 2092cdb54c9SMauro Carvalho Chehab /* 2102cdb54c9SMauro Carvalho Chehab * insert obj in RCU way (readers might be traversing chain) 2112cdb54c9SMauro Carvalho Chehab */ 2122cdb54c9SMauro Carvalho Chehab hlist_nulls_add_head_rcu(&obj->obj_node, list); 2132cdb54c9SMauro Carvalho Chehab unlock_chain(); // typically a spin_unlock() 214