xref: /linux/Documentation/RCU/rculist_nulls.rst (revision a1c613ae4c322ddd58d5a8539dbfba2a0380a8c0)
12cdb54c9SMauro Carvalho Chehab.. SPDX-License-Identifier: GPL-2.0
22cdb54c9SMauro Carvalho Chehab
32cdb54c9SMauro Carvalho Chehab=================================================
42cdb54c9SMauro Carvalho ChehabUsing RCU hlist_nulls to protect list and objects
52cdb54c9SMauro Carvalho Chehab=================================================
62cdb54c9SMauro Carvalho Chehab
72cdb54c9SMauro Carvalho ChehabThis section describes how to use hlist_nulls to
82cdb54c9SMauro Carvalho Chehabprotect read-mostly linked lists and
92cdb54c9SMauro Carvalho Chehabobjects using SLAB_TYPESAFE_BY_RCU allocations.
102cdb54c9SMauro Carvalho Chehab
11404147faSAkira YokosawaPlease read the basics in listRCU.rst.
122cdb54c9SMauro Carvalho Chehab
132d9c318bSMauro Carvalho ChehabUsing 'nulls'
142d9c318bSMauro Carvalho Chehab=============
152d9c318bSMauro Carvalho Chehab
162cdb54c9SMauro Carvalho ChehabUsing special makers (called 'nulls') is a convenient way
17da82af04SPaul E. McKenneyto solve following problem.
182cdb54c9SMauro Carvalho Chehab
19da82af04SPaul E. McKenneyWithout 'nulls', a typical RCU linked list managing objects which are
20da82af04SPaul E. McKenneyallocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
215326caa7SSeongJae Parkalgorithms.  Following examples assume 'obj' is a pointer to such
225326caa7SSeongJae Parkobjects, which is having below type.
235326caa7SSeongJae Park
245326caa7SSeongJae Park::
255326caa7SSeongJae Park
265326caa7SSeongJae Park  struct object {
275326caa7SSeongJae Park    struct hlist_node obj_node;
285326caa7SSeongJae Park    atomic_t refcnt;
295326caa7SSeongJae Park    unsigned int key;
305326caa7SSeongJae Park  };
312cdb54c9SMauro Carvalho Chehab
32da82af04SPaul E. McKenney1) Lookup algorithm
33da82af04SPaul E. McKenney-------------------
342cdb54c9SMauro Carvalho Chehab
352cdb54c9SMauro Carvalho Chehab::
362cdb54c9SMauro Carvalho Chehab
372cdb54c9SMauro Carvalho Chehab  begin:
383f831e38SSeongJae Park  rcu_read_lock();
392cdb54c9SMauro Carvalho Chehab  obj = lockless_lookup(key);
402cdb54c9SMauro Carvalho Chehab  if (obj) {
41bc25e7c3SAlan Huang    if (!try_get_ref(obj)) { // might fail for free objects
42bc25e7c3SAlan Huang      rcu_read_unlock();
432cdb54c9SMauro Carvalho Chehab      goto begin;
44bc25e7c3SAlan Huang    }
452cdb54c9SMauro Carvalho Chehab    /*
462cdb54c9SMauro Carvalho Chehab    * Because a writer could delete object, and a writer could
472cdb54c9SMauro Carvalho Chehab    * reuse these object before the RCU grace period, we
482cdb54c9SMauro Carvalho Chehab    * must check key after getting the reference on object
492cdb54c9SMauro Carvalho Chehab    */
502cdb54c9SMauro Carvalho Chehab    if (obj->key != key) { // not the object we expected
512cdb54c9SMauro Carvalho Chehab      put_ref(obj);
52da82af04SPaul E. McKenney      rcu_read_unlock();
532cdb54c9SMauro Carvalho Chehab      goto begin;
542cdb54c9SMauro Carvalho Chehab    }
552cdb54c9SMauro Carvalho Chehab  }
562cdb54c9SMauro Carvalho Chehab  rcu_read_unlock();
572cdb54c9SMauro Carvalho Chehab
582cdb54c9SMauro Carvalho ChehabBeware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu()
592cdb54c9SMauro Carvalho Chehabbut a version with an additional memory barrier (smp_rmb())
602cdb54c9SMauro Carvalho Chehab
612cdb54c9SMauro Carvalho Chehab::
622cdb54c9SMauro Carvalho Chehab
632cdb54c9SMauro Carvalho Chehab  lockless_lookup(key)
642cdb54c9SMauro Carvalho Chehab  {
652cdb54c9SMauro Carvalho Chehab    struct hlist_node *node, *next;
662cdb54c9SMauro Carvalho Chehab    for (pos = rcu_dereference((head)->first);
672cdb54c9SMauro Carvalho Chehab         pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
68d186204aSSeongJae Park         ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
692cdb54c9SMauro Carvalho Chehab         pos = rcu_dereference(next))
702cdb54c9SMauro Carvalho Chehab      if (obj->key == key)
712cdb54c9SMauro Carvalho Chehab        return obj;
722cdb54c9SMauro Carvalho Chehab    return NULL;
732cdb54c9SMauro Carvalho Chehab  }
742cdb54c9SMauro Carvalho Chehab
752cdb54c9SMauro Carvalho ChehabAnd note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
762cdb54c9SMauro Carvalho Chehab
772cdb54c9SMauro Carvalho Chehab  struct hlist_node *node;
782cdb54c9SMauro Carvalho Chehab  for (pos = rcu_dereference((head)->first);
792cdb54c9SMauro Carvalho Chehab       pos && ({ prefetch(pos->next); 1; }) &&
80d186204aSSeongJae Park       ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
812cdb54c9SMauro Carvalho Chehab       pos = rcu_dereference(pos->next))
822cdb54c9SMauro Carvalho Chehab    if (obj->key == key)
832cdb54c9SMauro Carvalho Chehab      return obj;
842cdb54c9SMauro Carvalho Chehab  return NULL;
852cdb54c9SMauro Carvalho Chehab
862cdb54c9SMauro Carvalho ChehabQuoting Corey Minyard::
872cdb54c9SMauro Carvalho Chehab
882cdb54c9SMauro Carvalho Chehab  "If the object is moved from one list to another list in-between the
892cdb54c9SMauro Carvalho Chehab  time the hash is calculated and the next field is accessed, and the
902cdb54c9SMauro Carvalho Chehab  object has moved to the end of a new list, the traversal will not
912cdb54c9SMauro Carvalho Chehab  complete properly on the list it should have, since the object will
922cdb54c9SMauro Carvalho Chehab  be on the end of the new list and there's not a way to tell it's on a
932cdb54c9SMauro Carvalho Chehab  new list and restart the list traversal. I think that this can be
942cdb54c9SMauro Carvalho Chehab  solved by pre-fetching the "next" field (with proper barriers) before
952cdb54c9SMauro Carvalho Chehab  checking the key."
962cdb54c9SMauro Carvalho Chehab
97da82af04SPaul E. McKenney2) Insertion algorithm
98da82af04SPaul E. McKenney----------------------
992cdb54c9SMauro Carvalho Chehab
100d186204aSSeongJae ParkWe need to make sure a reader cannot read the new 'obj->obj_node.next' value
101da82af04SPaul E. McKenneyand previous value of 'obj->key'. Otherwise, an item could be deleted
1022cdb54c9SMauro Carvalho Chehabfrom a chain, and inserted into another chain. If new chain was empty
103da82af04SPaul E. McKenneybefore the move, 'next' pointer is NULL, and lockless reader can not
104da82af04SPaul E. McKenneydetect the fact that it missed following items in original chain.
1052cdb54c9SMauro Carvalho Chehab
1062cdb54c9SMauro Carvalho Chehab::
1072cdb54c9SMauro Carvalho Chehab
1082cdb54c9SMauro Carvalho Chehab  /*
1092cdb54c9SMauro Carvalho Chehab   * Please note that new inserts are done at the head of list,
1102cdb54c9SMauro Carvalho Chehab   * not in the middle or end.
1112cdb54c9SMauro Carvalho Chehab   */
1122cdb54c9SMauro Carvalho Chehab  obj = kmem_cache_alloc(...);
1132cdb54c9SMauro Carvalho Chehab  lock_chain(); // typically a spin_lock()
1142cdb54c9SMauro Carvalho Chehab  obj->key = key;
115da82af04SPaul E. McKenney  atomic_set_release(&obj->refcnt, 1); // key before refcnt
1162cdb54c9SMauro Carvalho Chehab  hlist_add_head_rcu(&obj->obj_node, list);
1172cdb54c9SMauro Carvalho Chehab  unlock_chain(); // typically a spin_unlock()
1182cdb54c9SMauro Carvalho Chehab
1192cdb54c9SMauro Carvalho Chehab
120da82af04SPaul E. McKenney3) Removal algorithm
121da82af04SPaul E. McKenney--------------------
122da82af04SPaul E. McKenney
1232cdb54c9SMauro Carvalho ChehabNothing special here, we can use a standard RCU hlist deletion.
1242cdb54c9SMauro Carvalho ChehabBut thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
1252cdb54c9SMauro Carvalho Chehabvery very fast (before the end of RCU grace period)
1262cdb54c9SMauro Carvalho Chehab
1272cdb54c9SMauro Carvalho Chehab::
1282cdb54c9SMauro Carvalho Chehab
1292cdb54c9SMauro Carvalho Chehab  if (put_last_reference_on(obj) {
1302cdb54c9SMauro Carvalho Chehab    lock_chain(); // typically a spin_lock()
1312cdb54c9SMauro Carvalho Chehab    hlist_del_init_rcu(&obj->obj_node);
1322cdb54c9SMauro Carvalho Chehab    unlock_chain(); // typically a spin_unlock()
1332cdb54c9SMauro Carvalho Chehab    kmem_cache_free(cachep, obj);
1342cdb54c9SMauro Carvalho Chehab  }
1352cdb54c9SMauro Carvalho Chehab
1362cdb54c9SMauro Carvalho Chehab
1372cdb54c9SMauro Carvalho Chehab
1382cdb54c9SMauro Carvalho Chehab--------------------------------------------------------------------------
1392cdb54c9SMauro Carvalho Chehab
1402d9c318bSMauro Carvalho ChehabAvoiding extra smp_rmb()
1412d9c318bSMauro Carvalho Chehab========================
1422d9c318bSMauro Carvalho Chehab
143*47d63d7aSSeongJae ParkWith hlist_nulls we can avoid extra smp_rmb() in lockless_lookup().
1442cdb54c9SMauro Carvalho Chehab
1452cdb54c9SMauro Carvalho ChehabFor example, if we choose to store the slot number as the 'nulls'
1462cdb54c9SMauro Carvalho Chehabend-of-list marker for each slot of the hash table, we can detect
1472cdb54c9SMauro Carvalho Chehaba race (some writer did a delete and/or a move of an object
1482cdb54c9SMauro Carvalho Chehabto another chain) checking the final 'nulls' value if
1492cdb54c9SMauro Carvalho Chehabthe lookup met the end of chain. If final 'nulls' value
1502cdb54c9SMauro Carvalho Chehabis not the slot number, then we must restart the lookup at
1512cdb54c9SMauro Carvalho Chehabthe beginning. If the object was moved to the same chain,
152da82af04SPaul E. McKenneythen the reader doesn't care: It might occasionally
1532cdb54c9SMauro Carvalho Chehabscan the list again without harm.
1542cdb54c9SMauro Carvalho Chehab
1555326caa7SSeongJae ParkNote that using hlist_nulls means the type of 'obj_node' field of
1565326caa7SSeongJae Park'struct object' becomes 'struct hlist_nulls_node'.
1575326caa7SSeongJae Park
1582cdb54c9SMauro Carvalho Chehab
159da82af04SPaul E. McKenney1) lookup algorithm
160da82af04SPaul E. McKenney-------------------
1612cdb54c9SMauro Carvalho Chehab
1622cdb54c9SMauro Carvalho Chehab::
1632cdb54c9SMauro Carvalho Chehab
1642cdb54c9SMauro Carvalho Chehab  head = &table[slot];
1652cdb54c9SMauro Carvalho Chehab  begin:
166da82af04SPaul E. McKenney  rcu_read_lock();
167d186204aSSeongJae Park  hlist_nulls_for_each_entry_rcu(obj, node, head, obj_node) {
1682cdb54c9SMauro Carvalho Chehab    if (obj->key == key) {
169da82af04SPaul E. McKenney      if (!try_get_ref(obj)) { // might fail for free objects
170da82af04SPaul E. McKenney	rcu_read_unlock();
1712cdb54c9SMauro Carvalho Chehab        goto begin;
172da82af04SPaul E. McKenney      }
1732cdb54c9SMauro Carvalho Chehab      if (obj->key != key) { // not the object we expected
1742cdb54c9SMauro Carvalho Chehab        put_ref(obj);
175da82af04SPaul E. McKenney	rcu_read_unlock();
1762cdb54c9SMauro Carvalho Chehab        goto begin;
1772cdb54c9SMauro Carvalho Chehab      }
1782cdb54c9SMauro Carvalho Chehab      goto out;
1792cdb54c9SMauro Carvalho Chehab    }
180da82af04SPaul E. McKenney  }
181da82af04SPaul E. McKenney
182da82af04SPaul E. McKenney  // If the nulls value we got at the end of this lookup is
183da82af04SPaul E. McKenney  // not the expected one, we must restart lookup.
184da82af04SPaul E. McKenney  // We probably met an item that was moved to another chain.
185da82af04SPaul E. McKenney  if (get_nulls_value(node) != slot) {
186da82af04SPaul E. McKenney    put_ref(obj);
187da82af04SPaul E. McKenney    rcu_read_unlock();
1882cdb54c9SMauro Carvalho Chehab    goto begin;
189da82af04SPaul E. McKenney  }
1902cdb54c9SMauro Carvalho Chehab  obj = NULL;
1912cdb54c9SMauro Carvalho Chehab
1922cdb54c9SMauro Carvalho Chehab  out:
1932cdb54c9SMauro Carvalho Chehab  rcu_read_unlock();
1942cdb54c9SMauro Carvalho Chehab
195da82af04SPaul E. McKenney2) Insert algorithm
196da82af04SPaul E. McKenney-------------------
1972cdb54c9SMauro Carvalho Chehab
198*47d63d7aSSeongJae ParkSame to the above one, but uses hlist_nulls_add_head_rcu() instead of
199*47d63d7aSSeongJae Parkhlist_add_head_rcu().
200*47d63d7aSSeongJae Park
2012cdb54c9SMauro Carvalho Chehab::
2022cdb54c9SMauro Carvalho Chehab
2032cdb54c9SMauro Carvalho Chehab  /*
2042cdb54c9SMauro Carvalho Chehab   * Please note that new inserts are done at the head of list,
2052cdb54c9SMauro Carvalho Chehab   * not in the middle or end.
2062cdb54c9SMauro Carvalho Chehab   */
2072cdb54c9SMauro Carvalho Chehab  obj = kmem_cache_alloc(cachep);
2082cdb54c9SMauro Carvalho Chehab  lock_chain(); // typically a spin_lock()
2092cdb54c9SMauro Carvalho Chehab  obj->key = key;
210da82af04SPaul E. McKenney  atomic_set_release(&obj->refcnt, 1); // key before refcnt
2112cdb54c9SMauro Carvalho Chehab  /*
2122cdb54c9SMauro Carvalho Chehab   * insert obj in RCU way (readers might be traversing chain)
2132cdb54c9SMauro Carvalho Chehab   */
2142cdb54c9SMauro Carvalho Chehab  hlist_nulls_add_head_rcu(&obj->obj_node, list);
2152cdb54c9SMauro Carvalho Chehab  unlock_chain(); // typically a spin_unlock()
216