// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Resilient Queued Spin Lock
 *
 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
 * (C) Copyright 2013-2014,2018 Red Hat, Inc.
 * (C) Copyright 2015 Intel Corp.
 * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
 * (C) Copyright 2024-2025 Meta Platforms, Inc. and affiliates.
 *
 * Authors: Waiman Long <longman@redhat.com>
 *          Peter Zijlstra <peterz@infradead.org>
 *          Kumar Kartikeya Dwivedi <memxor@gmail.com>
 */

#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
#include <linux/prefetch.h>
#include <asm/byteorder.h>
#include <asm/qspinlock.h>
#include <trace/events/lock.h>
#include <asm/rqspinlock.h>
#include <linux/timekeeping.h>

/*
 * Include queued spinlock definitions and statistics code
 */
#include "../locking/qspinlock.h"
#include "../locking/lock_events.h"
#include "rqspinlock.h"

/*
 * The basic principle of a queue-based spinlock can best be understood
 * by studying a classic queue-based spinlock implementation called the
 * MCS lock. A copy of the original MCS lock paper ("Algorithms for Scalable
 * Synchronization on Shared-Memory Multiprocessors by Mellor-Crummey and
 * Scott") is available at
 *
 * https://bugzilla.kernel.org/show_bug.cgi?id=206115
 *
 * This queued spinlock implementation is based on the MCS lock, however to
 * make it fit the 4 bytes we assume spinlock_t to be, and preserve its
 * existing API, we must modify it somehow.
 *
 * In particular; where the traditional MCS lock consists of a tail pointer
 * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
 * unlock the next pending (next->locked), we compress both these: {tail,
 * next->locked} into a single u32 value.
 *
 * Since a spinlock disables recursion of its own context and there is a limit
 * to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there
 * are at most 4 nesting levels, it can be encoded by a 2-bit number. Now
 * we can encode the tail by combining the 2-bit nesting level with the cpu
 * number. With one byte for the lock value and 3 bytes for the tail, only a
 * 32-bit word is now needed. Even though we only need 1 bit for the lock,
 * we extend it to a full byte to achieve better performance for architectures
 * that support atomic byte write.
 *
 * We also change the first spinner to spin on the lock bit instead of its
 * node; whereby avoiding the need to carry a node from lock to unlock, and
 * preserving existing lock API. This also makes the unlock code simpler and
 * faster.
 *
 * N.B. The current implementation only supports architectures that allow
 * atomic operations on smaller 8-bit and 16-bit data types.
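 *
 * For reference, the word layout this inherits from the qspinlock code
 * (qspinlock_types.h) is, when NR_CPUS < 16K:
 *
 *  0- 7: locked byte
 *     8: pending
 *  9-15: not used
 * 16-17: tail index (nesting level)
 * 18-31: tail cpu (+1)
 *
 * and when NR_CPUS >= 16K, bits 9-10 hold the tail index and bits 11-31 the
 * tail cpu (+1).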
 *
 */

#include "../locking/mcs_spinlock.h"

struct rqspinlock_timeout {
	u64 timeout_end;
	u64 duration;
	u64 cur;
	u16 spin;
};

#define RES_TIMEOUT_VAL	2

DEFINE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks);
EXPORT_SYMBOL_GPL(rqspinlock_held_locks);

static bool is_lock_released(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts)
{
	if (!(atomic_read_acquire(&lock->val) & (mask)))
		return true;
	return false;
}

static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
				      struct rqspinlock_timeout *ts)
{
	struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
	int cnt = min(RES_NR_HELD, rqh->cnt);

	/*
	 * Return an error if we hold the lock we are attempting to acquire.
	 * We'll iterate over max 32 locks; no need to do is_lock_released.
	 */
	for (int i = 0; i < cnt - 1; i++) {
		if (rqh->locks[i] == lock)
			return -EDEADLK;
	}
	return 0;
}

/*
 * This focuses on the most common case of ABBA deadlocks (or ABBA involving
 * more locks, which reduce to ABBA). This is not exhaustive, and we rely on
 * timeouts as the final line of defense.
 */
static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
					struct rqspinlock_timeout *ts)
{
	struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
	int rqh_cnt = min(RES_NR_HELD, rqh->cnt);
	void *remote_lock;
	int cpu;

	/*
	 * Find the CPU holding the lock that we want to acquire. If there is a
	 * deadlock scenario, we will read a stable set on the remote CPU and
	 * find the target. This would be a constant time operation instead of
	 * O(NR_CPUS) if we could determine the owning CPU from a lock value, but
	 * that requires increasing the size of the lock word.
	 */
	for_each_possible_cpu(cpu) {
		struct rqspinlock_held *rqh_cpu = per_cpu_ptr(&rqspinlock_held_locks, cpu);
		int real_cnt = READ_ONCE(rqh_cpu->cnt);
		int cnt = min(RES_NR_HELD, real_cnt);

		/*
		 * Let's ensure to break out of this loop if the lock is available for
		 * us to potentially acquire.
		 */
		if (is_lock_released(lock, mask, ts))
			return 0;

		/*
		 * Skip ourselves, and CPUs whose count is less than 2, as they need at
		 * least one held lock and one acquisition attempt (reflected as top
		 * most entry) to participate in an ABBA deadlock.
		 *
		 * If cnt is more than RES_NR_HELD, it means the current lock being
		 * acquired won't appear in the table, and other locks in the table are
		 * already held, so we can't determine ABBA.
		 */
		if (cpu == smp_processor_id() || real_cnt < 2 || real_cnt > RES_NR_HELD)
			continue;

		/*
		 * Obtain the entry at the top, this corresponds to the lock the
		 * remote CPU is attempting to acquire in a deadlock situation,
		 * and would be one of the locks we hold on the current CPU.
		 */
		remote_lock = READ_ONCE(rqh_cpu->locks[cnt - 1]);
		/*
		 * If it is NULL, we've raced and cannot determine a deadlock
		 * conclusively, skip this CPU.
		 */
		if (!remote_lock)
			continue;
		/*
		 * Find if the lock we're attempting to acquire is held by this CPU.
		 * Don't consider the topmost entry, as that must be the latest lock
		 * being held or acquired. For a deadlock, the target CPU must also
		 * attempt to acquire a lock we hold, so for this search only 'cnt - 1'
		 * entries are important.
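		 *
		 * As an illustration, a plain ABBA case would show up in the
		 * held-locks tables as:
		 *
		 *   this CPU:   { ..., A, B (top, being acquired) }
		 *   remote CPU: { ..., B, A (top, being acquired) }
		 *
		 * We spin on B, find B held below the remote CPU's top entry,
		 * and its top entry A is a lock we already hold, so the search
		 * below reports -EDEADLK.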
		 */
		for (int i = 0; i < cnt - 1; i++) {
			if (READ_ONCE(rqh_cpu->locks[i]) != lock)
				continue;
			/*
			 * We found our lock as held on the remote CPU. Is the
			 * acquisition attempt on the remote CPU for a lock held
			 * by us? If so, we have a deadlock situation, and need
			 * to recover.
			 */
			for (int i = 0; i < rqh_cnt - 1; i++) {
				if (rqh->locks[i] == remote_lock)
					return -EDEADLK;
			}
			/*
			 * Inconclusive; retry again later.
			 */
			return 0;
		}
	}
	return 0;
}

static noinline int check_deadlock(rqspinlock_t *lock, u32 mask,
				   struct rqspinlock_timeout *ts)
{
	int ret;

	ret = check_deadlock_AA(lock, mask, ts);
	if (ret)
		return ret;
	ret = check_deadlock_ABBA(lock, mask, ts);
	if (ret)
		return ret;

	return 0;
}

static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
				  struct rqspinlock_timeout *ts)
{
	u64 time = ktime_get_mono_fast_ns();
	u64 prev = ts->cur;

	if (!ts->timeout_end) {
		ts->cur = time;
		ts->timeout_end = time + ts->duration;
		return 0;
	}

	if (time > ts->timeout_end)
		return -ETIMEDOUT;

	/*
	 * A millisecond interval passed from last time? Trigger deadlock
	 * checks.
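	 *
	 * This rate limits the O(NR_CPUS) ABBA scan to at most once per
	 * millisecond of spinning, keeping deadlock detection cheap relative
	 * to the wait loop itself.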
	 */
	if (prev + NSEC_PER_MSEC < time) {
		ts->cur = time;
		return check_deadlock(lock, mask, ts);
	}

	return 0;
}

/*
 * Do not amortize with spins when res_smp_cond_load_acquire is defined,
 * as the macro does internal amortization for us.
 */
#ifndef res_smp_cond_load_acquire
#define RES_CHECK_TIMEOUT(ts, ret, mask)                              \
	({                                                            \
		if (!(ts).spin++)                                     \
			(ret) = check_timeout((lock), (mask), &(ts)); \
		(ret);                                                \
	})
#else
#define RES_CHECK_TIMEOUT(ts, ret, mask)                              \
	({ (ret) = check_timeout((lock), (mask), &(ts)); })
#endif

/*
 * Initialize the 'spin' member.
 * Set spin member to 0 to trigger AA/ABBA checks immediately.
 */
#define RES_INIT_TIMEOUT(ts) ({ (ts).spin = 0; })

/*
 * We only need to reset 'timeout_end', 'spin' will just wrap around as necessary.
 * Duration is defined for each spin attempt, so set it here.
 */
#define RES_RESET_TIMEOUT(ts, _duration) ({ (ts).timeout_end = 0; (ts).duration = _duration; })

/*
 * Per-CPU queue node structures; we can never have more than 4 nested
 * contexts: task, softirq, hardirq, nmi.
 *
 * Exactly fits one 64-byte cacheline on a 64-bit architecture.
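 * (That is 4 nodes of 16 bytes each, assuming struct qnode carries nothing
 * beyond the embedded mcs_spinlock.)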
 */
static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);

#ifndef res_smp_cond_load_acquire
#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire(v, c)
#endif

#define res_atomic_cond_read_acquire(v, c) res_smp_cond_load_acquire(&(v)->counter, (c))

/**
 * resilient_queued_spin_lock_slowpath - acquire the queued spinlock
 * @lock: Pointer to queued spinlock structure
 * @val: Current value of the queued spinlock 32-bit word
 *
 * Return:
 * * 0		- Lock was acquired successfully.
 * * -EDEADLK	- Lock acquisition failed because of AA/ABBA deadlock.
 * * -ETIMEDOUT - Lock acquisition failed because of timeout.
 *
 * (queue tail, pending bit, lock value)
 *
 *              fast     :    slow                                  :    unlock
 *                       :                                          :
 * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
 *                       :       | ^--------.------.             /  :
 *                       :       v           \      \            |  :
 * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
 *                       :       | ^--'              |           |  :
 *                       :       v                   |           |  :
 * uncontended           :    (n,x,y) +--> (n,0,0)   --'          |  :
 *   queue               :       | ^--'                           |  :
 *                       :       v                                |  :
 * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
 *   queue               :         ^--'                             :
 */
int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
{
	struct mcs_spinlock *prev, *next, *node;
	struct rqspinlock_timeout ts;
	int idx, ret = 0;
	u32 old, tail;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	RES_INIT_TIMEOUT(ts);

	/*
	 * Wait for in-progress pending->locked hand-overs with a bounded
	 * number of spins so that we guarantee forward progress.
	 *
	 * 0,1,0 -> 0,0,1
	 */
	if (val == _Q_PENDING_VAL) {
		int cnt = _Q_PENDING_LOOPS;
		val = atomic_cond_read_relaxed(&lock->val,
					       (VAL != _Q_PENDING_VAL) || !cnt--);
	}

	/*
	 * If we observe any contention; queue.
	 */
	if (val & ~_Q_LOCKED_MASK)
		goto queue;

	/*
	 * trylock || pending
	 *
	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
	 */
	val = queued_fetch_set_pending_acquire(lock);

	/*
	 * If we observe contention, there is a concurrent locker.
	 *
	 * Undo and queue; our setting of PENDING might have made the
	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
	 * on @next to become !NULL.
	 */
	if (unlikely(val & ~_Q_LOCKED_MASK)) {

		/* Undo PENDING if we set it. */
		if (!(val & _Q_PENDING_MASK))
			clear_pending(lock);

		goto queue;
	}

	/*
	 * Grab an entry in the held locks array, to enable deadlock detection.
	 */
	grab_held_lock_entry(lock);

	/*
	 * We're pending, wait for the owner to go away.
	 *
	 * 0,1,1 -> *,1,0
	 *
	 * this wait loop must be a load-acquire such that we match the
	 * store-release that clears the locked bit and create lock
	 * sequentiality; this is because not all
	 * clear_pending_set_locked() implementations imply full
	 * barriers.
	 */
	if (val & _Q_LOCKED_MASK) {
		RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
		res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK));
	}

	if (ret) {
		/*
		 * We waited for the locked bit to go back to 0, as the pending
		 * waiter, but timed out. We need to clear the pending bit since
		 * we own it. Once a stuck owner has been recovered, the lock
		 * must be restored to a valid state, hence removing the pending
		 * bit is necessary.
		 *
		 * *,1,* -> *,0,*
		 */
		clear_pending(lock);
		lockevent_inc(rqspinlock_lock_timeout);
		goto err_release_entry;
	}

	/*
	 * take ownership and clear the pending bit.
	 *
	 * 0,1,0 -> 0,0,1
	 */
	clear_pending_set_locked(lock);
	lockevent_inc(lock_pending);
	return 0;

	/*
	 * End of pending bit optimistic spinning and beginning of MCS
	 * queuing.
	 */
queue:
	lockevent_inc(lock_slowpath);
	/*
	 * Grab deadlock detection entry for the queue path.
	 */
	grab_held_lock_entry(lock);

	node = this_cpu_ptr(&rqnodes[0].mcs);
	idx = node->count++;
	tail = encode_tail(smp_processor_id(), idx);

	trace_contention_begin(lock, LCB_F_SPIN);

	/*
	 * 4 nodes are allocated based on the assumption that there will
	 * not be nested NMIs taking spinlocks. That may not be true in
	 * some architectures even though the chance of needing more than
	 * 4 nodes will still be extremely unlikely. When that happens,
	 * we fall back to spinning on the lock directly without using
	 * any MCS node. This is not the most elegant solution, but is
	 * simple enough.
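	 *
	 * Note that this fallback still honours the timeout and deadlock
	 * checks: RES_CHECK_TIMEOUT() is passed an all-ones mask, so
	 * is_lock_released() only considers the lock free once the whole
	 * word is clear.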
	 */
	if (unlikely(idx >= _Q_MAX_NODES)) {
		lockevent_inc(lock_no_node);
		RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
		while (!queued_spin_trylock(lock)) {
			if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) {
				lockevent_inc(rqspinlock_lock_timeout);
				goto err_release_node;
			}
			cpu_relax();
		}
		goto release;
	}

	node = grab_mcs_node(node, idx);

	/*
	 * Keep counts of non-zero index values:
	 */
	lockevent_cond_inc(lock_use_node2 + idx - 1, idx);

	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();

	node->locked = 0;
	node->next = NULL;

	/*
	 * We touched a (possibly) cold cacheline in the per-cpu queue node;
	 * attempt the trylock once more in the hope someone let go while we
	 * weren't watching.
	 */
	if (queued_spin_trylock(lock))
		goto release;

	/*
	 * Ensure that the initialisation of @node is complete before we
	 * publish the updated tail via xchg_tail() and potentially link
	 * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
	 */
	smp_wmb();

	/*
	 * Publish the updated tail.
	 * We have already touched the queueing cacheline; don't bother with
	 * pending stuff.
	 *
	 * p,*,* -> n,*,*
	 */
	old = xchg_tail(lock, tail);
	next = NULL;

	/*
	 * if there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_MASK) {
		int val;

		prev = decode_tail(old, rqnodes);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		val = arch_mcs_spin_lock_contended(&node->locked);
		if (val == RES_TIMEOUT_VAL) {
			ret = -EDEADLK;
			goto waitq_timeout;
		}

		/*
		 * While waiting for the MCS lock, the next pointer may have
		 * been set by another lock waiter. We optimistically load
		 * the next pointer & prefetch the cacheline for writing
		 * to reduce latency in the upcoming MCS unlock operation.
		 */
		next = READ_ONCE(node->next);
		if (next)
			prefetchw(next);
	}

	/*
	 * we're at the head of the waitqueue, wait for the owner & pending to
	 * go away.
	 *
	 * *,x,y -> *,0,0
	 *
	 * this wait loop must use a load-acquire such that we match the
	 * store-release that clears the locked bit and create lock
	 * sequentiality; this is because the set_locked() function below
	 * does not imply a full barrier.
	 *
	 * We use RES_DEF_TIMEOUT * 2 as the duration, as RES_DEF_TIMEOUT is
	 * meant to span maximum allowed time per critical section, and we may
	 * have both the owner of the lock and the pending bit waiter ahead of
	 * us.
	 */
	RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2);
	val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) ||
					   RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK));

waitq_timeout:
	if (ret) {
		/*
		 * If the tail is still pointing to us, then we are the final waiter,
		 * and are responsible for resetting the tail back to 0. Otherwise, if
		 * the cmpxchg operation fails, we signal the next waiter to take exit
		 * and try the same. For a waiter with tail node 'n':
		 *
		 * n,*,* -> 0,*,*
		 *
		 * When performing cmpxchg for the whole word (NR_CPUS > 16k), it is
		 * possible locked/pending bits keep changing and we see failures even
		 * when we remain the head of wait queue. However, eventually,
		 * pending bit owner will unset the pending bit, and new waiters
		 * will queue behind us. This will leave the lock owner in
		 * charge, and it will eventually either set locked bit to 0, or
		 * leave it as 1, allowing us to make progress.
		 *
		 * We terminate the whole wait queue for two reasons. Firstly,
		 * we eschew per-waiter timeouts with one applied at the head of
		 * the wait queue. This allows everyone to break out faster
		 * once we've seen the owner / pending waiter not responding for
		 * the timeout duration from the head. Secondly, it avoids
		 * complicated synchronization, because when not leaving in FIFO
		 * order, prev's next pointer needs to be fixed up etc.
		 */
		if (!try_cmpxchg_tail(lock, tail, 0)) {
			next = smp_cond_load_relaxed(&node->next, VAL);
			WRITE_ONCE(next->locked, RES_TIMEOUT_VAL);
		}
		lockevent_inc(rqspinlock_lock_timeout);
		goto err_release_node;
	}

	/*
	 * claim the lock:
	 *
	 * n,0,0 -> 0,0,1 : lock, uncontended
	 * *,*,0 -> *,*,1 : lock, contended
	 *
	 * If the queue head is the only one in the queue (lock value == tail)
	 * and nobody is pending, clear the tail code and grab the lock.
	 * Otherwise, we only need to grab the lock.
	 */

	/*
	 * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
	 * above wait condition, therefore any concurrent setting of
	 * PENDING will make the uncontended transition fail.
	 */
	if ((val & _Q_TAIL_MASK) == tail) {
		if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
			goto release; /* No contention */
	}

	/*
	 * Either somebody is queued behind us or _Q_PENDING_VAL got set
	 * which will then detect the remaining tail and queue behind us
	 * ensuring we'll see a @next.
	 */
	set_locked(lock);

	/*
	 * contended path; wait for next if not observed yet, release.
	 */
	if (!next)
		next = smp_cond_load_relaxed(&node->next, (VAL));

	arch_mcs_spin_unlock_contended(&next->locked);

release:
	trace_contention_end(lock, 0);

	/*
	 * release the node
	 */
	__this_cpu_dec(rqnodes[0].mcs.count);
	return ret;
err_release_node:
	trace_contention_end(lock, ret);
	__this_cpu_dec(rqnodes[0].mcs.count);
err_release_entry:
	release_held_lock_entry();
	return ret;
}
EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath);
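
/*
 * Usage sketch (not part of this file): callers are expected to go through
 * the wrappers in the rqspinlock header rather than invoking the slowpath
 * directly, and must check the return value since acquisition can fail.
 * Assuming the raw_res_spin_lock()/raw_res_spin_unlock() helpers declared
 * there, a hypothetical caller would look roughly like:
 *
 *	int update_shared_state(rqspinlock_t *lock)
 *	{
 *		int ret = raw_res_spin_lock(lock);
 *
 *		if (ret)
 *			return ret;
 *		...critical section...
 *		raw_res_spin_unlock(lock);
 *		return 0;
 *	}
 *
 * where a non-zero return (-EDEADLK or -ETIMEDOUT) means the lock was never
 * taken and the protected data must not be touched.
 */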