// SPDX-License-Identifier: GPL-2.0
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/osq_lock.h>

/*
 * An MCS like lock especially tailored for optimistic spinning for sleeping
 * lock implementations (mutex, rwsem, etc).
 *
 * Using a single mcs node per CPU is safe because sleeping locks should not be
 * called from interrupt context and we have preemption disabled while
 * spinning.
 */

struct optimistic_spin_node {
	struct optimistic_spin_node *next, *prev;
	int locked; /* 1 if lock acquired */
	int cpu; /* encoded CPU # + 1 value */
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);

/*
 * We use the value 0 to represent "no CPU", thus the encoded value
 * will be the CPU number incremented by 1.
 */
static inline int encode_cpu(int cpu_nr)
{
	return cpu_nr + 1;
}

static inline int node_cpu(struct optimistic_spin_node *node)
{
	return node->cpu - 1;
}

static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
{
	int cpu_nr = encoded_cpu_val - 1;

	return per_cpu_ptr(&osq_node, cpu_nr);
}

/*
 * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
 * Can return NULL in case we were the last queued and we updated @lock instead.
 *
 * If osq_lock() is being cancelled there must be a previous node
 * and 'old_cpu' is its CPU #.
 * For osq_unlock() there is never a previous node and old_cpu is
 * set to OSQ_UNLOCKED_VAL.
 */
static inline struct optimistic_spin_node *
osq_wait_next(struct optimistic_spin_queue *lock,
	      struct optimistic_spin_node *node,
	      int old_cpu)
{
	int curr = encode_cpu(smp_processor_id());

	for (;;) {
		if (atomic_read(&lock->tail) == curr &&
		    atomic_cmpxchg_acquire(&lock->tail, curr, old_cpu) == curr) {
			/*
			 * We were the last queued, we moved @lock back. @prev
			 * will now observe @lock and will complete its
			 * unlock()/unqueue().
			 */
			return NULL;
		}

		/*
		 * We must xchg() the @node->next value, because if we were to
		 * leave it in, a concurrent unlock()/unqueue() from
		 * @node->next might complete Step-A and think its @prev is
		 * still valid.
		 *
		 * If the concurrent unlock()/unqueue() wins the race, we'll
		 * wait for either @lock to point to us, through its Step-B, or
		 * wait for a new @node->next from its Step-C.
		 */
		if (node->next) {
			struct optimistic_spin_node *next;

			next = xchg(&node->next, NULL);
			if (next)
				return next;
		}

		cpu_relax();
	}
}

bool osq_lock(struct optimistic_spin_queue *lock)
{
	struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
	struct optimistic_spin_node *prev, *next;
	int curr = encode_cpu(smp_processor_id());
	int old;

	node->locked = 0;
	node->next = NULL;
	node->cpu = curr;

	/*
	 * We need both ACQUIRE (pairs with corresponding RELEASE in
	 * unlock() uncontended, or fastpath) and RELEASE (to publish
	 * the node fields we just initialised) semantics when updating
	 * the lock tail.
	 */
	old = atomic_xchg(&lock->tail, curr);
	if (old == OSQ_UNLOCKED_VAL)
		return true;

	prev = decode_cpu(old);
	node->prev = prev;

	/*
	 * osq_lock()			unqueue
	 *
	 * node->prev = prev		osq_wait_next()
	 * WMB				MB
	 * prev->next = node		next->prev = prev // unqueue-C
	 *
	 * Here 'node->prev' and 'next->prev' are the same variable and we need
	 * to ensure these stores happen in-order to avoid corrupting the list.
	 */
	smp_wmb();

	WRITE_ONCE(prev->next, node);

	/*
	 * Normally @prev is untouchable after the above store, because at that
	 * moment unlock can proceed and wipe the node element from the stack.
	 *
	 * However, since our nodes are static per-cpu storage, we're
	 * guaranteed their existence -- this allows us to apply
	 * cmpxchg in an attempt to undo our queueing.
	 */

	/*
	 * Wait to acquire the lock, or for cancellation. Note that
	 * need_resched() will come with an IPI, which will wake
	 * smp_cond_load_relaxed() if it is implemented with a monitor-wait.
	 * vcpu_is_preempted() relies on polling, be careful.
	 */
	if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() ||
				  vcpu_is_preempted(node_cpu(node->prev))))
		return true;

	/* unqueue */
	/*
	 * Step - A -- stabilize @prev
	 *
	 * Undo our @prev->next assignment; this will make @prev's
	 * unlock()/unqueue() wait for a next pointer since @lock points to us
	 * (or later).
	 */

	for (;;) {
		/*
		 * cpu_relax() below implies a compiler barrier which would
		 * prevent this comparison being optimized away.
		 */
		if (data_race(prev->next) == node &&
		    cmpxchg(&prev->next, node, NULL) == node)
			break;

		/*
		 * We can only fail the cmpxchg() racing against an unlock(),
		 * in which case we should observe @node->locked becoming
		 * true.
		 */
		if (smp_load_acquire(&node->locked))
			return true;

		cpu_relax();

		/*
		 * Or we race against a concurrent unqueue()'s step-B, in which
		 * case its step-C will write us a new @node->prev pointer.
		 */
		prev = READ_ONCE(node->prev);
	}

	/*
	 * Step - B -- stabilize @next
	 *
	 * Similar to unlock(), wait for @node->next or move @lock from @node
	 * back to @prev.
	 */

	next = osq_wait_next(lock, node, prev->cpu);
	if (!next)
		return false;

	/*
	 * Step - C -- unlink
	 *
	 * @prev is stable because it's still waiting for a new @prev->next
	 * pointer, @next is stable because our @node->next pointer is NULL and
	 * it will wait in Step-A.
	 */

	WRITE_ONCE(next->prev, prev);
	WRITE_ONCE(prev->next, next);

	return false;
}

void osq_unlock(struct optimistic_spin_queue *lock)
{
	struct optimistic_spin_node *node, *next;
	int curr = encode_cpu(smp_processor_id());

	/*
	 * Fast path for the uncontended case.
	 */
	if (likely(atomic_cmpxchg_release(&lock->tail, curr,
					  OSQ_UNLOCKED_VAL) == curr))
		return;

	/*
	 * Second most likely case.
	 */
	node = this_cpu_ptr(&osq_node);
	next = xchg(&node->next, NULL);
	if (next) {
		WRITE_ONCE(next->locked, 1);
		return;
	}

	next = osq_wait_next(lock, node, OSQ_UNLOCKED_VAL);
	if (next)
		WRITE_ONCE(next->locked, 1);
}
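
/*
 * Illustrative sketch only (guarded out from the build): a minimal example of
 * how a sleeping-lock slowpath might bracket its optimistic spin with
 * osq_lock()/osq_unlock(). 'struct my_sleeping_lock', my_trylock() and
 * my_owner_running() are hypothetical stand-ins for the caller's own lock
 * word and helpers; only the optimistic_spin_queue field and the
 * osq_lock()/osq_unlock() calls reflect this file. Preemption must stay
 * disabled for the whole spin, and the OSQ ensures that at most one waiter
 * per lock is spinning on the owner at any time.
 */
#if 0	/* example only, not compiled */
struct my_sleeping_lock {
	atomic_long_t			owner;	/* hypothetical owner word */
	struct optimistic_spin_queue	osq;	/* queue of optimistic spinners */
};

static bool my_optimistic_spin(struct my_sleeping_lock *lock)
{
	bool acquired = false;

	preempt_disable();

	/*
	 * Queue on the OSQ so that only one waiter at a time spins on the
	 * owner; osq_lock() returns false when we should stop and sleep.
	 */
	if (!osq_lock(&lock->osq))
		goto out;

	while (!need_resched()) {
		if (my_trylock(lock)) {			/* hypothetical */
			acquired = true;
			break;
		}

		/* Give up once the lock owner is no longer running. */
		if (!my_owner_running(lock))		/* hypothetical */
			break;

		cpu_relax();
	}

	osq_unlock(&lock->osq);
out:
	preempt_enable();
	return acquired;
}
#endif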