// SPDX-License-Identifier: GPL-2.0
/*
 * Ldisc rw semaphore
 *
 * The ldisc semaphore is semantically a rw_semaphore, but one which
 * enforces an alternate policy, namely:
 * 1) Supports lock wait timeouts
 * 2) Write waiter has priority
 * 3) Downgrading is not supported
 *
 * Implementation notes:
 * 1) Upper half of semaphore count is a wait count (differs from rwsem
 *    in that rwsem normalizes the upper half to the wait bias)
 * 2) Lacks overflow checking
 *
 * The generic counting was copied and modified from include/asm-generic/rwsem.h
 * by Paul Mackerras <paulus@samba.org>.
 *
 * The scheduling policy was copied and modified from lib/rwsem.c
 * Written by David Howells (dhowells@redhat.com).
 *
 * This implementation incorporates the write lock stealing work of
 * Michel Lespinasse <walken@google.com>.
 *
 * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com>
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/tty.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>


#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __acq(l, s, t, r, c, n, i)		\
				lock_acquire(&(l)->dep_map, s, t, r, c, n, i)
# define __rel(l, n, i)				\
				lock_release(&(l)->dep_map, n, i)
#define lockdep_acquire(l, s, t, i)		__acq(l, s, t, 0, 1, NULL, i)
#define lockdep_acquire_nest(l, s, t, n, i)	__acq(l, s, t, 0, 1, n, i)
#define lockdep_acquire_read(l, s, t, i)	__acq(l, s, t, 1, 1, NULL, i)
#define lockdep_release(l, n, i)		__rel(l, n, i)
#else
# define lockdep_acquire(l, s, t, i)		do { } while (0)
# define lockdep_acquire_nest(l, s, t, n, i)	do { } while (0)
# define lockdep_acquire_read(l, s, t, i)	do { } while (0)
# define lockdep_release(l, n, i)		do { } while (0)
#endif

#ifdef CONFIG_LOCK_STAT
# define lock_stat(_lock, stat)		lock_##stat(&(_lock)->dep_map, _RET_IP_)
#else
# define lock_stat(_lock, stat)		do { } while (0)
#endif


#if BITS_PER_LONG == 64
# define LDSEM_ACTIVE_MASK	0xffffffffL
#else
# define LDSEM_ACTIVE_MASK	0x0000ffffL
#endif

#define LDSEM_UNLOCKED		0L
#define LDSEM_ACTIVE_BIAS	1L
#define LDSEM_WAIT_BIAS		(-LDSEM_ACTIVE_MASK-1)
#define LDSEM_READ_BIAS		LDSEM_ACTIVE_BIAS
#define LDSEM_WRITE_BIAS	(LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS)
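/*
 * Worked example of the count layout (64-bit values; illustrative only):
 *
 *	unlocked                          count == 0
 *	one active reader                 count == LDSEM_READ_BIAS  == 0x1
 *	two active readers                count == 0x2
 *	one active writer                 count == LDSEM_WRITE_BIAS
 *	                                        == 0xffffffff00000001
 *	writer held + one waiting reader  count == LDSEM_WRITE_BIAS +
 *	                                           LDSEM_WAIT_BIAS
 *
 * The lower half (count & LDSEM_ACTIVE_MASK) counts lock holders; each
 * blocked waiter contributes one LDSEM_WAIT_BIAS to the (negative)
 * upper half.
 */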
struct ldsem_waiter {
	struct list_head list;
	struct task_struct *task;
};

/*
 * Initialize an ldsem:
 */
void __init_ldsem(struct ld_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	atomic_long_set(&sem->count, LDSEM_UNLOCKED);
	sem->wait_readers = 0;
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->read_wait);
	INIT_LIST_HEAD(&sem->write_wait);
}

static void __ldsem_wake_readers(struct ld_semaphore *sem)
{
	struct ldsem_waiter *waiter, *next;
	struct task_struct *tsk;
	long adjust, count;

	/*
	 * Try to grant read locks to all readers on the read wait list.
	 * Note the 'active part' of the count is incremented by
	 * the number of readers before waking any processes up.
	 */
	adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS);
	count = atomic_long_add_return(adjust, &sem->count);
	do {
		if (count > 0)
			break;
		if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust))
			return;
	} while (1);

	list_for_each_entry_safe(waiter, next, &sem->read_wait, list) {
		tsk = waiter->task;
		smp_mb();
		waiter->task = NULL;
		wake_up_process(tsk);
		put_task_struct(tsk);
	}
	INIT_LIST_HEAD(&sem->read_wait);
	sem->wait_readers = 0;
}

static inline int writer_trylock(struct ld_semaphore *sem)
{
	/*
	 * Only wake this writer if the active part of the count can be
	 * transitioned from 0 -> 1
	 */
	long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count);
	do {
		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS)
			return 1;
		if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS))
			return 0;
	} while (1);
}

static void __ldsem_wake_writer(struct ld_semaphore *sem)
{
	struct ldsem_waiter *waiter;

	waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list);
	wake_up_process(waiter->task);
}

/*
 * handle the lock release when there are processes blocked on it that
 * can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 */
static void __ldsem_wake(struct ld_semaphore *sem)
{
	if (!list_empty(&sem->write_wait))
		__ldsem_wake_writer(sem);
	else if (!list_empty(&sem->read_wait))
		__ldsem_wake_readers(sem);
}

static void ldsem_wake(struct ld_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);
	__ldsem_wake(sem);
	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}

/*
 * wait for the read lock to be granted
 */
static struct ld_semaphore __sched *
down_read_failed(struct ld_semaphore *sem, long count, long timeout)
{
	struct ldsem_waiter waiter;
	long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS;

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);

	/*
	 * Try to reverse the lock attempt but if the count has changed
	 * so that reversing fails, check if there are no waiters
	 * (the read lock was in fact granted), and early-out if so
	 */
	do {
		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) {
			count += adjust;
			break;
		}
		if (count > 0) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
	} while (1);

	list_add_tail(&waiter.list, &sem->read_wait);
	sem->wait_readers++;

	waiter.task = current;
	get_task_struct(current);

	/* if there are no active locks, wake the new lock owner(s) */
	if ((count & LDSEM_ACTIVE_MASK) == 0)
		__ldsem_wake(sem);

	raw_spin_unlock_irq(&sem->wait_lock);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!waiter.task)
			break;
		if (!timeout)
			break;
		timeout = schedule_timeout(timeout);
	}

	__set_current_state(TASK_RUNNING);

	if (!timeout) {
		/*
		 * Lock timed out but check if this task was just
		 * granted lock ownership - if so, pretend
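/*
 * Count bookkeeping for a contended read, traced through the code above
 * (illustrative): the fast path in __ldsem_down_read_nested() added
 * LDSEM_READ_BIAS; the reversal above swaps that for LDSEM_WAIT_BIAS
 * (adjust == -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS); and when
 * __ldsem_wake_readers() later grants the lock, it converts each waiter's
 * LDSEM_WAIT_BIAS back into LDSEM_ACTIVE_BIAS with a single atomic add.
 * A granted reader therefore always ends up contributing exactly one
 * LDSEM_ACTIVE_BIAS to the count.
 */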
		 * there was no timeout; otherwise, cleanup lock wait.
		 */
		raw_spin_lock_irq(&sem->wait_lock);
		if (waiter.task) {
			atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
			sem->wait_readers--;
			list_del(&waiter.list);
			raw_spin_unlock_irq(&sem->wait_lock);
			put_task_struct(waiter.task);
			return NULL;
		}
		raw_spin_unlock_irq(&sem->wait_lock);
	}

	return sem;
}

/*
 * wait for the write lock to be granted
 */
static struct ld_semaphore __sched *
down_write_failed(struct ld_semaphore *sem, long count, long timeout)
{
	struct ldsem_waiter waiter;
	long adjust = -LDSEM_ACTIVE_BIAS;
	int locked = 0;

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);

	/*
	 * Try to reverse the lock attempt but if the count has changed
	 * so that reversing fails, check if the lock is now owned,
	 * and early-out if so.
	 */
	do {
		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust))
			break;
		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
	} while (1);

	list_add_tail(&waiter.list, &sem->write_wait);

	waiter.task = current;

	set_current_state(TASK_UNINTERRUPTIBLE);
	for (;;) {
		if (!timeout)
			break;
		raw_spin_unlock_irq(&sem->wait_lock);
		timeout = schedule_timeout(timeout);
		raw_spin_lock_irq(&sem->wait_lock);
		set_current_state(TASK_UNINTERRUPTIBLE);
		locked = writer_trylock(sem);
		if (locked)
			break;
	}

	if (!locked)
		atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
	list_del(&waiter.list);

	/*
	 * On timeout, wake up every reader that gave way to this writer;
	 * otherwise those readers could sleep indefinitely while the
	 * lock sits uncontended.
	 */
	if (!locked && list_empty(&sem->write_wait))
		__ldsem_wake_readers(sem);

	raw_spin_unlock_irq(&sem->wait_lock);

	__set_current_state(TASK_RUNNING);

	/* lock wait may have timed out */
	if (!locked)
		return NULL;
	return sem;
}
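/*
 * Note on the scheduling policy implemented above (a summary, not new
 * behavior): a waiting writer is never handed the lock directly; every
 * wakeup goes back through writer_trylock(), so any writer may "steal"
 * the lock the moment the active count drops to zero. Readers are
 * granted the lock en masse by __ldsem_wake_readers() only when no
 * writer is waiting, which is what gives write waiters their priority.
 */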

static int __ldsem_down_read_nested(struct ld_semaphore *sem,
				    int subclass, long timeout)
{
	long count;

	lockdep_acquire_read(sem, subclass, 0, _RET_IP_);

	count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count);
	if (count <= 0) {
		lock_stat(sem, contended);
		if (!down_read_failed(sem, count, timeout)) {
			lockdep_release(sem, 1, _RET_IP_);
			return 0;
		}
	}
	lock_stat(sem, acquired);
	return 1;
}

static int __ldsem_down_write_nested(struct ld_semaphore *sem,
				     int subclass, long timeout)
{
	long count;

	lockdep_acquire(sem, subclass, 0, _RET_IP_);

	count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count);
	if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) {
		lock_stat(sem, contended);
		if (!down_write_failed(sem, count, timeout)) {
			lockdep_release(sem, 1, _RET_IP_);
			return 0;
		}
	}
	lock_stat(sem, acquired);
	return 1;
}


/*
 * lock for reading -- returns 1 if successful, 0 if timed out
 */
int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout)
{
	might_sleep();
	return __ldsem_down_read_nested(sem, 0, timeout);
}

/*
 * trylock for reading -- returns 1 if successful, 0 if contention
 */
int ldsem_down_read_trylock(struct ld_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while (count >= 0) {
		if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) {
			lockdep_acquire_read(sem, 0, 1, _RET_IP_);
			lock_stat(sem, acquired);
			return 1;
		}
	}
	return 0;
}

/*
 * lock for writing -- returns 1 if successful, 0 if timed out
 */
int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout)
{
	might_sleep();
	return __ldsem_down_write_nested(sem, 0, timeout);
}

/*
 * trylock for writing -- returns 1 if successful, 0 if contention
 */
int ldsem_down_write_trylock(struct ld_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while ((count & LDSEM_ACTIVE_MASK) == 0) {
		if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) {
			lockdep_acquire(sem, 0, 1, _RET_IP_);
			lock_stat(sem, acquired);
			return 1;
		}
	}
	return 0;
}

/*
 * release a read lock
 */
void ldsem_up_read(struct ld_semaphore *sem)
{
	long count;

	lockdep_release(sem, 1, _RET_IP_);

	count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count);
	if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0)
		ldsem_wake(sem);
}

/*
 * release a write lock
 */
void ldsem_up_write(struct ld_semaphore *sem)
{
	long count;

	lockdep_release(sem, 1, _RET_IP_);

	count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count);
	if (count < 0)
		ldsem_wake(sem);
}


#ifdef CONFIG_DEBUG_LOCK_ALLOC

int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout)
{
	might_sleep();
	return __ldsem_down_read_nested(sem, subclass, timeout);
}

int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
			    long timeout)
{
	might_sleep();
	return __ldsem_down_write_nested(sem, subclass, timeout);
}

#endif
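/*
 * Usage sketch (illustrative; modeled on how drivers/tty/tty_ldisc.c
 * takes tty->ldisc_sem in tty_ldisc_ref_wait() and tty_ldisc_deref()):
 *
 *	if (!ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT))
 *		return NULL;	(the wait timed out; lock was not taken)
 *	... tty->ldisc may be used safely here ...
 *	ldsem_up_read(&tty->ldisc_sem);
 *
 * A nonzero return from ldsem_down_read()/ldsem_down_write() means the
 * lock is held; zero means the timeout expired without acquiring it.
 */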