1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Ldisc rw semaphore 4 * 5 * The ldisc semaphore is semantically a rw_semaphore but which enforces 6 * an alternate policy, namely: 7 * 1) Supports lock wait timeouts 8 * 2) Write waiter has priority 9 * 3) Downgrading is not supported 10 * 11 * Implementation notes: 12 * 1) Upper half of semaphore count is a wait count (differs from rwsem 13 * in that rwsem normalizes the upper half to the wait bias) 14 * 2) Lacks overflow checking 15 * 16 * The generic counting was copied and modified from include/asm-generic/rwsem.h 17 * by Paul Mackerras <paulus@samba.org>. 18 * 19 * The scheduling policy was copied and modified from lib/rwsem.c 20 * Written by David Howells (dhowells@redhat.com). 21 * 22 * This implementation incorporates the write lock stealing work of 23 * Michel Lespinasse <walken@google.com>. 24 * 25 * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com> 26 * 27 * This file may be redistributed under the terms of the GNU General Public 28 * License v2. 29 */ 30 31 #include <linux/list.h> 32 #include <linux/spinlock.h> 33 #include <linux/atomic.h> 34 #include <linux/tty.h> 35 #include <linux/sched.h> 36 #include <linux/sched/debug.h> 37 #include <linux/sched/task.h> 38 39 40 #ifdef CONFIG_DEBUG_LOCK_ALLOC 41 # define __acq(l, s, t, r, c, n, i) \ 42 lock_acquire(&(l)->dep_map, s, t, r, c, n, i) 43 # define __rel(l, n, i) \ 44 lock_release(&(l)->dep_map, n, i) 45 #define lockdep_acquire(l, s, t, i) __acq(l, s, t, 0, 1, NULL, i) 46 #define lockdep_acquire_nest(l, s, t, n, i) __acq(l, s, t, 0, 1, n, i) 47 #define lockdep_acquire_read(l, s, t, i) __acq(l, s, t, 1, 1, NULL, i) 48 #define lockdep_release(l, n, i) __rel(l, n, i) 49 #else 50 # define lockdep_acquire(l, s, t, i) do { } while (0) 51 # define lockdep_acquire_nest(l, s, t, n, i) do { } while (0) 52 # define lockdep_acquire_read(l, s, t, i) do { } while (0) 53 # define lockdep_release(l, n, i) do { } while (0) 54 #endif 55 56 #ifdef CONFIG_LOCK_STAT 57 # define lock_stat(_lock, stat) lock_##stat(&(_lock)->dep_map, _RET_IP_) 58 #else 59 # define lock_stat(_lock, stat) do { } while (0) 60 #endif 61 62 63 #if BITS_PER_LONG == 64 64 # define LDSEM_ACTIVE_MASK 0xffffffffL 65 #else 66 # define LDSEM_ACTIVE_MASK 0x0000ffffL 67 #endif 68 69 #define LDSEM_UNLOCKED 0L 70 #define LDSEM_ACTIVE_BIAS 1L 71 #define LDSEM_WAIT_BIAS (-LDSEM_ACTIVE_MASK-1) 72 #define LDSEM_READ_BIAS LDSEM_ACTIVE_BIAS 73 #define LDSEM_WRITE_BIAS (LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS) 74 75 struct ldsem_waiter { 76 struct list_head list; 77 struct task_struct *task; 78 }; 79 80 static inline long ldsem_atomic_update(long delta, struct ld_semaphore *sem) 81 { 82 return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); 83 } 84 85 /* 86 * ldsem_cmpxchg() updates @*old with the last-known sem->count value. 87 * Returns 1 if count was successfully changed; @*old will have @new value. 88 * Returns 0 if count was not changed; @*old will have most recent sem->count 89 */ 90 static inline int ldsem_cmpxchg(long *old, long new, struct ld_semaphore *sem) 91 { 92 long tmp = atomic_long_cmpxchg(&sem->count, *old, new); 93 if (tmp == *old) { 94 *old = new; 95 return 1; 96 } else { 97 *old = tmp; 98 return 0; 99 } 100 } 101 102 /* 103 * Initialize an ldsem: 104 */ 105 void __init_ldsem(struct ld_semaphore *sem, const char *name, 106 struct lock_class_key *key) 107 { 108 #ifdef CONFIG_DEBUG_LOCK_ALLOC 109 /* 110 * Make sure we are not reinitializing a held semaphore: 111 */ 112 debug_check_no_locks_freed((void *)sem, sizeof(*sem)); 113 lockdep_init_map(&sem->dep_map, name, key, 0); 114 #endif 115 sem->count = LDSEM_UNLOCKED; 116 sem->wait_readers = 0; 117 raw_spin_lock_init(&sem->wait_lock); 118 INIT_LIST_HEAD(&sem->read_wait); 119 INIT_LIST_HEAD(&sem->write_wait); 120 } 121 122 static void __ldsem_wake_readers(struct ld_semaphore *sem) 123 { 124 struct ldsem_waiter *waiter, *next; 125 struct task_struct *tsk; 126 long adjust, count; 127 128 /* Try to grant read locks to all readers on the read wait list. 129 * Note the 'active part' of the count is incremented by 130 * the number of readers before waking any processes up. 131 */ 132 adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS); 133 count = ldsem_atomic_update(adjust, sem); 134 do { 135 if (count > 0) 136 break; 137 if (ldsem_cmpxchg(&count, count - adjust, sem)) 138 return; 139 } while (1); 140 141 list_for_each_entry_safe(waiter, next, &sem->read_wait, list) { 142 tsk = waiter->task; 143 smp_mb(); 144 waiter->task = NULL; 145 wake_up_process(tsk); 146 put_task_struct(tsk); 147 } 148 INIT_LIST_HEAD(&sem->read_wait); 149 sem->wait_readers = 0; 150 } 151 152 static inline int writer_trylock(struct ld_semaphore *sem) 153 { 154 /* only wake this writer if the active part of the count can be 155 * transitioned from 0 -> 1 156 */ 157 long count = ldsem_atomic_update(LDSEM_ACTIVE_BIAS, sem); 158 do { 159 if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) 160 return 1; 161 if (ldsem_cmpxchg(&count, count - LDSEM_ACTIVE_BIAS, sem)) 162 return 0; 163 } while (1); 164 } 165 166 static void __ldsem_wake_writer(struct ld_semaphore *sem) 167 { 168 struct ldsem_waiter *waiter; 169 170 waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list); 171 wake_up_process(waiter->task); 172 } 173 174 /* 175 * handle the lock release when processes blocked on it that can now run 176 * - if we come here from up_xxxx(), then: 177 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) 178 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) 179 * - the spinlock must be held by the caller 180 * - woken process blocks are discarded from the list after having task zeroed 181 */ 182 static void __ldsem_wake(struct ld_semaphore *sem) 183 { 184 if (!list_empty(&sem->write_wait)) 185 __ldsem_wake_writer(sem); 186 else if (!list_empty(&sem->read_wait)) 187 __ldsem_wake_readers(sem); 188 } 189 190 static void ldsem_wake(struct ld_semaphore *sem) 191 { 192 unsigned long flags; 193 194 raw_spin_lock_irqsave(&sem->wait_lock, flags); 195 __ldsem_wake(sem); 196 raw_spin_unlock_irqrestore(&sem->wait_lock, flags); 197 } 198 199 /* 200 * wait for the read lock to be granted 201 */ 202 static struct ld_semaphore __sched * 203 down_read_failed(struct ld_semaphore *sem, long count, long timeout) 204 { 205 struct ldsem_waiter waiter; 206 long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS; 207 208 /* set up my own style of waitqueue */ 209 raw_spin_lock_irq(&sem->wait_lock); 210 211 /* Try to reverse the lock attempt but if the count has changed 212 * so that reversing fails, check if there are are no waiters, 213 * and early-out if not */ 214 do { 215 if (ldsem_cmpxchg(&count, count + adjust, sem)) 216 break; 217 if (count > 0) { 218 raw_spin_unlock_irq(&sem->wait_lock); 219 return sem; 220 } 221 } while (1); 222 223 list_add_tail(&waiter.list, &sem->read_wait); 224 sem->wait_readers++; 225 226 waiter.task = current; 227 get_task_struct(current); 228 229 /* if there are no active locks, wake the new lock owner(s) */ 230 if ((count & LDSEM_ACTIVE_MASK) == 0) 231 __ldsem_wake(sem); 232 233 raw_spin_unlock_irq(&sem->wait_lock); 234 235 /* wait to be given the lock */ 236 for (;;) { 237 set_current_state(TASK_UNINTERRUPTIBLE); 238 239 if (!waiter.task) 240 break; 241 if (!timeout) 242 break; 243 timeout = schedule_timeout(timeout); 244 } 245 246 __set_current_state(TASK_RUNNING); 247 248 if (!timeout) { 249 /* lock timed out but check if this task was just 250 * granted lock ownership - if so, pretend there 251 * was no timeout; otherwise, cleanup lock wait */ 252 raw_spin_lock_irq(&sem->wait_lock); 253 if (waiter.task) { 254 ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem); 255 list_del(&waiter.list); 256 raw_spin_unlock_irq(&sem->wait_lock); 257 put_task_struct(waiter.task); 258 return NULL; 259 } 260 raw_spin_unlock_irq(&sem->wait_lock); 261 } 262 263 return sem; 264 } 265 266 /* 267 * wait for the write lock to be granted 268 */ 269 static struct ld_semaphore __sched * 270 down_write_failed(struct ld_semaphore *sem, long count, long timeout) 271 { 272 struct ldsem_waiter waiter; 273 long adjust = -LDSEM_ACTIVE_BIAS; 274 int locked = 0; 275 276 /* set up my own style of waitqueue */ 277 raw_spin_lock_irq(&sem->wait_lock); 278 279 /* Try to reverse the lock attempt but if the count has changed 280 * so that reversing fails, check if the lock is now owned, 281 * and early-out if so */ 282 do { 283 if (ldsem_cmpxchg(&count, count + adjust, sem)) 284 break; 285 if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) { 286 raw_spin_unlock_irq(&sem->wait_lock); 287 return sem; 288 } 289 } while (1); 290 291 list_add_tail(&waiter.list, &sem->write_wait); 292 293 waiter.task = current; 294 295 set_current_state(TASK_UNINTERRUPTIBLE); 296 for (;;) { 297 if (!timeout) 298 break; 299 raw_spin_unlock_irq(&sem->wait_lock); 300 timeout = schedule_timeout(timeout); 301 raw_spin_lock_irq(&sem->wait_lock); 302 set_current_state(TASK_UNINTERRUPTIBLE); 303 locked = writer_trylock(sem); 304 if (locked) 305 break; 306 } 307 308 if (!locked) 309 ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem); 310 list_del(&waiter.list); 311 raw_spin_unlock_irq(&sem->wait_lock); 312 313 __set_current_state(TASK_RUNNING); 314 315 /* lock wait may have timed out */ 316 if (!locked) 317 return NULL; 318 return sem; 319 } 320 321 322 323 static int __ldsem_down_read_nested(struct ld_semaphore *sem, 324 int subclass, long timeout) 325 { 326 long count; 327 328 lockdep_acquire_read(sem, subclass, 0, _RET_IP_); 329 330 count = ldsem_atomic_update(LDSEM_READ_BIAS, sem); 331 if (count <= 0) { 332 lock_stat(sem, contended); 333 if (!down_read_failed(sem, count, timeout)) { 334 lockdep_release(sem, 1, _RET_IP_); 335 return 0; 336 } 337 } 338 lock_stat(sem, acquired); 339 return 1; 340 } 341 342 static int __ldsem_down_write_nested(struct ld_semaphore *sem, 343 int subclass, long timeout) 344 { 345 long count; 346 347 lockdep_acquire(sem, subclass, 0, _RET_IP_); 348 349 count = ldsem_atomic_update(LDSEM_WRITE_BIAS, sem); 350 if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) { 351 lock_stat(sem, contended); 352 if (!down_write_failed(sem, count, timeout)) { 353 lockdep_release(sem, 1, _RET_IP_); 354 return 0; 355 } 356 } 357 lock_stat(sem, acquired); 358 return 1; 359 } 360 361 362 /* 363 * lock for reading -- returns 1 if successful, 0 if timed out 364 */ 365 int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout) 366 { 367 might_sleep(); 368 return __ldsem_down_read_nested(sem, 0, timeout); 369 } 370 371 /* 372 * trylock for reading -- returns 1 if successful, 0 if contention 373 */ 374 int ldsem_down_read_trylock(struct ld_semaphore *sem) 375 { 376 long count = sem->count; 377 378 while (count >= 0) { 379 if (ldsem_cmpxchg(&count, count + LDSEM_READ_BIAS, sem)) { 380 lockdep_acquire_read(sem, 0, 1, _RET_IP_); 381 lock_stat(sem, acquired); 382 return 1; 383 } 384 } 385 return 0; 386 } 387 388 /* 389 * lock for writing -- returns 1 if successful, 0 if timed out 390 */ 391 int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) 392 { 393 might_sleep(); 394 return __ldsem_down_write_nested(sem, 0, timeout); 395 } 396 397 /* 398 * trylock for writing -- returns 1 if successful, 0 if contention 399 */ 400 int ldsem_down_write_trylock(struct ld_semaphore *sem) 401 { 402 long count = sem->count; 403 404 while ((count & LDSEM_ACTIVE_MASK) == 0) { 405 if (ldsem_cmpxchg(&count, count + LDSEM_WRITE_BIAS, sem)) { 406 lockdep_acquire(sem, 0, 1, _RET_IP_); 407 lock_stat(sem, acquired); 408 return 1; 409 } 410 } 411 return 0; 412 } 413 414 /* 415 * release a read lock 416 */ 417 void ldsem_up_read(struct ld_semaphore *sem) 418 { 419 long count; 420 421 lockdep_release(sem, 1, _RET_IP_); 422 423 count = ldsem_atomic_update(-LDSEM_READ_BIAS, sem); 424 if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0) 425 ldsem_wake(sem); 426 } 427 428 /* 429 * release a write lock 430 */ 431 void ldsem_up_write(struct ld_semaphore *sem) 432 { 433 long count; 434 435 lockdep_release(sem, 1, _RET_IP_); 436 437 count = ldsem_atomic_update(-LDSEM_WRITE_BIAS, sem); 438 if (count < 0) 439 ldsem_wake(sem); 440 } 441 442 443 #ifdef CONFIG_DEBUG_LOCK_ALLOC 444 445 int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) 446 { 447 might_sleep(); 448 return __ldsem_down_read_nested(sem, subclass, timeout); 449 } 450 451 int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, 452 long timeout) 453 { 454 might_sleep(); 455 return __ldsem_down_write_nested(sem, subclass, timeout); 456 } 457 458 #endif 459