// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <longman@redhat.com> and
 * Peter Zijlstra <peterz@infradead.org>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <trace/events/lock.h>

#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - rwsem may be owned by readers (just a hint)
 *  - Bit 1: RWSEM_NONSPINNABLE - cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquired the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
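 *
 * For illustration only (not part of the implementation): with a
 * hypothetical task_struct at address 0xffff888012345600, a reader-owned
 * and still-spinnable rwsem would carry
 *
 *	owner = 0xffff888012345600 | RWSEM_READER_OWNED = 0xffff888012345601
 *
 * Since task_struct pointers are word-aligned, the low 2 bits are free
 * to hold the flag bits defined below.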
 *
 * Fast-path reader optimistic lock stealing is supported when the rwsem
 * was previously owned by a writer and the following conditions are met:
 *  - rwsem is not currently writer owned
 *  - the handoff bit isn't set.
 */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_NONSPINNABLE	(1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(unsigned long) sem->magic,			\
		atomic_long_read(&(sem)->owner), (long)current,	\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit  63   - read fail bit
 *
 * On 32-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-30 - 23-bit reader count
 * Bit  31   - read fail bit
 *
 * It is not likely that the most significant bit (read fail bit) will ever
 * be set. This guard bit is still checked anyway in the down_read() fastpath
 * just in case we need to use up more of the reader bits for other purposes
 * in the future.
 *
 * atomic_long_fetch_add() is used to obtain the reader lock, whereas
 * atomic_long_cmpxchg() will be used to obtain the writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers		-- set, clear
 * 2) rwsem_try_write_lock() for writers	-- set, clear
 * 3) rwsem_del_waiter()			-- clear
 *
 * For all the above cases, wait_lock will be held. A writer must also
 * be the first one in the wait_list to be eligible for setting the handoff
 * bit. So concurrent setting/clearing of the handoff bit is not possible.
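 *
 * As a worked example (illustrative values only): on a 64-bit system,
 * a count of 0x302 decodes as three active readers (0x300 >> 8 == 3)
 * plus RWSEM_FLAG_WAITERS (0x2), while a count of 0x1 is a write-locked
 * rwsem with an empty wait queue.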
 */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
				 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)

/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without lock. Read from owner, however,
 * may not need READ_ONCE() as long as the pointer value is only used
 * for comparison and isn't being dereferenced.
 *
 * Both rwsem_{set,clear}_owner() functions should be in the same
 * preempt disable section as the atomic op that changes sem->count.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
	lockdep_assert_preemption_disabled();
	atomic_long_set(&sem->owner, (long)current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
	lockdep_assert_preemption_disabled();
	atomic_long_set(&sem->owner, 0);
}

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
	return atomic_long_read(&sem->owner) & flags;
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates the task has owned the rwsem
 * previously; it may not be the real owner or one of the real owners
 * anymore when that field is examined, so take it with a grain of salt.
 *
 * The reader non-spinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
		(atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);

	atomic_long_set(&sem->owner, val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
	__rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
#ifdef CONFIG_DEBUG_RWSEMS
	/*
	 * Check the count to see if it is write-locked.
	 */
	long count = atomic_long_read(&sem->count);

	if (count & RWSEM_WRITER_MASK)
		return false;
#endif
	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in the owner of a reader-owned rwsem, it will be the
 * real owner or one of the real owners. The only exception is when the
 * unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
	unsigned long val = atomic_long_read(&sem->owner);

	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
		if (atomic_long_try_cmpxchg(&sem->owner, &val,
					    val & RWSEM_OWNER_FLAGS_MASK))
			return;
	}
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bit if the RWSEM_READER_OWNED flag
 * remains set. Otherwise, the operation will be aborted.
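 *
 * E.g. (illustrative only): an owner word of (task | RWSEM_READER_OWNED)
 * is moved to (task | RWSEM_READER_OWNED | RWSEM_NONSPINNABLE), while a
 * word without RWSEM_READER_OWNED is left untouched.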
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	do {
		if (!(owner & RWSEM_READER_OWNED))
			break;
		if (owner & RWSEM_NONSPINNABLE)
			break;
	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
					  owner | RWSEM_NONSPINNABLE));
}

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
{
	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

	if (WARN_ON_ONCE(*cntp < 0))
		rwsem_set_nonspinnable(sem);

	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
		rwsem_set_reader_owned(sem);
		return true;
	}

	return false;
}

static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}

/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
	return (struct task_struct *)
		(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner. pflags must be non-NULL.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED bit isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a reader-owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after that.
 */

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
	sem->magic = sem;
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
	atomic_long_set(&sem->owner, 0L);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);

enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
	unsigned long timeout;
	bool handoff_set;
};
#define rwsem_first_waiter(sem) \
	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)

/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP	0x100

static inline void
rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
	lockdep_assert_held(&sem->wait_lock);
	list_add_tail(&waiter->list, &sem->wait_list);
	/* caller will set RWSEM_FLAG_WAITERS */
}

/*
 * Remove a waiter from the wait_list and clear flags.
 *
 * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
 * this function. Modify with care.
 *
 * Return: true if wait_list isn't empty and false otherwise
 */
static inline bool
rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
	lockdep_assert_held(&sem->wait_lock);
	list_del(&waiter->list);
	if (likely(!list_empty(&sem->wait_list)))
		return true;

	atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
	return false;
}

/*
 * handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wake up the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only marked woken if downgrading is false
 *
 * Implies rwsem_del_waiter() for all woken readers.
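 *
 * As a rough illustration of the count arithmetic below (illustrative
 * numbers only): waking three readers after the initial RWSEM_READER_BIAS
 * grant leaves a net adjustment of
 * 3 * RWSEM_READER_BIAS - RWSEM_READER_BIAS = 0x200, before any
 * waiter/handoff flag bits are subtracted.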
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
			    enum rwsem_wake_type wake_type,
			    struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;
	struct list_head wlist;

	lockdep_assert_held(&sem->wait_lock);

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = rwsem_first_waiter(sem);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark writer at the front of the queue for wakeup.
			 * Until the task is actually awoken later by the
			 * caller, other writers are able to steal it.
			 * Readers, on the other hand, will block as they
			 * will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
			lockevent_inc(rwsem_wake_writer);
		}

		return;
	}

	/*
	 * No reader wakeup if there are too many of them already.
	 */
	if (unlikely(atomic_long_read(&sem->count) < 0))
		return;

	/*
	 * Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		struct task_struct *owner;

		adjustment = RWSEM_READER_BIAS;
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
			/*
			 * When we've been waiting "too" long (for writers
			 * to give up the lock), request a HANDOFF to
			 * force the issue.
			 */
			if (time_after(jiffies, waiter->timeout)) {
				if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
					adjustment -= RWSEM_FLAG_HANDOFF;
					lockevent_inc(rwsem_rlock_handoff);
				}
				waiter->handoff_set = true;
			}

			atomic_long_add(-adjustment, &sem->count);
			return;
		}
		/*
		 * Set it to reader-owned to give spinners an early
		 * indication that readers now have the lock.
		 * The reader nonspinnable bit seen at slowpath entry of
		 * the reader is copied over.
		 */
		owner = waiter->task;
		__rwsem_set_reader_owned(sem, owner);
	}

	/*
	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
	 * queue. We know that woken will be at least 1 as we accounted
	 * for above. Note we increment the 'active part' of the count by the
	 * number of readers before waking any processes up.
	 *
	 * This is an adaptation of the phase-fair R/W locks where at the
	 * reader phase (first waiter is a reader), all readers are eligible
	 * to acquire the lock at the same time irrespective of their order
	 * in the queue. The writers acquire the lock according to their
	 * order in the queue.
	 *
	 * We have to do wakeup in 2 passes to prevent the possibility that
	 * the reader count may be decremented before it is incremented. It
	 * is because the to-be-woken waiter may not have slept yet. So it
	 * may see waiter->task got cleared, finish its critical section and
	 * do an unlock before the reader count increment.
	 *
	 * 1) Collect the read-waiters in a separate list, count them and
	 *    fully increment the reader count in rwsem.
	 * 2) For each waiter in the new list, clear waiter->task and
	 *    put them into wake_q to be woken up later.
	 */
	INIT_LIST_HEAD(&wlist);
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			continue;

		woken++;
		list_move_tail(&waiter->list, &wlist);

		/*
		 * Limit # of readers that can be woken up per wakeup call.
		 */
		if (unlikely(woken >= MAX_READERS_WAKEUP))
			break;
	}

	adjustment = woken * RWSEM_READER_BIAS - adjustment;
	lockevent_cond_inc(rwsem_wake_reader, woken);

	oldcount = atomic_long_read(&sem->count);
	if (list_empty(&sem->wait_list)) {
		/*
		 * Combined with list_move_tail() above, this implies
		 * rwsem_del_waiter().
		 */
		adjustment -= RWSEM_FLAG_WAITERS;
		if (oldcount & RWSEM_FLAG_HANDOFF)
			adjustment -= RWSEM_FLAG_HANDOFF;
	} else if (woken) {
		/*
		 * When we've woken a reader, we no longer need to force
		 * writers to give up the lock and we can clear HANDOFF.
		 */
		if (oldcount & RWSEM_FLAG_HANDOFF)
			adjustment -= RWSEM_FLAG_HANDOFF;
	}

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);

	/* 2nd pass */
	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
		struct task_struct *tsk;

		tsk = waiter->task;
		get_task_struct(tsk);

		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_slowpath() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wake up.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add_safe(wake_q, tsk);
	}
}

/*
 * Remove a waiter and try to wake up other waiters in the wait queue.
 * This function is called from the out_nolock path of both the reader and
 * writer slowpaths with wait_lock held. It releases the wait_lock and
 * optionally wakes up waiters before it returns.
 */
static inline void
rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
		      struct wake_q_head *wake_q)
		      __releases(&sem->wait_lock)
{
	bool first = rwsem_first_waiter(sem) == waiter;

	wake_q_init(wake_q);

	/*
	 * If the wait_list isn't empty and the waiter to be deleted is
	 * the first waiter, we wake up the remaining waiters as they may
	 * be eligible to acquire or spin on the lock.
	 */
	if (rwsem_del_waiter(sem, waiter) && first)
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	if (!wake_q_empty(wake_q))
		wake_up_q(wake_q);
}

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * Implies rwsem_del_waiter() on success.
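 *
 * For illustration (hypothetical count values): with a single queued
 * writer and a count of RWSEM_FLAG_WAITERS (0x2), a successful cmpxchg
 * below moves the count to RWSEM_WRITER_LOCKED (0x1), clearing WAITERS
 * as the wait_list becomes empty.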
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
					struct rwsem_waiter *waiter)
{
	struct rwsem_waiter *first = rwsem_first_waiter(sem);
	long count, new;

	lockdep_assert_held(&sem->wait_lock);

	count = atomic_long_read(&sem->count);
	do {
		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

		if (has_handoff) {
			/*
			 * Honor handoff bit and yield only when the first
			 * waiter is the one that set it. Otherwise, we
			 * still try to acquire the rwsem.
			 */
			if (first->handoff_set && (waiter != first))
				return false;
		}

		new = count;

		if (count & RWSEM_LOCK_MASK) {
			/*
			 * A waiter (first or not) can set the handoff bit
			 * if it is an RT task or has waited in the wait
			 * queue for too long.
			 */
			if (has_handoff || (!rt_task(waiter->task) &&
					    !time_after(jiffies, waiter->timeout)))
				return false;

			new |= RWSEM_FLAG_HANDOFF;
		} else {
			new |= RWSEM_WRITER_LOCKED;
			new &= ~RWSEM_FLAG_HANDOFF;

			if (list_is_singular(&sem->wait_list))
				new &= ~RWSEM_FLAG_WAITERS;
		}
	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));

	/*
	 * We have either acquired the lock with handoff bit cleared or set
	 * the handoff bit. Only the first waiter can have its handoff_set
	 * set here to enable optimistic spinning in the slowpath loop.
	 */
	if (new & RWSEM_FLAG_HANDOFF) {
		first->handoff_set = true;
		lockevent_inc(rwsem_wlock_handoff);
		return false;
	}

	/*
	 * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
	 * success.
	 */
	list_del(&waiter->list);
	rwsem_set_owner(sem);
	return true;
}

/*
 * The rwsem_spin_on_owner() function returns the following 4 values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 */
enum owner_state {
	OWNER_NULL		= 1 << 0,
	OWNER_WRITER		= 1 << 1,
	OWNER_READER		= 1 << 2,
	OWNER_NONSPINNABLE	= 1 << 3,
};

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
						    count | RWSEM_WRITER_LOCKED)) {
			rwsem_set_owner(sem);
			lockevent_inc(rwsem_opt_lock);
			return true;
		}
	}
	return false;
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	unsigned long flags;
	bool ret = true;

	if (need_resched()) {
		lockevent_inc(rwsem_opt_fail);
		return false;
	}

	/*
	 * Disabling preemption is equivalent to an RCU read-side critical
	 * section, thus the task_struct structure won't go away.
	 */
	owner = rwsem_owner_flags(sem, &flags);
	/*
	 * Don't check the read-owner as the entry may be stale.
	 */
	if ((flags & RWSEM_NONSPINNABLE) ||
	    (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
		ret = false;

	lockevent_cond_inc(rwsem_opt_fail, !ret);
	return ret;
}

#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)

static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
{
	if (flags & RWSEM_NONSPINNABLE)
		return OWNER_NONSPINNABLE;

	if (flags & RWSEM_READER_OWNED)
		return OWNER_READER;

	return owner ? OWNER_WRITER : OWNER_NULL;
}

static noinline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *new, *owner;
	unsigned long flags, new_flags;
	enum owner_state state;

	lockdep_assert_preemption_disabled();

	owner = rwsem_owner_flags(sem, &flags);
	state = rwsem_owner_state(owner, flags);
	if (state != OWNER_WRITER)
		return state;

	for (;;) {
		/*
		 * When a waiting writer sets the handoff flag, it may spin
		 * on the owner as well. Once that writer acquires the lock,
		 * we can spin on it. So we don't need to quit even when the
		 * handoff bit is set.
		 */
		new = rwsem_owner_flags(sem, &new_flags);
		if ((new != owner) || (new_flags != flags)) {
			state = rwsem_owner_state(new, new_flags);
			break;
		}

		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking sem->owner still matches owner. If that fails,
		 * owner might point to free()d memory; if it still matches,
		 * our spinning context has already disabled preemption,
		 * which is equivalent to an RCU read-side critical section
		 * and ensures the memory stays valid.
		 */
		barrier();

		if (need_resched() || !owner_on_cpu(owner)) {
			state = OWNER_NONSPINNABLE;
			break;
		}

		cpu_relax();
	}

	return state;
}

/*
 * Calculate reader-owned rwsem spinning threshold for writer
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is subject to change in the future.
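 *
 * For example, 8 readers give a threshold of (10 + 8/2)us = 14us, while
 * anything above 30 readers is clamped to (10 + 30/2)us = 25us.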
 */
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);
	int readers = count >> RWSEM_READER_SHIFT;
	u64 delta;

	if (readers > 30)
		readers = 30;
	delta = (20 + readers) * NSEC_PER_USEC / 2;

	return sched_clock() + delta;
}

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;
	int prev_owner_state = OWNER_NULL;
	int loop = 0;
	u64 rspin_threshold = 0;

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock and spinning time has exceeded limit.
	 */
	for (;;) {
		enum owner_state owner_state;

		owner_state = rwsem_spin_on_owner(sem);
		if (!(owner_state & OWNER_SPINNABLE))
			break;

		/*
		 * Try to acquire the lock
		 */
		taken = rwsem_try_write_lock_unqueued(sem);

		if (taken)
			break;

		/*
		 * Time-based reader-owned rwsem optimistic spinning
		 */
		if (owner_state == OWNER_READER) {
			/*
			 * Re-initialize rspin_threshold every time the
			 * owner state changes from non-reader to reader.
			 * This allows a writer to steal the lock in between
			 * 2 reader phases and have the threshold reset at
			 * the beginning of the 2nd reader phase.
			 */
			if (prev_owner_state != OWNER_READER) {
				if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
					break;
				rspin_threshold = rwsem_rspin_threshold(sem);
				loop = 0;
			}

			/*
			 * Check time threshold once every 16 iterations to
			 * avoid calling sched_clock() too frequently so
			 * as to reduce the average latency between the times
			 * when the lock becomes free and when the spinner
			 * is ready to do a trylock.
			 */
			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
				rwsem_set_nonspinnable(sem);
				lockevent_inc(rwsem_opt_nospin);
				break;
			}
		}

		/*
		 * An RT task cannot do optimistic spinning if it cannot
		 * be sure the lock holder is running or live-lock may
		 * happen if the current task and the lock holder happen
		 * to run on the same CPU. However, aborting optimistic
		 * spinning while a NULL owner is detected may miss some
		 * opportunity where spinning can continue without causing
		 * problems.
		 *
		 * There are 2 possible cases where an RT task may be able
		 * to continue spinning.
		 *
		 * 1) The lock owner is in the process of releasing the
		 *    lock, sem->owner is cleared but the lock has not
		 *    been released yet.
		 * 2) The lock was free and owner cleared, but another
		 *    task just came in and acquired the lock before
		 *    we tried to get it. The new owner may be a spinnable
		 *    writer.
		 *
		 * To take advantage of the two scenarios listed above, the
		 * RT task is made to retry one more time to see if it can
		 * acquire the lock or continue spinning on the new owning
		 * writer. Of course, if the time lag is long enough or the
		 * new owner is not a writer or spinnable, the RT task will
		 * quit spinning.
		 *
		 * If the owner is a writer, the need_resched() check is
		 * done inside rwsem_spin_on_owner(). If the owner is not
		 * a writer, the need_resched() check needs to be done here.
		 */
		if (owner_state != OWNER_WRITER) {
			if (need_resched())
				break;
			if (rt_task(current) &&
			   (prev_owner_state != OWNER_WRITER))
				break;
		}
		prev_owner_state = owner_state;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	lockevent_cond_inc(rwsem_opt_fail, !taken);
	return taken;
}

/*
 * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
 * only be called when the reader count reaches 0.
 */
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
	if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
		atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}

#else
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline void clear_nonspinnable(struct rw_semaphore *sem) { }

static inline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	return OWNER_NONSPINNABLE;
}
#endif

/*
 * Prepare to wake up waiter(s) in the wait queue by putting them into the
 * given wake_q if the rwsem lock owner isn't a writer. If rwsem is likely
 * reader-owned, wake up read lock waiters in queue front or wake up any
 * front waiter otherwise.
 *
 * This is being called from both reader and writer slow paths.
 */
static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
					  struct wake_q_head *wake_q)
{
	enum rwsem_wake_type wake_type;

	if (count & RWSEM_WRITER_MASK)
		return;

	if (count & RWSEM_READER_MASK) {
		wake_type = RWSEM_WAKE_READERS;
	} else {
		wake_type = RWSEM_WAKE_ANY;
		clear_nonspinnable(sem);
	}
	rwsem_mark_wake(sem, wake_type, wake_q);
}

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
{
	long adjustment = -RWSEM_READER_BIAS;
	long rcnt = (count >> RWSEM_READER_SHIFT);
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	/*
	 * To prevent a constant stream of readers from starving a sleeping
	 * writer, don't attempt optimistic lock stealing if the lock is
	 * very likely owned by readers.
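	 *
	 * E.g. (illustrative count value): count = 0x202 gives rcnt = 2
	 * with RWSEM_FLAG_WAITERS set and RWSEM_WRITER_LOCKED clear, so a
	 * probably reader-owned lock with multiple readers skips stealing
	 * and queues below.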
	 */
	if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
	    (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
		goto queue;

	/*
	 * Reader optimistic lock stealing.
	 */
	if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
		rwsem_set_reader_owned(sem);
		lockevent_inc(rwsem_rlock_steal);

		/*
		 * Wake up other readers in the wait queue if it is
		 * the first reader.
		 */
		if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (!list_empty(&sem->wait_list))
				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
						&wake_q);
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
		}
		return sem;
	}

queue:
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
	waiter.handoff_set = false;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer, this reader can exit the slowpath and return
		 * immediately as its RWSEM_READER_BIAS has already been set
		 * in the count.
		 */
		if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
			/* Provide lock ACQUIRE */
			smp_acquire__after_ctrl_dep();
			raw_spin_unlock_irq(&sem->wait_lock);
			rwsem_set_reader_owned(sem);
			lockevent_inc(rwsem_rlock_fast);
			return sem;
		}
		adjustment += RWSEM_FLAG_WAITERS;
	}
	rwsem_add_waiter(sem, &waiter);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	rwsem_cond_wake_waiter(sem, count, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);

	if (!wake_q_empty(&wake_q))
		wake_up_q(&wake_q);

	trace_contention_begin(sem, LCB_F_READ);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(state);
		if (!smp_load_acquire(&waiter.task)) {
			/* Matches rwsem_mark_wake()'s smp_store_release(). */
			break;
		}
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
			break;
		}
		schedule_preempt_disabled();
		lockevent_inc(rwsem_sleep_reader);
	}

	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock);
	trace_contention_end(sem, 0);
	return sem;

out_nolock:
	rwsem_del_wake_waiter(sem, &waiter, &wake_q);
	__set_current_state(TASK_RUNNING);
	lockevent_inc(rwsem_rlock_fail);
	trace_contention_end(sem, -EINTR);
	return ERR_PTR(-EINTR);
}

/*
 * Wait until we successfully acquire the write lock
 */
static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
		/* rwsem_optimistic_spin() implies ACQUIRE on success */
		return sem;
	}

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
	waiter.handoff_set = false;

	raw_spin_lock_irq(&sem->wait_lock);
	rwsem_add_waiter(sem, &waiter);

	/* we're now waiting on the lock */
	if (rwsem_first_waiter(sem) != &waiter) {
		rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
				       &wake_q);
		if (!wake_q_empty(&wake_q)) {
			/*
			 * We want to minimize wait_lock hold time especially
			 * when a large number of readers are to be woken up.
11385dec94d4SWaiman Long */ 113900f3c5a3SWaiman Long raw_spin_unlock_irq(&sem->wait_lock); 11405dec94d4SWaiman Long wake_up_q(&wake_q); 114100f3c5a3SWaiman Long raw_spin_lock_irq(&sem->wait_lock); 114200f3c5a3SWaiman Long } 11435dec94d4SWaiman Long } else { 114400f3c5a3SWaiman Long atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count); 11455dec94d4SWaiman Long } 11465dec94d4SWaiman Long 11475dec94d4SWaiman Long /* wait until we successfully acquire the lock */ 11485dec94d4SWaiman Long set_current_state(state); 1149ee042be1SNamhyung Kim trace_contention_begin(sem, LCB_F_WRITE); 1150ee042be1SNamhyung Kim 11516ffddfb9SPeter Zijlstra for (;;) { 1152d257cc8cSWaiman Long if (rwsem_try_write_lock(sem, &waiter)) { 11536ffddfb9SPeter Zijlstra /* rwsem_try_write_lock() implies ACQUIRE on success */ 11545dec94d4SWaiman Long break; 11556ffddfb9SPeter Zijlstra } 11564f23dbc1SWaiman Long 11575dec94d4SWaiman Long raw_spin_unlock_irq(&sem->wait_lock); 11585dec94d4SWaiman Long 1159d257cc8cSWaiman Long if (signal_pending_state(state, current)) 1160d257cc8cSWaiman Long goto out_nolock; 1161d257cc8cSWaiman Long 116291d2a812SWaiman Long /* 116291d2a812SWaiman Long * After setting the handoff bit and failing to acquire 116491d2a812SWaiman Long * the lock, attempt to spin on owner to accelerate lock 116591d2a812SWaiman Long * transfer. If the previous owner is an on-cpu writer and it 116691d2a812SWaiman Long * has just released the lock, OWNER_NULL will be returned. 116791d2a812SWaiman Long * In this case, we attempt to acquire the lock again 116891d2a812SWaiman Long * without sleeping. 116991d2a812SWaiman Long */ 1170d257cc8cSWaiman Long if (waiter.handoff_set) { 11717cdacc5fSYanfei Xu enum owner_state owner_state; 11727cdacc5fSYanfei Xu 11737cdacc5fSYanfei Xu owner_state = rwsem_spin_on_owner(sem); 11747cdacc5fSYanfei Xu if (owner_state == OWNER_NULL) 117591d2a812SWaiman Long goto trylock_again; 11767cdacc5fSYanfei Xu } 117791d2a812SWaiman Long 11781d61659cSWaiman Long schedule_preempt_disabled(); 11795dec94d4SWaiman Long lockevent_inc(rwsem_sleep_writer); 11805dec94d4SWaiman Long set_current_state(state); 118191d2a812SWaiman Long trylock_again: 11825dec94d4SWaiman Long raw_spin_lock_irq(&sem->wait_lock); 11835dec94d4SWaiman Long } 11845dec94d4SWaiman Long __set_current_state(TASK_RUNNING); 11855dec94d4SWaiman Long raw_spin_unlock_irq(&sem->wait_lock); 11865dec94d4SWaiman Long lockevent_inc(rwsem_wlock); 1187ee042be1SNamhyung Kim trace_contention_end(sem, 0); 1188d257cc8cSWaiman Long return sem; 11895dec94d4SWaiman Long 11905dec94d4SWaiman Long out_nolock: 11915dec94d4SWaiman Long __set_current_state(TASK_RUNNING); 11925dec94d4SWaiman Long raw_spin_lock_irq(&sem->wait_lock); 11931ee32619SWaiman Long rwsem_del_wake_waiter(sem, &waiter, &wake_q); 11945dec94d4SWaiman Long lockevent_inc(rwsem_wlock_fail); 1195ee042be1SNamhyung Kim trace_contention_end(sem, -EINTR); 11965dec94d4SWaiman Long return ERR_PTR(-EINTR); 11975dec94d4SWaiman Long } 11985dec94d4SWaiman Long 11995dec94d4SWaiman Long /* 12005dec94d4SWaiman Long * handle waking up a waiter on the semaphore 12015dec94d4SWaiman Long * - up_read/up_write has decremented the active part of count if we come here 12025dec94d4SWaiman Long */ 1203d4e5076cSxuyehan static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) 12045dec94d4SWaiman Long { 12055dec94d4SWaiman Long unsigned long flags; 12065dec94d4SWaiman Long DEFINE_WAKE_Q(wake_q); 12075dec94d4SWaiman Long 12085dec94d4SWaiman Long raw_spin_lock_irqsave(&sem->wait_lock, flags); 12095dec94d4SWaiman Long 
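/*
 * Tasks to be woken are only collected on the local wake_q while
 * wait_lock is held; the actual wakeups via wake_up_q() are issued
 * after the lock has been dropped to keep the hold time short.
 */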
12105dec94d4SWaiman Long if (!list_empty(&sem->wait_list)) 12116cef7ff6SWaiman Long rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); 12125dec94d4SWaiman Long 12135dec94d4SWaiman Long raw_spin_unlock_irqrestore(&sem->wait_lock, flags); 12145dec94d4SWaiman Long wake_up_q(&wake_q); 12155dec94d4SWaiman Long 12165dec94d4SWaiman Long return sem; 12175dec94d4SWaiman Long } 12185dec94d4SWaiman Long 12195dec94d4SWaiman Long /* 12205dec94d4SWaiman Long * downgrade a write lock into a read lock 12215dec94d4SWaiman Long * - caller has atomically replaced its RWSEM_WRITER_LOCKED bit with a reader bias and found RWSEM_FLAG_WAITERS set 12225dec94d4SWaiman Long * - just wake up any readers at the front of the queue 12235dec94d4SWaiman Long */ 12246cef7ff6SWaiman Long static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) 12255dec94d4SWaiman Long { 12265dec94d4SWaiman Long unsigned long flags; 12275dec94d4SWaiman Long DEFINE_WAKE_Q(wake_q); 12285dec94d4SWaiman Long 12295dec94d4SWaiman Long raw_spin_lock_irqsave(&sem->wait_lock, flags); 12305dec94d4SWaiman Long 12315dec94d4SWaiman Long if (!list_empty(&sem->wait_list)) 12326cef7ff6SWaiman Long rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q); 12335dec94d4SWaiman Long 12345dec94d4SWaiman Long raw_spin_unlock_irqrestore(&sem->wait_lock, flags); 12355dec94d4SWaiman Long wake_up_q(&wake_q); 12365dec94d4SWaiman Long 12375dec94d4SWaiman Long return sem; 12385dec94d4SWaiman Long } 12395dec94d4SWaiman Long 12405dec94d4SWaiman Long /* 12415dec94d4SWaiman Long * lock for reading 12425dec94d4SWaiman Long */ 124392cc5d00SJohn Stultz static __always_inline int __down_read_common(struct rw_semaphore *sem, int state) 12445dec94d4SWaiman Long { 12453f524553SWaiman Long int ret = 0; 1246c8fe8b05SWaiman Long long count; 1247c8fe8b05SWaiman Long 12483f524553SWaiman Long preempt_disable(); 1249c8fe8b05SWaiman Long if (!rwsem_read_trylock(sem, &count)) { 12503f524553SWaiman Long if (IS_ERR(rwsem_down_read_slowpath(sem, count, state))) { 12513f524553SWaiman Long ret = -EINTR; 12523f524553SWaiman Long goto out; 12533f524553SWaiman Long } 125494a9717bSWaiman Long DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); 12555dec94d4SWaiman Long } 12563f524553SWaiman Long out: 12573f524553SWaiman Long preempt_enable(); 12583f524553SWaiman Long return ret; 1259c995e638SPeter Zijlstra } 1260c995e638SPeter Zijlstra 126192cc5d00SJohn Stultz static __always_inline void __down_read(struct rw_semaphore *sem) 1262c995e638SPeter Zijlstra { 1263c995e638SPeter Zijlstra __down_read_common(sem, TASK_UNINTERRUPTIBLE); 12645dec94d4SWaiman Long } 12655dec94d4SWaiman Long 126692cc5d00SJohn Stultz static __always_inline int __down_read_interruptible(struct rw_semaphore *sem) 126731784cffSEric W. Biederman { 1268c995e638SPeter Zijlstra return __down_read_common(sem, TASK_INTERRUPTIBLE); 126931784cffSEric W. Biederman } 127031784cffSEric W. 
Biederman 127192cc5d00SJohn Stultz static __always_inline int __down_read_killable(struct rw_semaphore *sem) 12725dec94d4SWaiman Long { 1273c995e638SPeter Zijlstra return __down_read_common(sem, TASK_KILLABLE); 12745dec94d4SWaiman Long } 12755dec94d4SWaiman Long 12765dec94d4SWaiman Long static inline int __down_read_trylock(struct rw_semaphore *sem) 12775dec94d4SWaiman Long { 12783f524553SWaiman Long int ret = 0; 1279fce45cd4SDavidlohr Bueso long tmp; 1280fce45cd4SDavidlohr Bueso 1281fce45cd4SDavidlohr Bueso DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); 1282fce45cd4SDavidlohr Bueso 12833f524553SWaiman Long preempt_disable(); 128414c24048SMuchun Song tmp = atomic_long_read(&sem->count); 128514c24048SMuchun Song while (!(tmp & RWSEM_READ_FAILED_MASK)) { 12865dec94d4SWaiman Long if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, 12875dec94d4SWaiman Long tmp + RWSEM_READER_BIAS)) { 12885dec94d4SWaiman Long rwsem_set_reader_owned(sem); 12893f524553SWaiman Long ret = 1; 12903f524553SWaiman Long break; 12915dec94d4SWaiman Long } 129214c24048SMuchun Song } 12933f524553SWaiman Long preempt_enable(); 12943f524553SWaiman Long return ret; 12955dec94d4SWaiman Long } 12965dec94d4SWaiman Long 12975dec94d4SWaiman Long /* 12985dec94d4SWaiman Long * lock for writing 12995dec94d4SWaiman Long */ 1300*e81859feSJohn Stultz static __always_inline int __down_write_common(struct rw_semaphore *sem, int state) 13015dec94d4SWaiman Long { 13021d61659cSWaiman Long int ret = 0; 13031d61659cSWaiman Long 13041d61659cSWaiman Long preempt_disable(); 1305285c61aeSPeter Zijlstra if (unlikely(!rwsem_write_trylock(sem))) { 1306c995e638SPeter Zijlstra if (IS_ERR(rwsem_down_write_slowpath(sem, state))) 13071d61659cSWaiman Long ret = -EINTR; 13085cfd92e1SWaiman Long } 13091d61659cSWaiman Long preempt_enable(); 13101d61659cSWaiman Long return ret; 13115dec94d4SWaiman Long } 13125dec94d4SWaiman Long 1313*e81859feSJohn Stultz static __always_inline void __down_write(struct rw_semaphore *sem) 1314c995e638SPeter Zijlstra { 1315c995e638SPeter Zijlstra __down_write_common(sem, TASK_UNINTERRUPTIBLE); 1316c995e638SPeter Zijlstra } 1317c995e638SPeter Zijlstra 1318*e81859feSJohn Stultz static __always_inline int __down_write_killable(struct rw_semaphore *sem) 1319c995e638SPeter Zijlstra { 1320c995e638SPeter Zijlstra return __down_write_common(sem, TASK_KILLABLE); 1321c995e638SPeter Zijlstra } 1322c995e638SPeter Zijlstra 13235dec94d4SWaiman Long static inline int __down_write_trylock(struct rw_semaphore *sem) 13245dec94d4SWaiman Long { 13251d61659cSWaiman Long int ret; 13261d61659cSWaiman Long 13271d61659cSWaiman Long preempt_disable(); 1328fce45cd4SDavidlohr Bueso DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); 13291d61659cSWaiman Long ret = rwsem_write_trylock(sem); 13301d61659cSWaiman Long preempt_enable(); 13311d61659cSWaiman Long 13321d61659cSWaiman Long return ret; 13335dec94d4SWaiman Long } 13345dec94d4SWaiman Long 13355dec94d4SWaiman Long /* 13365dec94d4SWaiman Long * unlock after reading 13375dec94d4SWaiman Long */ 13387f26482aSPeter Zijlstra static inline void __up_read(struct rw_semaphore *sem) 13395dec94d4SWaiman Long { 13405dec94d4SWaiman Long long tmp; 13415dec94d4SWaiman Long 1342fce45cd4SDavidlohr Bueso DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); 134394a9717bSWaiman Long DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); 1344fce45cd4SDavidlohr Bueso 13453f524553SWaiman Long preempt_disable(); 13465dec94d4SWaiman Long rwsem_clear_reader_owned(sem); 13475dec94d4SWaiman Long tmp = 
atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count); 1348a15ea1a3SWaiman Long DEBUG_RWSEMS_WARN_ON(tmp < 0, sem); 13496cef7ff6SWaiman Long if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) == 13507d43f1ceSWaiman Long RWSEM_FLAG_WAITERS)) { 1351617f3ef9SWaiman Long clear_nonspinnable(sem); 1352d4e5076cSxuyehan rwsem_wake(sem); 13535dec94d4SWaiman Long } 13543f524553SWaiman Long preempt_enable(); 13557d43f1ceSWaiman Long } 13565dec94d4SWaiman Long 13575dec94d4SWaiman Long /* 13585dec94d4SWaiman Long * unlock after writing 13595dec94d4SWaiman Long */ 13607f26482aSPeter Zijlstra static inline void __up_write(struct rw_semaphore *sem) 13615dec94d4SWaiman Long { 13626cef7ff6SWaiman Long long tmp; 13636cef7ff6SWaiman Long 1364fce45cd4SDavidlohr Bueso DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); 136502f1082bSWaiman Long /* 136602f1082bSWaiman Long * sem->owner may differ from current if the ownership is transferred 136702f1082bSWaiman Long * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits. 136802f1082bSWaiman Long */ 136994a9717bSWaiman Long DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) && 137094a9717bSWaiman Long !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem); 1371fce45cd4SDavidlohr Bueso 137248dfb5d2SGokul krishna Krishnakumar preempt_disable(); 13735dec94d4SWaiman Long rwsem_clear_owner(sem); 13746cef7ff6SWaiman Long tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); 13756cef7ff6SWaiman Long if (unlikely(tmp & RWSEM_FLAG_WAITERS)) 1376d4e5076cSxuyehan rwsem_wake(sem); 13771d61659cSWaiman Long preempt_enable(); 13785dec94d4SWaiman Long } 13795dec94d4SWaiman Long 13805dec94d4SWaiman Long /* 13815dec94d4SWaiman Long * downgrade write lock to read lock 13825dec94d4SWaiman Long */ 13835dec94d4SWaiman Long static inline void __downgrade_write(struct rw_semaphore *sem) 13845dec94d4SWaiman Long { 13855dec94d4SWaiman Long long tmp; 13865dec94d4SWaiman Long 13875dec94d4SWaiman Long /* 13885dec94d4SWaiman Long * When downgrading from exclusive to shared ownership, 13895dec94d4SWaiman Long * anything inside the write-locked region cannot leak 13905dec94d4SWaiman Long * into the read side. In contrast, anything in the 13915dec94d4SWaiman Long * read-locked region is ok to be re-ordered into the 13925dec94d4SWaiman Long * write side. As such, rely on RELEASE semantics. 
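 *
 * The single atomic fetch_add below clears RWSEM_WRITER_LOCKED and
 * adds RWSEM_READER_BIAS in one step, so there is no window in which
 * the lock appears unowned during the downgrade.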
13935dec94d4SWaiman Long */ 139494a9717bSWaiman Long DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem); 13951d61659cSWaiman Long preempt_disable(); 13965dec94d4SWaiman Long tmp = atomic_long_fetch_add_release( 13975dec94d4SWaiman Long -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count); 13985dec94d4SWaiman Long rwsem_set_reader_owned(sem); 13995dec94d4SWaiman Long if (tmp & RWSEM_FLAG_WAITERS) 14005dec94d4SWaiman Long rwsem_downgrade_wake(sem); 14011d61659cSWaiman Long preempt_enable(); 14025dec94d4SWaiman Long } 14034fc828e2SDavidlohr Bueso 140442254105SThomas Gleixner #else /* !CONFIG_PREEMPT_RT */ 140542254105SThomas Gleixner 1406e17ba59bSThomas Gleixner #define RT_MUTEX_BUILD_MUTEX 140742254105SThomas Gleixner #include "rtmutex.c" 140842254105SThomas Gleixner 140942254105SThomas Gleixner #define rwbase_set_and_save_current_state(state) \ 141042254105SThomas Gleixner set_current_state(state) 141142254105SThomas Gleixner 141242254105SThomas Gleixner #define rwbase_restore_current_state() \ 141342254105SThomas Gleixner __set_current_state(TASK_RUNNING) 141442254105SThomas Gleixner 141542254105SThomas Gleixner #define rwbase_rtmutex_lock_state(rtm, state) \ 141642254105SThomas Gleixner __rt_mutex_lock(rtm, state) 141742254105SThomas Gleixner 141842254105SThomas Gleixner #define rwbase_rtmutex_slowlock_locked(rtm, state) \ 1419add46132SPeter Zijlstra __rt_mutex_slowlock_locked(rtm, NULL, state) 142042254105SThomas Gleixner 142142254105SThomas Gleixner #define rwbase_rtmutex_unlock(rtm) \ 142242254105SThomas Gleixner __rt_mutex_unlock(rtm) 142342254105SThomas Gleixner 142442254105SThomas Gleixner #define rwbase_rtmutex_trylock(rtm) \ 142542254105SThomas Gleixner __rt_mutex_trylock(rtm) 142642254105SThomas Gleixner 142742254105SThomas Gleixner #define rwbase_signal_pending_state(state, current) \ 142842254105SThomas Gleixner signal_pending_state(state, current) 142942254105SThomas Gleixner 1430d14f9e93SSebastian Andrzej Siewior #define rwbase_pre_schedule() \ 1431d14f9e93SSebastian Andrzej Siewior rt_mutex_pre_schedule() 1432d14f9e93SSebastian Andrzej Siewior 143342254105SThomas Gleixner #define rwbase_schedule() \ 1434d14f9e93SSebastian Andrzej Siewior rt_mutex_schedule() 1435d14f9e93SSebastian Andrzej Siewior 1436d14f9e93SSebastian Andrzej Siewior #define rwbase_post_schedule() \ 1437d14f9e93SSebastian Andrzej Siewior rt_mutex_post_schedule() 143842254105SThomas Gleixner 143942254105SThomas Gleixner #include "rwbase_rt.c" 144042254105SThomas Gleixner 144115eb7c88SMike Galbraith void __init_rwsem(struct rw_semaphore *sem, const char *name, 144242254105SThomas Gleixner struct lock_class_key *key) 144342254105SThomas Gleixner { 144415eb7c88SMike Galbraith init_rwbase_rt(&(sem)->rwbase); 144515eb7c88SMike Galbraith 144615eb7c88SMike Galbraith #ifdef CONFIG_DEBUG_LOCK_ALLOC 144742254105SThomas Gleixner debug_check_no_locks_freed((void *)sem, sizeof(*sem)); 144842254105SThomas Gleixner lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP); 144942254105SThomas Gleixner #endif 145015eb7c88SMike Galbraith } 145115eb7c88SMike Galbraith EXPORT_SYMBOL(__init_rwsem); 145242254105SThomas Gleixner 145342254105SThomas Gleixner static inline void __down_read(struct rw_semaphore *sem) 145442254105SThomas Gleixner { 145542254105SThomas Gleixner rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); 145642254105SThomas Gleixner } 145742254105SThomas Gleixner 145842254105SThomas Gleixner static inline int __down_read_interruptible(struct rw_semaphore *sem) 145942254105SThomas Gleixner { 
146042254105SThomas Gleixner return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE); 146142254105SThomas Gleixner } 146242254105SThomas Gleixner 146342254105SThomas Gleixner static inline int __down_read_killable(struct rw_semaphore *sem) 146442254105SThomas Gleixner { 146542254105SThomas Gleixner return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE); 146642254105SThomas Gleixner } 146742254105SThomas Gleixner 146842254105SThomas Gleixner static inline int __down_read_trylock(struct rw_semaphore *sem) 146942254105SThomas Gleixner { 147042254105SThomas Gleixner return rwbase_read_trylock(&sem->rwbase); 147142254105SThomas Gleixner } 147242254105SThomas Gleixner 147342254105SThomas Gleixner static inline void __up_read(struct rw_semaphore *sem) 147442254105SThomas Gleixner { 147542254105SThomas Gleixner rwbase_read_unlock(&sem->rwbase, TASK_NORMAL); 147642254105SThomas Gleixner } 147742254105SThomas Gleixner 147842254105SThomas Gleixner static inline void __sched __down_write(struct rw_semaphore *sem) 147942254105SThomas Gleixner { 148042254105SThomas Gleixner rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); 148142254105SThomas Gleixner } 148242254105SThomas Gleixner 148342254105SThomas Gleixner static inline int __sched __down_write_killable(struct rw_semaphore *sem) 148442254105SThomas Gleixner { 148542254105SThomas Gleixner return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE); 148642254105SThomas Gleixner } 148742254105SThomas Gleixner 148842254105SThomas Gleixner static inline int __down_write_trylock(struct rw_semaphore *sem) 148942254105SThomas Gleixner { 149042254105SThomas Gleixner return rwbase_write_trylock(&sem->rwbase); 149142254105SThomas Gleixner } 149242254105SThomas Gleixner 149342254105SThomas Gleixner static inline void __up_write(struct rw_semaphore *sem) 149442254105SThomas Gleixner { 149542254105SThomas Gleixner rwbase_write_unlock(&sem->rwbase); 149642254105SThomas Gleixner } 149742254105SThomas Gleixner 149842254105SThomas Gleixner static inline void __downgrade_write(struct rw_semaphore *sem) 149942254105SThomas Gleixner { 150042254105SThomas Gleixner rwbase_write_downgrade(&sem->rwbase); 150142254105SThomas Gleixner } 150242254105SThomas Gleixner 150342254105SThomas Gleixner /* Debug stubs for the common API */ 150442254105SThomas Gleixner #define DEBUG_RWSEMS_WARN_ON(c, sem) 150542254105SThomas Gleixner 150642254105SThomas Gleixner static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, 150742254105SThomas Gleixner struct task_struct *owner) 150842254105SThomas Gleixner { 150942254105SThomas Gleixner } 151042254105SThomas Gleixner 151142254105SThomas Gleixner static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) 151242254105SThomas Gleixner { 151342254105SThomas Gleixner int count = atomic_read(&sem->rwbase.readers); 151442254105SThomas Gleixner 151542254105SThomas Gleixner return count < 0 && count != READER_BIAS; 151642254105SThomas Gleixner } 151742254105SThomas Gleixner 151842254105SThomas Gleixner #endif /* CONFIG_PREEMPT_RT */ 151942254105SThomas Gleixner 1520ed428bfcSPeter Zijlstra /* 1521ed428bfcSPeter Zijlstra * lock for reading 1522ed428bfcSPeter Zijlstra */ 1523ed428bfcSPeter Zijlstra void __sched down_read(struct rw_semaphore *sem) 1524ed428bfcSPeter Zijlstra { 1525ed428bfcSPeter Zijlstra might_sleep(); 1526ed428bfcSPeter Zijlstra rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); 1527ed428bfcSPeter Zijlstra 1528ed428bfcSPeter Zijlstra LOCK_CONTENDED(sem, __down_read_trylock, __down_read); 1529ed428bfcSPeter Zijlstra } 
}
1530ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read); 1531ed428bfcSPeter Zijlstra 153231784cffSEric W. Biederman int __sched down_read_interruptible(struct rw_semaphore *sem) 153331784cffSEric W. Biederman { 153431784cffSEric W. Biederman might_sleep(); 153531784cffSEric W. Biederman rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); 153631784cffSEric W. Biederman 153731784cffSEric W. Biederman if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) { 153831784cffSEric W. Biederman rwsem_release(&sem->dep_map, _RET_IP_); 153931784cffSEric W. Biederman return -EINTR; 154031784cffSEric W. Biederman } 154131784cffSEric W. Biederman 154231784cffSEric W. Biederman return 0; 154331784cffSEric W. Biederman } 154431784cffSEric W. Biederman EXPORT_SYMBOL(down_read_interruptible); 154531784cffSEric W. Biederman 154676f8507fSKirill Tkhai int __sched down_read_killable(struct rw_semaphore *sem) 154776f8507fSKirill Tkhai { 154876f8507fSKirill Tkhai might_sleep(); 154976f8507fSKirill Tkhai rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); 155076f8507fSKirill Tkhai 155176f8507fSKirill Tkhai if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { 15525facae4fSQian Cai rwsem_release(&sem->dep_map, _RET_IP_); 155376f8507fSKirill Tkhai return -EINTR; 155476f8507fSKirill Tkhai } 155576f8507fSKirill Tkhai 155676f8507fSKirill Tkhai return 0; 155776f8507fSKirill Tkhai } 155876f8507fSKirill Tkhai EXPORT_SYMBOL(down_read_killable); 155976f8507fSKirill Tkhai 1560ed428bfcSPeter Zijlstra /* 1561ed428bfcSPeter Zijlstra * trylock for reading -- returns 1 if successful, 0 if contention 1562ed428bfcSPeter Zijlstra */ 1563ed428bfcSPeter Zijlstra int down_read_trylock(struct rw_semaphore *sem) 1564ed428bfcSPeter Zijlstra { 1565ed428bfcSPeter Zijlstra int ret = __down_read_trylock(sem); 1566ed428bfcSPeter Zijlstra 1567c7580c1eSWaiman Long if (ret == 1) 1568ed428bfcSPeter Zijlstra rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); 1569ed428bfcSPeter Zijlstra return ret; 1570ed428bfcSPeter Zijlstra } 1571ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_trylock); 1572ed428bfcSPeter Zijlstra 1573ed428bfcSPeter Zijlstra /* 1574ed428bfcSPeter Zijlstra * lock for writing 1575ed428bfcSPeter Zijlstra */ 1576ed428bfcSPeter Zijlstra void __sched down_write(struct rw_semaphore *sem) 1577ed428bfcSPeter Zijlstra { 1578ed428bfcSPeter Zijlstra might_sleep(); 1579ed428bfcSPeter Zijlstra rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); 1580ed428bfcSPeter Zijlstra LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 1581ed428bfcSPeter Zijlstra } 1582ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write); 1583ed428bfcSPeter Zijlstra 1584ed428bfcSPeter Zijlstra /* 1585916633a4SMichal Hocko * lock for writing 1586916633a4SMichal Hocko */ 1587916633a4SMichal Hocko int __sched down_write_killable(struct rw_semaphore *sem) 1588916633a4SMichal Hocko { 1589916633a4SMichal Hocko might_sleep(); 1590916633a4SMichal Hocko rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); 1591916633a4SMichal Hocko 15926cef7ff6SWaiman Long if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, 15936cef7ff6SWaiman Long __down_write_killable)) { 15945facae4fSQian Cai rwsem_release(&sem->dep_map, _RET_IP_); 1595916633a4SMichal Hocko return -EINTR; 1596916633a4SMichal Hocko } 1597916633a4SMichal Hocko 1598916633a4SMichal Hocko return 0; 1599916633a4SMichal Hocko } 1600916633a4SMichal Hocko EXPORT_SYMBOL(down_write_killable); 1601916633a4SMichal Hocko 1602916633a4SMichal Hocko /* 1603ed428bfcSPeter Zijlstra * trylock for writing -- returns 1 
if successful, 0 if contention 1604ed428bfcSPeter Zijlstra */ 1605ed428bfcSPeter Zijlstra int down_write_trylock(struct rw_semaphore *sem) 1606ed428bfcSPeter Zijlstra { 1607ed428bfcSPeter Zijlstra int ret = __down_write_trylock(sem); 1608ed428bfcSPeter Zijlstra 1609c7580c1eSWaiman Long if (ret == 1) 1610ed428bfcSPeter Zijlstra rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); 16114fc828e2SDavidlohr Bueso 1612ed428bfcSPeter Zijlstra return ret; 1613ed428bfcSPeter Zijlstra } 1614ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write_trylock); 1615ed428bfcSPeter Zijlstra 1616ed428bfcSPeter Zijlstra /* 1617ed428bfcSPeter Zijlstra * release a read lock 1618ed428bfcSPeter Zijlstra */ 1619ed428bfcSPeter Zijlstra void up_read(struct rw_semaphore *sem) 1620ed428bfcSPeter Zijlstra { 16215facae4fSQian Cai rwsem_release(&sem->dep_map, _RET_IP_); 1622ed428bfcSPeter Zijlstra __up_read(sem); 1623ed428bfcSPeter Zijlstra } 1624ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_read); 1625ed428bfcSPeter Zijlstra 1626ed428bfcSPeter Zijlstra /* 1627ed428bfcSPeter Zijlstra * release a write lock 1628ed428bfcSPeter Zijlstra */ 1629ed428bfcSPeter Zijlstra void up_write(struct rw_semaphore *sem) 1630ed428bfcSPeter Zijlstra { 16315facae4fSQian Cai rwsem_release(&sem->dep_map, _RET_IP_); 1632ed428bfcSPeter Zijlstra __up_write(sem); 1633ed428bfcSPeter Zijlstra } 1634ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_write); 1635ed428bfcSPeter Zijlstra 1636ed428bfcSPeter Zijlstra /* 1637ed428bfcSPeter Zijlstra * downgrade write lock to read lock 1638ed428bfcSPeter Zijlstra */ 1639ed428bfcSPeter Zijlstra void downgrade_write(struct rw_semaphore *sem) 1640ed428bfcSPeter Zijlstra { 16416419c4afSJ. R. Okajima lock_downgrade(&sem->dep_map, _RET_IP_); 1642ed428bfcSPeter Zijlstra __downgrade_write(sem); 1643ed428bfcSPeter Zijlstra } 1644ed428bfcSPeter Zijlstra EXPORT_SYMBOL(downgrade_write); 1645ed428bfcSPeter Zijlstra 1646ed428bfcSPeter Zijlstra #ifdef CONFIG_DEBUG_LOCK_ALLOC 1647ed428bfcSPeter Zijlstra 1648ed428bfcSPeter Zijlstra void down_read_nested(struct rw_semaphore *sem, int subclass) 1649ed428bfcSPeter Zijlstra { 1650ed428bfcSPeter Zijlstra might_sleep(); 1651ed428bfcSPeter Zijlstra rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); 1652ed428bfcSPeter Zijlstra LOCK_CONTENDED(sem, __down_read_trylock, __down_read); 1653ed428bfcSPeter Zijlstra } 1654ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_nested); 1655ed428bfcSPeter Zijlstra 16560f9368b5SEric W. Biederman int down_read_killable_nested(struct rw_semaphore *sem, int subclass) 16570f9368b5SEric W. Biederman { 16580f9368b5SEric W. Biederman might_sleep(); 16590f9368b5SEric W. Biederman rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); 16600f9368b5SEric W. Biederman 16610f9368b5SEric W. Biederman if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) { 16620f9368b5SEric W. Biederman rwsem_release(&sem->dep_map, _RET_IP_); 16630f9368b5SEric W. Biederman return -EINTR; 16640f9368b5SEric W. Biederman } 16650f9368b5SEric W. Biederman 16660f9368b5SEric W. Biederman return 0; 16670f9368b5SEric W. Biederman } 16680f9368b5SEric W. Biederman EXPORT_SYMBOL(down_read_killable_nested); 16690f9368b5SEric W. 
Biederman 1670ed428bfcSPeter Zijlstra void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) 1671ed428bfcSPeter Zijlstra { 1672ed428bfcSPeter Zijlstra might_sleep(); 1673ed428bfcSPeter Zijlstra rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); 1674ed428bfcSPeter Zijlstra LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 1675ed428bfcSPeter Zijlstra } 1676ed428bfcSPeter Zijlstra EXPORT_SYMBOL(_down_write_nest_lock); 1677ed428bfcSPeter Zijlstra 1678ed428bfcSPeter Zijlstra void down_read_non_owner(struct rw_semaphore *sem) 1679ed428bfcSPeter Zijlstra { 1680ed428bfcSPeter Zijlstra might_sleep(); 1681ed428bfcSPeter Zijlstra __down_read(sem); 16823f524553SWaiman Long /* 16833f524553SWaiman Long * The owner value for a reader-owned lock is mostly for debugging 16843f524553SWaiman Long * purposes only and is not critical to the correct functioning of 16853f524553SWaiman Long * rwsem. So it is perfectly fine to set it in a preempt-enabled 16863f524553SWaiman Long * context here. 16873f524553SWaiman Long */ 1688925b9cd1SWaiman Long __rwsem_set_reader_owned(sem, NULL); 1689ed428bfcSPeter Zijlstra } 1690ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_non_owner); 1691ed428bfcSPeter Zijlstra 1692ed428bfcSPeter Zijlstra void down_write_nested(struct rw_semaphore *sem, int subclass) 1693ed428bfcSPeter Zijlstra { 1694ed428bfcSPeter Zijlstra might_sleep(); 1695ed428bfcSPeter Zijlstra rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); 1696ed428bfcSPeter Zijlstra LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 1697ed428bfcSPeter Zijlstra } 1698ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write_nested); 1699ed428bfcSPeter Zijlstra 1700887bddfaSAl Viro int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) 1701887bddfaSAl Viro { 1702887bddfaSAl Viro might_sleep(); 1703887bddfaSAl Viro rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); 1704887bddfaSAl Viro 17056cef7ff6SWaiman Long if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, 17066cef7ff6SWaiman Long __down_write_killable)) { 17075facae4fSQian Cai rwsem_release(&sem->dep_map, _RET_IP_); 1708887bddfaSAl Viro return -EINTR; 1709887bddfaSAl Viro } 1710887bddfaSAl Viro 1711887bddfaSAl Viro return 0; 1712887bddfaSAl Viro } 1713887bddfaSAl Viro EXPORT_SYMBOL(down_write_killable_nested); 1714887bddfaSAl Viro 1715ed428bfcSPeter Zijlstra void up_read_non_owner(struct rw_semaphore *sem) 1716ed428bfcSPeter Zijlstra { 171794a9717bSWaiman Long DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); 1718ed428bfcSPeter Zijlstra __up_read(sem); 1719ed428bfcSPeter Zijlstra } 1720ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_read_non_owner); 1721ed428bfcSPeter Zijlstra 1722ed428bfcSPeter Zijlstra #endif
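
/*
 * Illustrative usage sketch (not part of the original file): the
 * canonical reader/writer pattern built on the public API above.
 * The names my_rwsem, my_data, read_my_data() and update_my_data()
 * are hypothetical.
 *
 *	static DECLARE_RWSEM(my_rwsem);
 *	static int my_data;
 *
 *	int read_my_data(void)
 *	{
 *		int val;
 *
 *		down_read(&my_rwsem);	(shared: concurrent readers are fine)
 *		val = my_data;
 *		up_read(&my_rwsem);
 *		return val;
 *	}
 *
 *	void update_my_data(int val)
 *	{
 *		down_write(&my_rwsem);	(exclusive: blocks readers and writers)
 *		my_data = val;
 *		up_write(&my_rwsem);
 *	}
 *
 * A writer that needs to keep reading after its update can call
 * downgrade_write() instead of up_write() and finish with up_read():
 * the exclusive hold becomes a shared one without the lock ever
 * being released in between.
 */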