// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 * See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include "rtmutex_common.h"

#ifndef WW_RT
# define build_ww_mutex()	(false)
# define ww_container_of(rtm)	NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

#else
# define build_ww_mutex()	(true)
# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif
/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state:
 *
 *	owner		bit0
 *	NULL		0	lock is free (fast acquire possible)
 *	NULL		1	lock is free and has waiters and the top waiter
 *				is going to take the lock*
 *	taskpointer	0	lock is held (fast release possible)
 *	taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set the bit0 before looking at the lock, and the owner may
 * be NULL in this small window, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	WRITE_ONCE(lock->owner, (struct task_struct *)val);
}
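/*
 * Illustration of the encoding (a sketch; it mirrors what
 * rt_mutex_owner() in rtmutex_common.h does): the owner task_struct
 * pointer and the waiters bit share one word, so readers mask bit 0
 * off before dereferencing:
 *
 *	owner = (unsigned long)READ_ONCE(lock->owner);
 *	has_waiters = owner & RT_MUTEX_HAS_WAITERS;	// bit 0
 *	task = (struct task_struct *)(owner & ~RT_MUTEX_HAS_WAITERS);
 */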
static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0			CPU 1		CPU2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *				rt_mutex_lock(l)
	 *				lock(l->lock)
	 *				l->owner = T1 | HAS_WAITERS;
	 *				enqueue(T3)
	 *				boost()
	 *				  unlock(l->lock)
	 *				block()
	 *		signal(->T2)	signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *				lock(l->lock)
	 *				dequeue(T3)
	 *				 ==> wait list is empty
	 *				deboost()
	 *				 unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l)) {
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		    l->owner = owner
	 *		      ==> l->owner = T1
	 *		  }
	 * lock(l->lock)
	 * rt_mutex_unlock(l)	fixup_rt_mutex_waiters()
	 *			  if (wait_list_empty(l)) {
	 *			    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *			    l->owner = owner
	 *			      ==> l->owner = T1
	 *			  }
	 *
	 * With the check for the waiter bit in place T3 on CPU2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS)
		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}
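/*
 * Usage sketch (not a definitive list of call sites): the slow paths
 * repair the transient "bit set, no waiters" state described above
 * after they give up on the lock, e.g. on a failed trylock or when a
 * wait is aborted by a signal or timeout, roughly:
 *
 *	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 *	remove_waiter(lock, waiter);		// hypothetical call site
 *	fixup_rt_mutex_waiters(lock);		// drop stale HAS_WAITERS
 *	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 */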
/*
 * We can speed up the acquire/release if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_release(&lock->owner, &old, new);
}
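/*
 * Illustration only (not a function in this file): with the helpers
 * above, the uncontended lock/unlock fast paths reduce to one cmpxchg
 * of lock->owner, which can only succeed while bit 0 is clear:
 *
 *	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
 *		return;			// fast acquire, no waiters
 *	// ...otherwise take the slow path under lock->wait_lock...
 *
 *	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
 *		return;			// fast release, no waiters
 *	// ...otherwise wake the top waiter via the slow path...
 */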
/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg_relaxed(p, owner,
				 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
	int prio = task->prio;

	if (!rt_prio(prio))
		return DEFAULT_PRIO;

	return prio;
}

static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	waiter->prio = __waiter_prio(task);
	waiter->deadline = task->dl.deadline;
}
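/*
 * Sketch of what __waiter_prio() does to the comparisons below: RT and
 * deadline waiters keep their kernel-internal ->prio (lower value ==
 * higher priority), while all fair-scheduled waiters are flattened to
 * DEFAULT_PRIO, so nice levels never order waiters against each other:
 *
 *	RT task, task->prio = 89	-> waiter->prio = 89
 *	CFS task, nice -20 (prio 100)	-> waiter->prio = 120 (DEFAULT_PRIO)
 *	CFS task, nice +19 (prio 139)	-> waiter->prio = 120 (DEFAULT_PRIO)
 */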
/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p)	\
	&(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }

static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
						struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->deadline, right->deadline);

	return 0;
}

static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
						 struct rt_mutex_waiter *right)
{
	if (left->prio != right->prio)
		return 0;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 0 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return left->deadline == right->deadline;

	return 1;
}

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
				  struct rt_mutex_waiter *top_waiter)
{
	if (rt_mutex_waiter_less(waiter, top_waiter))
		return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
	/*
	 * Note that RT tasks are excluded from same priority (lateral)
	 * steals to prevent the introduction of an unbounded latency.
	 */
	if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
		return false;

	return rt_mutex_waiter_equal(waiter, top_waiter);
#else
	return false;
#endif
}
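/*
 * Lateral steal in a nutshell (sketch, prio values made up):
 *
 *	waiter->prio = 89 (RT), top_waiter->prio = 120
 *		-> waiter_less() is true, steal always allowed.
 *	waiter->prio = 120, top_waiter->prio = 120, both CFS,
 *	RT_MUTEX_BUILD_SPINLOCKS
 *		-> equal prio, non-RT: lateral steal allowed.
 *	waiter->prio = 89 (RT), top_waiter->prio = 89 (RT)
 *		-> equal prio, RT: no steal, to keep RT latency bounded.
 */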
#define __node_2_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, tree_entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
	struct rt_mutex_waiter *aw = __node_2_waiter(a);
	struct rt_mutex_waiter *bw = __node_2_waiter(b);

	if (rt_mutex_waiter_less(aw, bw))
		return 1;

	if (!build_ww_mutex())
		return 0;

	if (rt_mutex_waiter_less(bw, aw))
		return 0;

	/* NOTE: relies on waiter->ww_ctx being set before insertion */
	if (aw->ww_ctx) {
		if (!bw->ww_ctx)
			return 1;

		return (signed long)(aw->ww_ctx->stamp -
				     bw->ww_ctx->stamp) < 0;
	}

	return 0;
}

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	rb_erase_cached(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

#define __node_2_pi_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)

static __always_inline bool
__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
	return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
{
	struct task_struct *pi_task = NULL;

	lockdep_assert_held(&p->pi_lock);

	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

	rt_mutex_setprio(p, pi_task);
}

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
						struct rt_mutex_waiter *w)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) {
		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
			WARN_ON_ONCE(wqh->rtlock_task);
		get_task_struct(w->task);
		wqh->rtlock_task = w->task;
	} else {
		wake_q_add(&wqh->head, w->task);
	}
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
		put_task_struct(wqh->rtlock_task);
		wqh->rtlock_task = NULL;
	}

	if (!wake_q_empty(&wqh->head))
		wake_up_q(&wqh->head);

	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
	preempt_enable();
}
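/*
 * Why the separate rtlock_task slot (sketch): on PREEMPT_RT a task
 * sleeping on a spinlock-converted rtmutex waits in TASK_RTLOCK_WAIT
 * rather than TASK_NORMAL, so it must be woken with wake_up_state()
 * and exactly that state, while regular sleeping-lock waiters go
 * through the normal wake_q. Only one such waiter can be pending per
 * wakeup batch, which is what the WARN_ON_ONCE() above checks under
 * PROVE_LOCKING.
 */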
/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
			      enum rtmutex_chainwalk chwalk)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
		return waiter != NULL;
	return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}
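/*
 * The rules above as a truth table (sketch):
 *
 *	DEBUG_RT_MUTEXES  waiter  chwalk		=> detect deadlock?
 *	n		  any	  RT_MUTEX_MIN_CHAINWALK   no
 *	n		  any	  RT_MUTEX_FULL_CHAINWALK  yes
 *	y		  NULL	  any			   no (deboost path)
 *	y		  !NULL	  any			   yes
 */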
/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 * again:
 *	loop_sanity_check();
 * retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
					      enum rtmutex_chainwalk chwalk,
					      struct rt_mutex_base *orig_lock,
					      struct rt_mutex_base *next_lock,
					      struct rt_mutex_waiter *orig_waiter,
					      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex_base *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}
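
	/*
	 * Note (sketch, values as in the mainline sources): max_lock_depth
	 * is an administrative knob, default 1024 and writable via
	 * /proc/sys/kernel/max_lock_depth. Exceeding it is treated like a
	 * deadlock (-EDEADLK) rather than walking an arbitrarily long,
	 * possibly maliciously constructed, lock chain.
	 */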
5851696a8beSPeter Zijlstra * pitfalls. We want this to be preemptible and we want hold a 5861696a8beSPeter Zijlstra * maximum of two locks per step. So we have to check 5871696a8beSPeter Zijlstra * carefully whether things change under us. 5881696a8beSPeter Zijlstra */ 5891696a8beSPeter Zijlstra again: 5903eb65aeaSThomas Gleixner /* 5913eb65aeaSThomas Gleixner * We limit the lock chain length for each invocation. 5923eb65aeaSThomas Gleixner */ 5931696a8beSPeter Zijlstra if (++depth > max_lock_depth) { 5941696a8beSPeter Zijlstra static int prev_max; 5951696a8beSPeter Zijlstra 5961696a8beSPeter Zijlstra /* 5971696a8beSPeter Zijlstra * Print this only once. If the admin changes the limit, 5981696a8beSPeter Zijlstra * print a new message when reaching the limit again. 5991696a8beSPeter Zijlstra */ 6001696a8beSPeter Zijlstra if (prev_max != max_lock_depth) { 6011696a8beSPeter Zijlstra prev_max = max_lock_depth; 6021696a8beSPeter Zijlstra printk(KERN_WARNING "Maximum lock depth %d reached " 6031696a8beSPeter Zijlstra "task: %s (%d)\n", max_lock_depth, 6041696a8beSPeter Zijlstra top_task->comm, task_pid_nr(top_task)); 6051696a8beSPeter Zijlstra } 6061696a8beSPeter Zijlstra put_task_struct(task); 6071696a8beSPeter Zijlstra 6083d5c9340SThomas Gleixner return -EDEADLK; 6091696a8beSPeter Zijlstra } 6103eb65aeaSThomas Gleixner 6113eb65aeaSThomas Gleixner /* 6123eb65aeaSThomas Gleixner * We are fully preemptible here and only hold the refcount on 6133eb65aeaSThomas Gleixner * @task. So everything can have changed under us since the 6143eb65aeaSThomas Gleixner * caller or our own code below (goto retry/again) dropped all 6153eb65aeaSThomas Gleixner * locks. 6163eb65aeaSThomas Gleixner */ 6171696a8beSPeter Zijlstra retry: 6181696a8beSPeter Zijlstra /* 6193eb65aeaSThomas Gleixner * [1] Task cannot go away as we did a get_task() before ! 6201696a8beSPeter Zijlstra */ 621b4abf910SThomas Gleixner raw_spin_lock_irq(&task->pi_lock); 6221696a8beSPeter Zijlstra 6233eb65aeaSThomas Gleixner /* 6243eb65aeaSThomas Gleixner * [2] Get the waiter on which @task is blocked on. 6253eb65aeaSThomas Gleixner */ 6261696a8beSPeter Zijlstra waiter = task->pi_blocked_on; 6273eb65aeaSThomas Gleixner 6283eb65aeaSThomas Gleixner /* 6293eb65aeaSThomas Gleixner * [3] check_exit_conditions_1() protected by task->pi_lock. 6303eb65aeaSThomas Gleixner */ 6313eb65aeaSThomas Gleixner 6321696a8beSPeter Zijlstra /* 6331696a8beSPeter Zijlstra * Check whether the end of the boosting chain has been 6341696a8beSPeter Zijlstra * reached or the state of the chain has changed while we 6351696a8beSPeter Zijlstra * dropped the locks. 6361696a8beSPeter Zijlstra */ 6371696a8beSPeter Zijlstra if (!waiter) 6381696a8beSPeter Zijlstra goto out_unlock_pi; 6391696a8beSPeter Zijlstra 6401696a8beSPeter Zijlstra /* 6411696a8beSPeter Zijlstra * Check the orig_waiter state. After we dropped the locks, 6421696a8beSPeter Zijlstra * the previous owner of the lock might have released the lock. 6431696a8beSPeter Zijlstra */ 6441696a8beSPeter Zijlstra if (orig_waiter && !rt_mutex_owner(orig_lock)) 6451696a8beSPeter Zijlstra goto out_unlock_pi; 6461696a8beSPeter Zijlstra 6471696a8beSPeter Zijlstra /* 64882084984SThomas Gleixner * We dropped all locks after taking a refcount on @task, so 64982084984SThomas Gleixner * the task might have moved on in the lock chain or even left 65082084984SThomas Gleixner * the chain completely and blocks now on an unrelated lock or 65182084984SThomas Gleixner * on @orig_lock. 

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * Drop out when the task has no waiters. Note that
	 * top_waiter can be NULL when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		raw_spin_unlock(&lock->wait_lock);
		ret = -EDEADLK;
		goto out_unlock_pi;
	}
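
	/*
	 * Classic ABBA case caught by the check above (sketch):
	 *
	 *	T1 holds L1, blocks on L2	T2 holds L2, blocks on L1
	 *
	 * The chain walk started for T1's block on L2 (orig_lock == L2,
	 * top_task == T1) follows L2 -> T2 -> L1 and now finds either
	 * lock == orig_lock (we walked back around to L2) or
	 * rt_mutex_owner(lock) == top_task (T1 owns the next lock), so
	 * it unwinds with -EDEADLK.
	 */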

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue [7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = get_task_struct(rt_mutex_owner(lock));
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks.
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);

	/*
	 * Update the waiter prio fields now that we're dequeued.
	 *
	 * These values can have changed through either:
	 *
	 *   sys_sched_set_scheduler() / sys_sched_setattr()
	 *
	 * or
	 *
	 *   DL CBS enforcement advancing the effective deadline.
	 *
	 * Even though pi_waiters also uses these fields, and that tree is only
	 * updated in [11], we can do this here, since we hold [L], which
	 * serializes all pi_waiters access and rb_erase() does not care about
	 * the values of the node being removed.
	 */
	waiter_update_prio(waiter, task);

	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
			wake_up_state(waiter->task, waiter->wake_state);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = get_task_struct(rt_mutex_owner(lock));
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi_waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi_waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}
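
	/*
	 * A concrete walk-through of [11] (sketch, prio values made up,
	 * lower == higher priority): owner O holds @lock with top waiter
	 * A (prio 50); waiter B (was prio 80) has just been boosted to
	 * prio 40 and requeued in [7]:
	 *
	 *	B == rt_mutex_top_waiter(lock)
	 *	  -> dequeue A from O's pi_waiters, enqueue B,
	 *	     rt_mutex_adjust_prio(O) boosts O towards prio 40.
	 *
	 * The middle branch is the mirror image (the old top waiter was
	 * deboosted below another waiter), and the else branch covers a
	 * requeue that did not change the top waiter at all.
	 */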

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

		/*
		 * If waiter is the highest priority waiter of @lock,
		 * or allowed to steal it, take it over.
		 */
		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
			/*
			 * We can acquire the lock. Remove the waiter from the
			 * lock waiters tree.
			 */
			rt_mutex_dequeue(lock, waiter);
		} else {
			return 0;
		}
	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/* Check whether the trylock can steal it. */
			if (!rt_mutex_steal(task_to_waiter(task),
					    rt_mutex_top_waiter(lock)))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

takeit:
	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	return 1;
}
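
/*
 * Decision summary for try_to_take_rt_mutex() (sketch):
 *
 *	owner set?				-> fail (0)
 *	waiter != NULL, top or can steal	-> dequeue waiter, take lock (1)
 *	waiter != NULL, otherwise		-> fail (0)
 *	waiter == NULL, waiters, no steal	-> fail (0)
 *	waiter == NULL, waiters, steal ok	-> take lock over top waiter (1)
 *	waiter == NULL, no waiters		-> take lock directly (1)
 */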

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex_base *next_lock;
	int chain_walk = 0, res;

	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;
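
	/*
	 * Trivial self-deadlock caught by the check above (sketch):
	 *
	 *	rt_mutex_lock(&m);
	 *	rt_mutex_lock(&m);	// owner == current -> -EDEADLK
	 *
	 * Without the early check, current would enqueue behind itself
	 * and an innocent later waiter could be the one to walk the
	 * chain and get the -EDEADLK instead.
	 */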
1061397335f0SThomas Gleixner */ 10623d5c9340SThomas Gleixner if (owner == task) 1063397335f0SThomas Gleixner return -EDEADLK; 1064397335f0SThomas Gleixner 1065b4abf910SThomas Gleixner raw_spin_lock(&task->pi_lock); 10661696a8beSPeter Zijlstra waiter->task = task; 10671696a8beSPeter Zijlstra waiter->lock = lock; 1068715f7f9eSPeter Zijlstra waiter_update_prio(waiter, task); 10691696a8beSPeter Zijlstra 10701696a8beSPeter Zijlstra /* Get the top priority waiter on the lock */ 10711696a8beSPeter Zijlstra if (rt_mutex_has_waiters(lock)) 10721696a8beSPeter Zijlstra top_waiter = rt_mutex_top_waiter(lock); 1073fb00aca4SPeter Zijlstra rt_mutex_enqueue(lock, waiter); 10741696a8beSPeter Zijlstra 10751696a8beSPeter Zijlstra task->pi_blocked_on = waiter; 10761696a8beSPeter Zijlstra 1077b4abf910SThomas Gleixner raw_spin_unlock(&task->pi_lock); 10781696a8beSPeter Zijlstra 1079add46132SPeter Zijlstra if (build_ww_mutex() && ww_ctx) { 1080add46132SPeter Zijlstra struct rt_mutex *rtm; 1081add46132SPeter Zijlstra 1082add46132SPeter Zijlstra /* Check whether the waiter should back out immediately */ 1083add46132SPeter Zijlstra rtm = container_of(lock, struct rt_mutex, rtmutex); 1084add46132SPeter Zijlstra res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx); 1085add46132SPeter Zijlstra if (res) 1086add46132SPeter Zijlstra return res; 1087add46132SPeter Zijlstra } 1088add46132SPeter Zijlstra 10891696a8beSPeter Zijlstra if (!owner) 10901696a8beSPeter Zijlstra return 0; 10911696a8beSPeter Zijlstra 1092b4abf910SThomas Gleixner raw_spin_lock(&owner->pi_lock); 109382084984SThomas Gleixner if (waiter == rt_mutex_top_waiter(lock)) { 1094fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(owner, top_waiter); 1095fb00aca4SPeter Zijlstra rt_mutex_enqueue_pi(owner, waiter); 10961696a8beSPeter Zijlstra 1097acd58620SPeter Zijlstra rt_mutex_adjust_prio(owner); 10981696a8beSPeter Zijlstra if (owner->pi_blocked_on) 10991696a8beSPeter Zijlstra chain_walk = 1; 11008930ed80SThomas Gleixner } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { 11011696a8beSPeter Zijlstra chain_walk = 1; 110282084984SThomas Gleixner } 11031696a8beSPeter Zijlstra 110482084984SThomas Gleixner /* Store the lock on which owner is blocked or NULL */ 110582084984SThomas Gleixner next_lock = task_blocked_on_lock(owner); 110682084984SThomas Gleixner 1107b4abf910SThomas Gleixner raw_spin_unlock(&owner->pi_lock); 110882084984SThomas Gleixner /* 110982084984SThomas Gleixner * Even if full deadlock detection is on, if the owner is not 111082084984SThomas Gleixner * blocked itself, we can avoid finding this out in the chain 111182084984SThomas Gleixner * walk. 111282084984SThomas Gleixner */ 111382084984SThomas Gleixner if (!chain_walk || !next_lock) 11141696a8beSPeter Zijlstra return 0; 11151696a8beSPeter Zijlstra 11161696a8beSPeter Zijlstra /* 11171696a8beSPeter Zijlstra * The owner can't disappear while holding a lock, 11181696a8beSPeter Zijlstra * so the owner struct is protected by wait_lock. 11191696a8beSPeter Zijlstra * Gets dropped in rt_mutex_adjust_prio_chain()! 
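* The reference taken here keeps the owner's task_struct valid across
* the wait_lock drop below; rt_mutex_adjust_prio_chain() does the
* matching put_task_struct() when it is done with @owner.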
11201696a8beSPeter Zijlstra */
11211696a8beSPeter Zijlstra get_task_struct(owner);
11221696a8beSPeter Zijlstra
1123b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock);
11241696a8beSPeter Zijlstra
11258930ed80SThomas Gleixner res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
112682084984SThomas Gleixner next_lock, waiter, task);
11271696a8beSPeter Zijlstra
1128b4abf910SThomas Gleixner raw_spin_lock_irq(&lock->wait_lock);
11291696a8beSPeter Zijlstra
11301696a8beSPeter Zijlstra return res;
11311696a8beSPeter Zijlstra }
11321696a8beSPeter Zijlstra
11331696a8beSPeter Zijlstra /*
11349f40a51aSDavidlohr Bueso * Remove the top waiter from the current task's pi waiter tree and
113545ab4effSDavidlohr Bueso * queue it up.
11361696a8beSPeter Zijlstra *
1137b4abf910SThomas Gleixner * Called with lock->wait_lock held and interrupts disabled.
11381696a8beSPeter Zijlstra */
11397980aa39SThomas Gleixner static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
1140830e6accSPeter Zijlstra struct rt_mutex_base *lock)
11411696a8beSPeter Zijlstra {
11421696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter;
11431696a8beSPeter Zijlstra
1144b4abf910SThomas Gleixner raw_spin_lock(&current->pi_lock);
11451696a8beSPeter Zijlstra
11461696a8beSPeter Zijlstra waiter = rt_mutex_top_waiter(lock);
11471696a8beSPeter Zijlstra
11481696a8beSPeter Zijlstra /*
1149acd58620SPeter Zijlstra * Remove it from current->pi_waiters and deboost.
1150acd58620SPeter Zijlstra *
1151acd58620SPeter Zijlstra * We must in fact deboost here in order to ensure we call
1152acd58620SPeter Zijlstra * rt_mutex_setprio() to update p->pi_top_task before the
1153acd58620SPeter Zijlstra * task unblocks.
11541696a8beSPeter Zijlstra */
1155fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(current, waiter);
1156acd58620SPeter Zijlstra rt_mutex_adjust_prio(current);
11571696a8beSPeter Zijlstra
115827e35715SThomas Gleixner /*
115927e35715SThomas Gleixner * As we are waking up the top waiter, and the waiter stays
116027e35715SThomas Gleixner * queued on the lock until it gets the lock, this lock
116127e35715SThomas Gleixner * obviously has waiters. Just set the bit here and this has
116227e35715SThomas Gleixner * the added benefit of forcing all new tasks into the
116327e35715SThomas Gleixner * slow path making sure no task of lower priority than
116427e35715SThomas Gleixner * the top waiter can steal this lock.
116527e35715SThomas Gleixner */
116627e35715SThomas Gleixner lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
11671696a8beSPeter Zijlstra
1168acd58620SPeter Zijlstra /*
1169acd58620SPeter Zijlstra * We deboosted before waking the top waiter task such that we don't
1170acd58620SPeter Zijlstra * run two tasks with the 'same' priority (and ensure the
1171acd58620SPeter Zijlstra * p->pi_top_task pointer points to a blocked task). This however can
1172acd58620SPeter Zijlstra * lead to priority inversion if we would get preempted after the
1173acd58620SPeter Zijlstra * deboost but before waking our donor task, hence the preempt_disable()
1174acd58620SPeter Zijlstra * before unlock.
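*
* Sketch of the window being closed: current deboosts from the top
* waiter's priority back to its own lower one; a medium-priority task
* preempts it before rt_mutex_wake_up_q() runs; the top waiter, whose
* lock is already free, keeps sleeping behind that medium task.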
1175acd58620SPeter Zijlstra *
11767980aa39SThomas Gleixner * Pairs with preempt_enable() in rt_mutex_wake_up_q();
1177acd58620SPeter Zijlstra */
1178acd58620SPeter Zijlstra preempt_disable();
11797980aa39SThomas Gleixner rt_mutex_wake_q_add(wqh, waiter);
1180acd58620SPeter Zijlstra raw_spin_unlock(&current->pi_lock);
11811696a8beSPeter Zijlstra }
11821696a8beSPeter Zijlstra
1183e17ba59bSThomas Gleixner static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1184e17ba59bSThomas Gleixner {
1185e17ba59bSThomas Gleixner int ret = try_to_take_rt_mutex(lock, current, NULL);
1186e17ba59bSThomas Gleixner
1187e17ba59bSThomas Gleixner /*
1188e17ba59bSThomas Gleixner * try_to_take_rt_mutex() sets the lock waiters bit
1189e17ba59bSThomas Gleixner * unconditionally. Clean this up.
1190e17ba59bSThomas Gleixner */
1191e17ba59bSThomas Gleixner fixup_rt_mutex_waiters(lock);
1192e17ba59bSThomas Gleixner
1193e17ba59bSThomas Gleixner return ret;
1194e17ba59bSThomas Gleixner }
1195e17ba59bSThomas Gleixner
1196e17ba59bSThomas Gleixner /*
1197e17ba59bSThomas Gleixner * Slow path try-lock function:
1198e17ba59bSThomas Gleixner */
1199e17ba59bSThomas Gleixner static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1200e17ba59bSThomas Gleixner {
1201e17ba59bSThomas Gleixner unsigned long flags;
1202e17ba59bSThomas Gleixner int ret;
1203e17ba59bSThomas Gleixner
1204e17ba59bSThomas Gleixner /*
1205e17ba59bSThomas Gleixner * If the lock already has an owner we fail to get the lock.
1206e17ba59bSThomas Gleixner * This can be done without taking the @lock->wait_lock as
1207e17ba59bSThomas Gleixner * it is only being read, and this is a trylock anyway.
1208e17ba59bSThomas Gleixner */
1209e17ba59bSThomas Gleixner if (rt_mutex_owner(lock))
1210e17ba59bSThomas Gleixner return 0;
1211e17ba59bSThomas Gleixner
1212e17ba59bSThomas Gleixner /*
1213e17ba59bSThomas Gleixner * The mutex currently has no owner. Lock the wait lock and try to
1214e17ba59bSThomas Gleixner * acquire the lock. We use irqsave here to support early boot calls.
1215e17ba59bSThomas Gleixner */
1216e17ba59bSThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags);
1217e17ba59bSThomas Gleixner
1218e17ba59bSThomas Gleixner ret = __rt_mutex_slowtrylock(lock);
1219e17ba59bSThomas Gleixner
1220e17ba59bSThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1221e17ba59bSThomas Gleixner
1222e17ba59bSThomas Gleixner return ret;
1223e17ba59bSThomas Gleixner }
1224e17ba59bSThomas Gleixner
1225e17ba59bSThomas Gleixner static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
1226e17ba59bSThomas Gleixner {
1227e17ba59bSThomas Gleixner if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1228e17ba59bSThomas Gleixner return 1;
1229e17ba59bSThomas Gleixner
1230e17ba59bSThomas Gleixner return rt_mutex_slowtrylock(lock);
1231e17ba59bSThomas Gleixner }
1232e17ba59bSThomas Gleixner
1233e17ba59bSThomas Gleixner /*
1234e17ba59bSThomas Gleixner * Slow path to release an rt-mutex.
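* Invoked by __rt_mutex_unlock() below when the cmpxchg_release() fast
* path fails, typically because the waiters bit is set in lock->owner.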
1235e17ba59bSThomas Gleixner */
1236e17ba59bSThomas Gleixner static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
1237e17ba59bSThomas Gleixner {
1238e17ba59bSThomas Gleixner DEFINE_RT_WAKE_Q(wqh);
1239e17ba59bSThomas Gleixner unsigned long flags;
1240e17ba59bSThomas Gleixner
1241e17ba59bSThomas Gleixner /* irqsave required to support early boot calls */
1242e17ba59bSThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags);
1243e17ba59bSThomas Gleixner
1244e17ba59bSThomas Gleixner debug_rt_mutex_unlock(lock);
1245e17ba59bSThomas Gleixner
1246e17ba59bSThomas Gleixner /*
1247e17ba59bSThomas Gleixner * We must be careful here if the fast path is enabled. If we
1248e17ba59bSThomas Gleixner * have no waiters queued we cannot set owner to NULL here
1249e17ba59bSThomas Gleixner * because of:
1250e17ba59bSThomas Gleixner *
1251e17ba59bSThomas Gleixner * foo->lock->owner = NULL;
1252e17ba59bSThomas Gleixner * rt_mutex_lock(foo->lock); <- fast path
1253e17ba59bSThomas Gleixner * free = atomic_dec_and_test(&foo->refcnt);
1254e17ba59bSThomas Gleixner * rt_mutex_unlock(foo->lock); <- fast path
1255e17ba59bSThomas Gleixner * if (free)
1256e17ba59bSThomas Gleixner * kfree(foo);
1257e17ba59bSThomas Gleixner * raw_spin_unlock(&foo->lock->wait_lock);
1258e17ba59bSThomas Gleixner *
1259e17ba59bSThomas Gleixner * So for the fastpath enabled kernel:
1260e17ba59bSThomas Gleixner *
1261e17ba59bSThomas Gleixner * Nothing can set the waiters bit as long as we hold
1262e17ba59bSThomas Gleixner * lock->wait_lock. So we do the following sequence:
1263e17ba59bSThomas Gleixner *
1264e17ba59bSThomas Gleixner * owner = rt_mutex_owner(lock);
1265e17ba59bSThomas Gleixner * clear_rt_mutex_waiters(lock);
1266e17ba59bSThomas Gleixner * raw_spin_unlock(&lock->wait_lock);
1267e17ba59bSThomas Gleixner * if (cmpxchg(&lock->owner, owner, 0) == owner)
1268e17ba59bSThomas Gleixner * return;
1269e17ba59bSThomas Gleixner * goto retry;
1270e17ba59bSThomas Gleixner *
1271e17ba59bSThomas Gleixner * The fastpath disabled variant is simple as all access to
1272e17ba59bSThomas Gleixner * lock->owner is serialized by lock->wait_lock:
1273e17ba59bSThomas Gleixner *
1274e17ba59bSThomas Gleixner * lock->owner = NULL;
1275e17ba59bSThomas Gleixner * raw_spin_unlock(&lock->wait_lock);
1276e17ba59bSThomas Gleixner */
1277e17ba59bSThomas Gleixner while (!rt_mutex_has_waiters(lock)) {
1278e17ba59bSThomas Gleixner /* Drops lock->wait_lock ! */
1279e17ba59bSThomas Gleixner if (unlock_rt_mutex_safe(lock, flags) == true)
1280e17ba59bSThomas Gleixner return;
1281e17ba59bSThomas Gleixner /* Relock the rtmutex and try again */
1282e17ba59bSThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags);
1283e17ba59bSThomas Gleixner }
1284e17ba59bSThomas Gleixner
1285e17ba59bSThomas Gleixner /*
1286e17ba59bSThomas Gleixner * The wakeup next waiter path does not suffer from the above
1287e17ba59bSThomas Gleixner * race. See the comments there.
1288e17ba59bSThomas Gleixner *
1289e17ba59bSThomas Gleixner * Queue the next waiter for wakeup once we release the wait_lock.
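* The waiter is only queued on @wqh here; the actual wakeup happens in
* rt_mutex_wake_up_q() after wait_lock is dropped, which also pairs
* with the preempt_disable() done in mark_wakeup_next_waiter().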
1290e17ba59bSThomas Gleixner */
1291e17ba59bSThomas Gleixner mark_wakeup_next_waiter(&wqh, lock);
1292e17ba59bSThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1293e17ba59bSThomas Gleixner
1294e17ba59bSThomas Gleixner rt_mutex_wake_up_q(&wqh);
1295e17ba59bSThomas Gleixner }
1296e17ba59bSThomas Gleixner
1297e17ba59bSThomas Gleixner static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
1298e17ba59bSThomas Gleixner {
1299e17ba59bSThomas Gleixner if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1300e17ba59bSThomas Gleixner return;
1301e17ba59bSThomas Gleixner
1302e17ba59bSThomas Gleixner rt_mutex_slowunlock(lock);
1303e17ba59bSThomas Gleixner }
1304e17ba59bSThomas Gleixner
1305*992caf7fSSteven Rostedt #ifdef CONFIG_SMP
1306*992caf7fSSteven Rostedt static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1307*992caf7fSSteven Rostedt struct rt_mutex_waiter *waiter,
1308*992caf7fSSteven Rostedt struct task_struct *owner)
1309*992caf7fSSteven Rostedt {
1310*992caf7fSSteven Rostedt bool res = true;
1311*992caf7fSSteven Rostedt
1312*992caf7fSSteven Rostedt rcu_read_lock();
1313*992caf7fSSteven Rostedt for (;;) {
1314*992caf7fSSteven Rostedt /* If owner changed, trylock again. */
1315*992caf7fSSteven Rostedt if (owner != rt_mutex_owner(lock))
1316*992caf7fSSteven Rostedt break;
1317*992caf7fSSteven Rostedt /*
1318*992caf7fSSteven Rostedt * Ensure that @owner is dereferenced after checking that
1319*992caf7fSSteven Rostedt * the lock owner still matches @owner. If that fails,
1320*992caf7fSSteven Rostedt * @owner might point to freed memory. If it still matches,
1321*992caf7fSSteven Rostedt * the rcu_read_lock() ensures the memory stays valid.
1322*992caf7fSSteven Rostedt */
1323*992caf7fSSteven Rostedt barrier();
1324*992caf7fSSteven Rostedt /*
1325*992caf7fSSteven Rostedt * Stop spinning when:
1326*992caf7fSSteven Rostedt * - the lock owner has been scheduled out
1327*992caf7fSSteven Rostedt * - current is no longer the top waiter
1328*992caf7fSSteven Rostedt * - current is requested to reschedule (redundant
1329*992caf7fSSteven Rostedt * for CONFIG_PREEMPT_RCU=y)
1330*992caf7fSSteven Rostedt * - the VCPU on which owner runs is preempted
1331*992caf7fSSteven Rostedt */
1332*992caf7fSSteven Rostedt if (!owner->on_cpu || waiter != rt_mutex_top_waiter(lock) ||
1333*992caf7fSSteven Rostedt need_resched() || vcpu_is_preempted(task_cpu(owner))) {
1334*992caf7fSSteven Rostedt res = false;
1335*992caf7fSSteven Rostedt break;
1336*992caf7fSSteven Rostedt }
1337*992caf7fSSteven Rostedt cpu_relax();
1338*992caf7fSSteven Rostedt }
1339*992caf7fSSteven Rostedt rcu_read_unlock();
1340*992caf7fSSteven Rostedt return res;
1341*992caf7fSSteven Rostedt }
1342*992caf7fSSteven Rostedt #else
1343*992caf7fSSteven Rostedt static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1344*992caf7fSSteven Rostedt struct rt_mutex_waiter *waiter,
1345*992caf7fSSteven Rostedt struct task_struct *owner)
1346*992caf7fSSteven Rostedt {
1347*992caf7fSSteven Rostedt return false;
1348*992caf7fSSteven Rostedt }
1349*992caf7fSSteven Rostedt #endif
1350*992caf7fSSteven Rostedt
1351e17ba59bSThomas Gleixner #ifdef RT_MUTEX_BUILD_MUTEX
1352e17ba59bSThomas Gleixner /*
1353e17ba59bSThomas Gleixner * Functions required for:
1354e17ba59bSThomas Gleixner * - rtmutex, futex on all kernels
1355e17ba59bSThomas Gleixner * - mutex and rwsem substitutions on RT kernels
1356e17ba59bSThomas Gleixner */
1357e17ba59bSThomas Gleixner
13581696a8beSPeter Zijlstra /*
13591696a8beSPeter Zijlstra * Remove a waiter from a lock and give up
13601696a8beSPeter Zijlstra *
1361e17ba59bSThomas Gleixner * Must be called with lock->wait_lock held and interrupts disabled. It must
13621696a8beSPeter Zijlstra * have just failed to try_to_take_rt_mutex().
13631696a8beSPeter Zijlstra */
1364830e6accSPeter Zijlstra static void __sched remove_waiter(struct rt_mutex_base *lock,
13651696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter)
13661696a8beSPeter Zijlstra {
13671ca7b860SThomas Gleixner bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
13681696a8beSPeter Zijlstra struct task_struct *owner = rt_mutex_owner(lock);
1369830e6accSPeter Zijlstra struct rt_mutex_base *next_lock;
13701696a8beSPeter Zijlstra
1371e0aad5b4SPeter Zijlstra lockdep_assert_held(&lock->wait_lock);
1372e0aad5b4SPeter Zijlstra
1373b4abf910SThomas Gleixner raw_spin_lock(&current->pi_lock);
1374fb00aca4SPeter Zijlstra rt_mutex_dequeue(lock, waiter);
13751696a8beSPeter Zijlstra current->pi_blocked_on = NULL;
1376b4abf910SThomas Gleixner raw_spin_unlock(&current->pi_lock);
13771696a8beSPeter Zijlstra
13781ca7b860SThomas Gleixner /*
13791ca7b860SThomas Gleixner * Only update priority if the waiter was the highest priority
13801ca7b860SThomas Gleixner * waiter of the lock and there is an owner to update.
13811ca7b860SThomas Gleixner */
13821ca7b860SThomas Gleixner if (!owner || !is_top_waiter)
13831696a8beSPeter Zijlstra return;
13841696a8beSPeter Zijlstra
1385b4abf910SThomas Gleixner raw_spin_lock(&owner->pi_lock);
13861696a8beSPeter Zijlstra
1387fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(owner, waiter);
13881696a8beSPeter Zijlstra
13891ca7b860SThomas Gleixner if (rt_mutex_has_waiters(lock))
13901ca7b860SThomas Gleixner rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
13911696a8beSPeter Zijlstra
1392acd58620SPeter Zijlstra rt_mutex_adjust_prio(owner);
13931696a8beSPeter Zijlstra
139482084984SThomas Gleixner /* Store the lock on which owner is blocked or NULL */
139582084984SThomas Gleixner next_lock = task_blocked_on_lock(owner);
13961696a8beSPeter Zijlstra
1397b4abf910SThomas Gleixner raw_spin_unlock(&owner->pi_lock);
13981696a8beSPeter Zijlstra
13991ca7b860SThomas Gleixner /*
14001ca7b860SThomas Gleixner * Don't walk the chain if the owner task is not blocked
14011ca7b860SThomas Gleixner * itself.
14021ca7b860SThomas Gleixner */
140382084984SThomas Gleixner if (!next_lock)
14041696a8beSPeter Zijlstra return;
14051696a8beSPeter Zijlstra
14061696a8beSPeter Zijlstra /* gets dropped in rt_mutex_adjust_prio_chain()!
*/ 14071696a8beSPeter Zijlstra get_task_struct(owner); 14081696a8beSPeter Zijlstra 1409b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock); 14101696a8beSPeter Zijlstra 14118930ed80SThomas Gleixner rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, 14128930ed80SThomas Gleixner next_lock, NULL, current); 14131696a8beSPeter Zijlstra 1414b4abf910SThomas Gleixner raw_spin_lock_irq(&lock->wait_lock); 14151696a8beSPeter Zijlstra } 14161696a8beSPeter Zijlstra 14171696a8beSPeter Zijlstra /** 1418ebbdc41eSThomas Gleixner * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop 14191696a8beSPeter Zijlstra * @lock: the rt_mutex to take 1420add46132SPeter Zijlstra * @ww_ctx: WW mutex context pointer 14211696a8beSPeter Zijlstra * @state: the state the task should block in (TASK_INTERRUPTIBLE 14221696a8beSPeter Zijlstra * or TASK_UNINTERRUPTIBLE) 14231696a8beSPeter Zijlstra * @timeout: the pre-initialized and started timer, or NULL for none 14241696a8beSPeter Zijlstra * @waiter: the pre-initialized rt_mutex_waiter 14251696a8beSPeter Zijlstra * 1426b4abf910SThomas Gleixner * Must be called with lock->wait_lock held and interrupts disabled 14271696a8beSPeter Zijlstra */ 1428ebbdc41eSThomas Gleixner static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, 1429add46132SPeter Zijlstra struct ww_acquire_ctx *ww_ctx, 1430830e6accSPeter Zijlstra unsigned int state, 14311696a8beSPeter Zijlstra struct hrtimer_sleeper *timeout, 14321696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter) 14331696a8beSPeter Zijlstra { 1434add46132SPeter Zijlstra struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); 1435*992caf7fSSteven Rostedt struct task_struct *owner; 14361696a8beSPeter Zijlstra int ret = 0; 14371696a8beSPeter Zijlstra 14381696a8beSPeter Zijlstra for (;;) { 14391696a8beSPeter Zijlstra /* Try to acquire the lock: */ 14401696a8beSPeter Zijlstra if (try_to_take_rt_mutex(lock, current, waiter)) 14411696a8beSPeter Zijlstra break; 14421696a8beSPeter Zijlstra 1443a51a327fSThomas Gleixner if (timeout && !timeout->task) { 14441696a8beSPeter Zijlstra ret = -ETIMEDOUT; 1445a51a327fSThomas Gleixner break; 1446a51a327fSThomas Gleixner } 1447a51a327fSThomas Gleixner if (signal_pending_state(state, current)) { 1448a51a327fSThomas Gleixner ret = -EINTR; 14491696a8beSPeter Zijlstra break; 14501696a8beSPeter Zijlstra } 14511696a8beSPeter Zijlstra 1452add46132SPeter Zijlstra if (build_ww_mutex() && ww_ctx) { 1453add46132SPeter Zijlstra ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx); 1454add46132SPeter Zijlstra if (ret) 1455add46132SPeter Zijlstra break; 1456add46132SPeter Zijlstra } 1457add46132SPeter Zijlstra 1458*992caf7fSSteven Rostedt if (waiter == rt_mutex_top_waiter(lock)) 1459*992caf7fSSteven Rostedt owner = rt_mutex_owner(lock); 1460*992caf7fSSteven Rostedt else 1461*992caf7fSSteven Rostedt owner = NULL; 1462b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock); 14631696a8beSPeter Zijlstra 1464*992caf7fSSteven Rostedt if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) 14651b0b7c17SDavidlohr Bueso schedule(); 14661696a8beSPeter Zijlstra 1467b4abf910SThomas Gleixner raw_spin_lock_irq(&lock->wait_lock); 14681696a8beSPeter Zijlstra set_current_state(state); 14691696a8beSPeter Zijlstra } 14701696a8beSPeter Zijlstra 1471afffc6c1SDavidlohr Bueso __set_current_state(TASK_RUNNING); 14721696a8beSPeter Zijlstra return ret; 14731696a8beSPeter Zijlstra } 14741696a8beSPeter Zijlstra 1475d7a2edb8SThomas Gleixner static void __sched 
rt_mutex_handle_deadlock(int res, int detect_deadlock,
14763d5c9340SThomas Gleixner struct rt_mutex_waiter *w)
14773d5c9340SThomas Gleixner {
14783d5c9340SThomas Gleixner /*
14793d5c9340SThomas Gleixner * If the result is not -EDEADLOCK or the caller requested
14803d5c9340SThomas Gleixner * deadlock detection, nothing to do here.
14813d5c9340SThomas Gleixner */
14823d5c9340SThomas Gleixner if (res != -EDEADLOCK || detect_deadlock)
14833d5c9340SThomas Gleixner return;
14843d5c9340SThomas Gleixner
1485add46132SPeter Zijlstra if (build_ww_mutex() && w->ww_ctx)
1486add46132SPeter Zijlstra return;
1487add46132SPeter Zijlstra
14883d5c9340SThomas Gleixner /*
1489e2db7592SIngo Molnar * Yell loudly and stop the task right here.
14903d5c9340SThomas Gleixner */
14916d41c675SSebastian Andrzej Siewior WARN(1, "rtmutex deadlock detected\n");
14923d5c9340SThomas Gleixner while (1) {
14933d5c9340SThomas Gleixner set_current_state(TASK_INTERRUPTIBLE);
14943d5c9340SThomas Gleixner schedule();
14953d5c9340SThomas Gleixner }
14963d5c9340SThomas Gleixner }
14973d5c9340SThomas Gleixner
1498ebbdc41eSThomas Gleixner /**
1499ebbdc41eSThomas Gleixner * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
1500ebbdc41eSThomas Gleixner * @lock: The rtmutex to block lock
1501add46132SPeter Zijlstra * @ww_ctx: WW mutex context pointer
1502ebbdc41eSThomas Gleixner * @state: The task state for sleeping
1503ebbdc41eSThomas Gleixner * @chwalk: Indicator whether full or partial chainwalk is requested
1504ebbdc41eSThomas Gleixner * @waiter: Initialized waiter for blocking
15051696a8beSPeter Zijlstra */
1506ebbdc41eSThomas Gleixner static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
1507add46132SPeter Zijlstra struct ww_acquire_ctx *ww_ctx,
1508830e6accSPeter Zijlstra unsigned int state,
1509ebbdc41eSThomas Gleixner enum rtmutex_chainwalk chwalk,
1510ebbdc41eSThomas Gleixner struct rt_mutex_waiter *waiter)
1511ebbdc41eSThomas Gleixner {
1512add46132SPeter Zijlstra struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1513add46132SPeter Zijlstra struct ww_mutex *ww = ww_container_of(rtm);
1514ebbdc41eSThomas Gleixner int ret;
1515ebbdc41eSThomas Gleixner
1516ebbdc41eSThomas Gleixner lockdep_assert_held(&lock->wait_lock);
1517ebbdc41eSThomas Gleixner
1518ebbdc41eSThomas Gleixner /* Try to acquire the lock again: */
1519add46132SPeter Zijlstra if (try_to_take_rt_mutex(lock, current, NULL)) {
1520add46132SPeter Zijlstra if (build_ww_mutex() && ww_ctx) {
1521add46132SPeter Zijlstra __ww_mutex_check_waiters(rtm, ww_ctx);
1522add46132SPeter Zijlstra ww_mutex_lock_acquired(ww, ww_ctx);
1523add46132SPeter Zijlstra }
1524ebbdc41eSThomas Gleixner return 0;
1525add46132SPeter Zijlstra }
1526ebbdc41eSThomas Gleixner
1527ebbdc41eSThomas Gleixner set_current_state(state);
1528ebbdc41eSThomas Gleixner
1529add46132SPeter Zijlstra ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
1530ebbdc41eSThomas Gleixner if (likely(!ret))
1531add46132SPeter Zijlstra ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
1532ebbdc41eSThomas Gleixner
1533add46132SPeter Zijlstra if (likely(!ret)) {
1534add46132SPeter Zijlstra /* acquired the lock */
1535add46132SPeter Zijlstra if (build_ww_mutex() && ww_ctx) {
1536add46132SPeter Zijlstra if (!ww_ctx->is_wait_die)
1537add46132SPeter Zijlstra __ww_mutex_check_waiters(rtm, ww_ctx);
1538add46132SPeter Zijlstra ww_mutex_lock_acquired(ww, ww_ctx);
1539add46132SPeter Zijlstra }
1540add46132SPeter Zijlstra } else {
1541ebbdc41eSThomas
Gleixner __set_current_state(TASK_RUNNING); 1542ebbdc41eSThomas Gleixner remove_waiter(lock, waiter); 1543ebbdc41eSThomas Gleixner rt_mutex_handle_deadlock(ret, chwalk, waiter); 1544ebbdc41eSThomas Gleixner } 1545ebbdc41eSThomas Gleixner 1546ebbdc41eSThomas Gleixner /* 1547ebbdc41eSThomas Gleixner * try_to_take_rt_mutex() sets the waiter bit 1548ebbdc41eSThomas Gleixner * unconditionally. We might have to fix that up. 1549ebbdc41eSThomas Gleixner */ 1550ebbdc41eSThomas Gleixner fixup_rt_mutex_waiters(lock); 1551ebbdc41eSThomas Gleixner return ret; 1552ebbdc41eSThomas Gleixner } 1553ebbdc41eSThomas Gleixner 1554ebbdc41eSThomas Gleixner static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, 1555add46132SPeter Zijlstra struct ww_acquire_ctx *ww_ctx, 1556ebbdc41eSThomas Gleixner unsigned int state) 15571696a8beSPeter Zijlstra { 15581696a8beSPeter Zijlstra struct rt_mutex_waiter waiter; 1559ebbdc41eSThomas Gleixner int ret; 15601696a8beSPeter Zijlstra 156150809358SPeter Zijlstra rt_mutex_init_waiter(&waiter); 1562add46132SPeter Zijlstra waiter.ww_ctx = ww_ctx; 15631696a8beSPeter Zijlstra 1564add46132SPeter Zijlstra ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK, 1565add46132SPeter Zijlstra &waiter); 1566ebbdc41eSThomas Gleixner 1567ebbdc41eSThomas Gleixner debug_rt_mutex_free_waiter(&waiter); 1568ebbdc41eSThomas Gleixner return ret; 1569ebbdc41eSThomas Gleixner } 1570ebbdc41eSThomas Gleixner 1571ebbdc41eSThomas Gleixner /* 1572ebbdc41eSThomas Gleixner * rt_mutex_slowlock - Locking slowpath invoked when fast path fails 1573ebbdc41eSThomas Gleixner * @lock: The rtmutex to block lock 1574add46132SPeter Zijlstra * @ww_ctx: WW mutex context pointer 1575ebbdc41eSThomas Gleixner * @state: The task state for sleeping 1576ebbdc41eSThomas Gleixner */ 1577ebbdc41eSThomas Gleixner static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, 1578add46132SPeter Zijlstra struct ww_acquire_ctx *ww_ctx, 1579ebbdc41eSThomas Gleixner unsigned int state) 1580ebbdc41eSThomas Gleixner { 1581ebbdc41eSThomas Gleixner unsigned long flags; 1582ebbdc41eSThomas Gleixner int ret; 1583ebbdc41eSThomas Gleixner 1584b4abf910SThomas Gleixner /* 1585b4abf910SThomas Gleixner * Technically we could use raw_spin_[un]lock_irq() here, but this can 1586b4abf910SThomas Gleixner * be called in early boot if the cmpxchg() fast path is disabled 1587b4abf910SThomas Gleixner * (debug, no architecture support). In this case we will acquire the 1588b4abf910SThomas Gleixner * rtmutex with lock->wait_lock held. But we cannot unconditionally 1589b4abf910SThomas Gleixner * enable interrupts in that early boot case. So we need to use the 1590b4abf910SThomas Gleixner * irqsave/restore variants. 
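*
* E.g. an early boot caller may already run with interrupts disabled;
* raw_spin_unlock_irq() would unconditionally re-enable them, whereas
* the irqrestore variant puts the saved flags back as they were.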
1591b4abf910SThomas Gleixner */ 1592b4abf910SThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags); 1593add46132SPeter Zijlstra ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); 1594b4abf910SThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 15951696a8beSPeter Zijlstra 15961696a8beSPeter Zijlstra return ret; 15971696a8beSPeter Zijlstra } 15981696a8beSPeter Zijlstra 1599830e6accSPeter Zijlstra static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, 1600531ae4b0SThomas Gleixner unsigned int state) 1601531ae4b0SThomas Gleixner { 1602531ae4b0SThomas Gleixner if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 1603531ae4b0SThomas Gleixner return 0; 1604531ae4b0SThomas Gleixner 1605add46132SPeter Zijlstra return rt_mutex_slowlock(lock, NULL, state); 1606531ae4b0SThomas Gleixner } 1607e17ba59bSThomas Gleixner #endif /* RT_MUTEX_BUILD_MUTEX */ 16081c143c4bSThomas Gleixner 16091c143c4bSThomas Gleixner #ifdef RT_MUTEX_BUILD_SPINLOCKS 16101c143c4bSThomas Gleixner /* 16111c143c4bSThomas Gleixner * Functions required for spin/rw_lock substitution on RT kernels 16121c143c4bSThomas Gleixner */ 16131c143c4bSThomas Gleixner 16141c143c4bSThomas Gleixner /** 16151c143c4bSThomas Gleixner * rtlock_slowlock_locked - Slow path lock acquisition for RT locks 16161c143c4bSThomas Gleixner * @lock: The underlying RT mutex 16171c143c4bSThomas Gleixner */ 16181c143c4bSThomas Gleixner static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) 16191c143c4bSThomas Gleixner { 16201c143c4bSThomas Gleixner struct rt_mutex_waiter waiter; 1621*992caf7fSSteven Rostedt struct task_struct *owner; 16221c143c4bSThomas Gleixner 16231c143c4bSThomas Gleixner lockdep_assert_held(&lock->wait_lock); 16241c143c4bSThomas Gleixner 16251c143c4bSThomas Gleixner if (try_to_take_rt_mutex(lock, current, NULL)) 16261c143c4bSThomas Gleixner return; 16271c143c4bSThomas Gleixner 16281c143c4bSThomas Gleixner rt_mutex_init_rtlock_waiter(&waiter); 16291c143c4bSThomas Gleixner 16301c143c4bSThomas Gleixner /* Save current state and set state to TASK_RTLOCK_WAIT */ 16311c143c4bSThomas Gleixner current_save_and_set_rtlock_wait_state(); 16321c143c4bSThomas Gleixner 1633add46132SPeter Zijlstra task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK); 16341c143c4bSThomas Gleixner 16351c143c4bSThomas Gleixner for (;;) { 16361c143c4bSThomas Gleixner /* Try to acquire the lock again */ 16371c143c4bSThomas Gleixner if (try_to_take_rt_mutex(lock, current, &waiter)) 16381c143c4bSThomas Gleixner break; 16391c143c4bSThomas Gleixner 1640*992caf7fSSteven Rostedt if (&waiter == rt_mutex_top_waiter(lock)) 1641*992caf7fSSteven Rostedt owner = rt_mutex_owner(lock); 1642*992caf7fSSteven Rostedt else 1643*992caf7fSSteven Rostedt owner = NULL; 16441c143c4bSThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock); 16451c143c4bSThomas Gleixner 1646*992caf7fSSteven Rostedt if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) 16471c143c4bSThomas Gleixner schedule_rtlock(); 16481c143c4bSThomas Gleixner 16491c143c4bSThomas Gleixner raw_spin_lock_irq(&lock->wait_lock); 16501c143c4bSThomas Gleixner set_current_state(TASK_RTLOCK_WAIT); 16511c143c4bSThomas Gleixner } 16521c143c4bSThomas Gleixner 16531c143c4bSThomas Gleixner /* Restore the task state */ 16541c143c4bSThomas Gleixner current_restore_rtlock_saved_state(); 16551c143c4bSThomas Gleixner 16561c143c4bSThomas Gleixner /* 16571c143c4bSThomas Gleixner * try_to_take_rt_mutex() sets the waiter bit unconditionally. 
16581c143c4bSThomas Gleixner * We might have to fix that up: 16591c143c4bSThomas Gleixner */ 16601c143c4bSThomas Gleixner fixup_rt_mutex_waiters(lock); 16611c143c4bSThomas Gleixner debug_rt_mutex_free_waiter(&waiter); 16621c143c4bSThomas Gleixner } 16631c143c4bSThomas Gleixner 16641c143c4bSThomas Gleixner static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock) 16651c143c4bSThomas Gleixner { 16661c143c4bSThomas Gleixner unsigned long flags; 16671c143c4bSThomas Gleixner 16681c143c4bSThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags); 16691c143c4bSThomas Gleixner rtlock_slowlock_locked(lock); 16701c143c4bSThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 16711c143c4bSThomas Gleixner } 16721c143c4bSThomas Gleixner 16731c143c4bSThomas Gleixner #endif /* RT_MUTEX_BUILD_SPINLOCKS */ 1674
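
/*
 * Caller-side usage sketch (illustrative only; the public wrappers are
 * declared in the locking headers, not defined in this file):
 *
 *	struct rt_mutex m;
 *
 *	rt_mutex_init(&m);
 *	rt_mutex_lock(&m);	<- fast path: cmpxchg(lock->owner, NULL -> current)
 *	... critical section, priority inheritance armed ...
 *	rt_mutex_unlock(&m);	<- fast path: cmpxchg(lock->owner, current -> NULL)
 *
 * Whenever either cmpxchg fails, the slow paths implemented above take
 * over under lock->wait_lock.
 */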