1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 21696a8beSPeter Zijlstra /* 31696a8beSPeter Zijlstra * RT-Mutexes: simple blocking mutual exclusion locks with PI support 41696a8beSPeter Zijlstra * 51696a8beSPeter Zijlstra * started by Ingo Molnar and Thomas Gleixner. 61696a8beSPeter Zijlstra * 71696a8beSPeter Zijlstra * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 81696a8beSPeter Zijlstra * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 91696a8beSPeter Zijlstra * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt 101696a8beSPeter Zijlstra * Copyright (C) 2006 Esben Nielsen 111696a8beSPeter Zijlstra * 12387b1468SMauro Carvalho Chehab * See Documentation/locking/rt-mutex-design.rst for details. 131696a8beSPeter Zijlstra */ 14*531ae4b0SThomas Gleixner #include <linux/sched.h> 15*531ae4b0SThomas Gleixner #include <linux/sched/debug.h> 16*531ae4b0SThomas Gleixner #include <linux/sched/deadline.h> 17174cd4b1SIngo Molnar #include <linux/sched/signal.h> 181696a8beSPeter Zijlstra #include <linux/sched/rt.h> 1984f001e1SIngo Molnar #include <linux/sched/wake_q.h> 201696a8beSPeter Zijlstra 211696a8beSPeter Zijlstra #include "rtmutex_common.h" 221696a8beSPeter Zijlstra 231696a8beSPeter Zijlstra /* 241696a8beSPeter Zijlstra * lock->owner state tracking: 251696a8beSPeter Zijlstra * 261696a8beSPeter Zijlstra * lock->owner holds the task_struct pointer of the owner. Bit 0 271696a8beSPeter Zijlstra * is used to keep track of the "lock has waiters" state. 281696a8beSPeter Zijlstra * 291696a8beSPeter Zijlstra * owner bit0 301696a8beSPeter Zijlstra * NULL 0 lock is free (fast acquire possible) 311696a8beSPeter Zijlstra * NULL 1 lock is free and has waiters and the top waiter 321696a8beSPeter Zijlstra * is going to take the lock* 331696a8beSPeter Zijlstra * taskpointer 0 lock is held (fast release possible) 341696a8beSPeter Zijlstra * taskpointer 1 lock is held and has waiters** 351696a8beSPeter Zijlstra * 361696a8beSPeter Zijlstra * The fast atomic compare exchange based acquire and release is only 371696a8beSPeter Zijlstra * possible when bit 0 of lock->owner is 0. 381696a8beSPeter Zijlstra * 391696a8beSPeter Zijlstra * (*) It also can be a transitional state when grabbing the lock 401696a8beSPeter Zijlstra * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, 411696a8beSPeter Zijlstra * we need to set the bit0 before looking at the lock, and the owner may be 421696a8beSPeter Zijlstra * NULL in this small time, hence this can be a transitional state. 431696a8beSPeter Zijlstra * 441696a8beSPeter Zijlstra * (**) There is a small time when bit 0 is set but there are no 451696a8beSPeter Zijlstra * waiters. This can happen when grabbing the lock in the slow path. 461696a8beSPeter Zijlstra * To prevent a cmpxchg of the owner releasing the lock, we need to 471696a8beSPeter Zijlstra * set this bit before looking at the lock. 481696a8beSPeter Zijlstra */ 491696a8beSPeter Zijlstra 50d7a2edb8SThomas Gleixner static __always_inline void 511696a8beSPeter Zijlstra rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) 521696a8beSPeter Zijlstra { 531696a8beSPeter Zijlstra unsigned long val = (unsigned long)owner; 541696a8beSPeter Zijlstra 551696a8beSPeter Zijlstra if (rt_mutex_has_waiters(lock)) 561696a8beSPeter Zijlstra val |= RT_MUTEX_HAS_WAITERS; 571696a8beSPeter Zijlstra 580050c7b2SPaul E. 
McKenney WRITE_ONCE(lock->owner, (struct task_struct *)val); 591696a8beSPeter Zijlstra } 601696a8beSPeter Zijlstra 61d7a2edb8SThomas Gleixner static __always_inline void clear_rt_mutex_waiters(struct rt_mutex *lock) 621696a8beSPeter Zijlstra { 631696a8beSPeter Zijlstra lock->owner = (struct task_struct *) 641696a8beSPeter Zijlstra ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 651696a8beSPeter Zijlstra } 661696a8beSPeter Zijlstra 67d7a2edb8SThomas Gleixner static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex *lock) 681696a8beSPeter Zijlstra { 69dbb26055SThomas Gleixner unsigned long owner, *p = (unsigned long *) &lock->owner; 70dbb26055SThomas Gleixner 71dbb26055SThomas Gleixner if (rt_mutex_has_waiters(lock)) 72dbb26055SThomas Gleixner return; 73dbb26055SThomas Gleixner 74dbb26055SThomas Gleixner /* 75dbb26055SThomas Gleixner * The rbtree has no waiters enqueued, now make sure that the 76dbb26055SThomas Gleixner * lock->owner still has the waiters bit set, otherwise the 77dbb26055SThomas Gleixner * following can happen: 78dbb26055SThomas Gleixner * 79dbb26055SThomas Gleixner * CPU 0 CPU 1 CPU2 80dbb26055SThomas Gleixner * l->owner=T1 81dbb26055SThomas Gleixner * rt_mutex_lock(l) 82dbb26055SThomas Gleixner * lock(l->lock) 83dbb26055SThomas Gleixner * l->owner = T1 | HAS_WAITERS; 84dbb26055SThomas Gleixner * enqueue(T2) 85dbb26055SThomas Gleixner * boost() 86dbb26055SThomas Gleixner * unlock(l->lock) 87dbb26055SThomas Gleixner * block() 88dbb26055SThomas Gleixner * 89dbb26055SThomas Gleixner * rt_mutex_lock(l) 90dbb26055SThomas Gleixner * lock(l->lock) 91dbb26055SThomas Gleixner * l->owner = T1 | HAS_WAITERS; 92dbb26055SThomas Gleixner * enqueue(T3) 93dbb26055SThomas Gleixner * boost() 94dbb26055SThomas Gleixner * unlock(l->lock) 95dbb26055SThomas Gleixner * block() 96dbb26055SThomas Gleixner * signal(->T2) signal(->T3) 97dbb26055SThomas Gleixner * lock(l->lock) 98dbb26055SThomas Gleixner * dequeue(T2) 99dbb26055SThomas Gleixner * deboost() 100dbb26055SThomas Gleixner * unlock(l->lock) 101dbb26055SThomas Gleixner * lock(l->lock) 102dbb26055SThomas Gleixner * dequeue(T3) 103dbb26055SThomas Gleixner * ==> wait list is empty 104dbb26055SThomas Gleixner * deboost() 105dbb26055SThomas Gleixner * unlock(l->lock) 106dbb26055SThomas Gleixner * lock(l->lock) 107dbb26055SThomas Gleixner * fixup_rt_mutex_waiters() 108dbb26055SThomas Gleixner * if (wait_list_empty(l) { 109dbb26055SThomas Gleixner * l->owner = owner 110dbb26055SThomas Gleixner * owner = l->owner & ~HAS_WAITERS; 111dbb26055SThomas Gleixner * ==> l->owner = T1 112dbb26055SThomas Gleixner * } 113dbb26055SThomas Gleixner * lock(l->lock) 114dbb26055SThomas Gleixner * rt_mutex_unlock(l) fixup_rt_mutex_waiters() 115dbb26055SThomas Gleixner * if (wait_list_empty(l) { 116dbb26055SThomas Gleixner * owner = l->owner & ~HAS_WAITERS; 117dbb26055SThomas Gleixner * cmpxchg(l->owner, T1, NULL) 118dbb26055SThomas Gleixner * ===> Success (l->owner = NULL) 119dbb26055SThomas Gleixner * 120dbb26055SThomas Gleixner * l->owner = owner 121dbb26055SThomas Gleixner * ==> l->owner = T1 122dbb26055SThomas Gleixner * } 123dbb26055SThomas Gleixner * 124dbb26055SThomas Gleixner * With the check for the waiter bit in place T3 on CPU2 will not 125dbb26055SThomas Gleixner * overwrite. All tasks fiddling with the waiters bit are 126dbb26055SThomas Gleixner * serialized by l->lock, so nothing else can modify the waiters 127dbb26055SThomas Gleixner * bit. 
If the bit is set then nothing can change l->owner either 128dbb26055SThomas Gleixner * so the simple RMW is safe. The cmpxchg() will simply fail if it 129dbb26055SThomas Gleixner * happens in the middle of the RMW because the waiters bit is 130dbb26055SThomas Gleixner * still set. 131dbb26055SThomas Gleixner */ 132dbb26055SThomas Gleixner owner = READ_ONCE(*p); 133dbb26055SThomas Gleixner if (owner & RT_MUTEX_HAS_WAITERS) 134dbb26055SThomas Gleixner WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); 1351696a8beSPeter Zijlstra } 1361696a8beSPeter Zijlstra 1371696a8beSPeter Zijlstra /* 138cede8841SSebastian Andrzej Siewior * We can speed up the acquire/release, if there's no debugging state to be 139cede8841SSebastian Andrzej Siewior * set up. 1401696a8beSPeter Zijlstra */ 141cede8841SSebastian Andrzej Siewior #ifndef CONFIG_DEBUG_RT_MUTEXES 14278515930SSebastian Andrzej Siewior static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex *lock, 14378515930SSebastian Andrzej Siewior struct task_struct *old, 14478515930SSebastian Andrzej Siewior struct task_struct *new) 14578515930SSebastian Andrzej Siewior { 146709e0b62SThomas Gleixner return try_cmpxchg_acquire(&lock->owner, &old, new); 14778515930SSebastian Andrzej Siewior } 14878515930SSebastian Andrzej Siewior 14978515930SSebastian Andrzej Siewior static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, 15078515930SSebastian Andrzej Siewior struct task_struct *old, 15178515930SSebastian Andrzej Siewior struct task_struct *new) 15278515930SSebastian Andrzej Siewior { 153709e0b62SThomas Gleixner return try_cmpxchg_release(&lock->owner, &old, new); 15478515930SSebastian Andrzej Siewior } 155700318d1SDavidlohr Bueso 156700318d1SDavidlohr Bueso /* 157700318d1SDavidlohr Bueso * Callers must hold the ->wait_lock -- which is the whole purpose as we force 158700318d1SDavidlohr Bueso * all future threads that attempt to [Rmw] the lock to the slowpath. As such 159700318d1SDavidlohr Bueso * relaxed semantics suffice. 
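 *
 * For illustration only, a user-space sketch of the same idea using C11
 * atomics; the names below (mark_waiters, HAS_WAITERS, which mirrors
 * RT_MUTEX_HAS_WAITERS) are assumptions, not kernel API, and the real
 * helper built on cmpxchg_relaxed() follows right after this comment.
 * The caller's wait_lock provides all the ordering, so a relaxed RMW is
 * sufficient:
 *
 *	#include <stdatomic.h>
 *	#include <stdint.h>
 *
 *	#define HAS_WAITERS	1UL
 *
 *	static void mark_waiters(atomic_uintptr_t *owner)
 *	{
 *		uintptr_t old = atomic_load_explicit(owner, memory_order_relaxed);
 *
 *		do { } while (!atomic_compare_exchange_weak_explicit(owner, &old,
 *					old | HAS_WAITERS,
 *					memory_order_relaxed,
 *					memory_order_relaxed));
 *	}
 *
 * A failed (or spurious) weak CAS refreshes 'old' and the loop retries.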
160700318d1SDavidlohr Bueso */ 161d7a2edb8SThomas Gleixner static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 1621696a8beSPeter Zijlstra { 1631696a8beSPeter Zijlstra unsigned long owner, *p = (unsigned long *) &lock->owner; 1641696a8beSPeter Zijlstra 1651696a8beSPeter Zijlstra do { 1661696a8beSPeter Zijlstra owner = *p; 167700318d1SDavidlohr Bueso } while (cmpxchg_relaxed(p, owner, 168700318d1SDavidlohr Bueso owner | RT_MUTEX_HAS_WAITERS) != owner); 1691696a8beSPeter Zijlstra } 17027e35715SThomas Gleixner 17127e35715SThomas Gleixner /* 17227e35715SThomas Gleixner * Safe fastpath aware unlock: 17327e35715SThomas Gleixner * 1) Clear the waiters bit 17427e35715SThomas Gleixner * 2) Drop lock->wait_lock 17527e35715SThomas Gleixner * 3) Try to unlock the lock with cmpxchg 17627e35715SThomas Gleixner */ 177d7a2edb8SThomas Gleixner static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 178b4abf910SThomas Gleixner unsigned long flags) 17927e35715SThomas Gleixner __releases(lock->wait_lock) 18027e35715SThomas Gleixner { 18127e35715SThomas Gleixner struct task_struct *owner = rt_mutex_owner(lock); 18227e35715SThomas Gleixner 18327e35715SThomas Gleixner clear_rt_mutex_waiters(lock); 184b4abf910SThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 18527e35715SThomas Gleixner /* 18627e35715SThomas Gleixner * If a new waiter comes in between the unlock and the cmpxchg 18727e35715SThomas Gleixner * we have two situations: 18827e35715SThomas Gleixner * 18927e35715SThomas Gleixner * unlock(wait_lock); 19027e35715SThomas Gleixner * lock(wait_lock); 19127e35715SThomas Gleixner * cmpxchg(p, owner, 0) == owner 19227e35715SThomas Gleixner * mark_rt_mutex_waiters(lock); 19327e35715SThomas Gleixner * acquire(lock); 19427e35715SThomas Gleixner * or: 19527e35715SThomas Gleixner * 19627e35715SThomas Gleixner * unlock(wait_lock); 19727e35715SThomas Gleixner * lock(wait_lock); 19827e35715SThomas Gleixner * mark_rt_mutex_waiters(lock); 19927e35715SThomas Gleixner * 20027e35715SThomas Gleixner * cmpxchg(p, owner, 0) != owner 20127e35715SThomas Gleixner * enqueue_waiter(); 20227e35715SThomas Gleixner * unlock(wait_lock); 20327e35715SThomas Gleixner * lock(wait_lock); 20427e35715SThomas Gleixner * wake waiter(); 20527e35715SThomas Gleixner * unlock(wait_lock); 20627e35715SThomas Gleixner * lock(wait_lock); 20727e35715SThomas Gleixner * acquire(lock); 20827e35715SThomas Gleixner */ 209700318d1SDavidlohr Bueso return rt_mutex_cmpxchg_release(lock, owner, NULL); 21027e35715SThomas Gleixner } 21127e35715SThomas Gleixner 2121696a8beSPeter Zijlstra #else 21378515930SSebastian Andrzej Siewior static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex *lock, 21478515930SSebastian Andrzej Siewior struct task_struct *old, 21578515930SSebastian Andrzej Siewior struct task_struct *new) 21678515930SSebastian Andrzej Siewior { 21778515930SSebastian Andrzej Siewior return false; 21878515930SSebastian Andrzej Siewior 21978515930SSebastian Andrzej Siewior } 22078515930SSebastian Andrzej Siewior 22178515930SSebastian Andrzej Siewior static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, 22278515930SSebastian Andrzej Siewior struct task_struct *old, 22378515930SSebastian Andrzej Siewior struct task_struct *new) 22478515930SSebastian Andrzej Siewior { 22578515930SSebastian Andrzej Siewior return false; 22678515930SSebastian Andrzej Siewior } 227700318d1SDavidlohr Bueso 228d7a2edb8SThomas Gleixner static __always_inline void mark_rt_mutex_waiters(struct 
rt_mutex *lock) 2291696a8beSPeter Zijlstra { 2301696a8beSPeter Zijlstra lock->owner = (struct task_struct *) 2311696a8beSPeter Zijlstra ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 2321696a8beSPeter Zijlstra } 23327e35715SThomas Gleixner 23427e35715SThomas Gleixner /* 23527e35715SThomas Gleixner * Simple slow path only version: lock->owner is protected by lock->wait_lock. 23627e35715SThomas Gleixner */ 237d7a2edb8SThomas Gleixner static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 238b4abf910SThomas Gleixner unsigned long flags) 23927e35715SThomas Gleixner __releases(lock->wait_lock) 24027e35715SThomas Gleixner { 24127e35715SThomas Gleixner lock->owner = NULL; 242b4abf910SThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 24327e35715SThomas Gleixner return true; 24427e35715SThomas Gleixner } 2451696a8beSPeter Zijlstra #endif 2461696a8beSPeter Zijlstra 24719830e55SPeter Zijlstra /* 24819830e55SPeter Zijlstra * Only use with rt_mutex_waiter_{less,equal}() 24919830e55SPeter Zijlstra */ 25019830e55SPeter Zijlstra #define task_to_waiter(p) \ 25119830e55SPeter Zijlstra &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } 25219830e55SPeter Zijlstra 253d7a2edb8SThomas Gleixner static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, 254fb00aca4SPeter Zijlstra struct rt_mutex_waiter *right) 255fb00aca4SPeter Zijlstra { 2562d3d891dSDario Faggioli if (left->prio < right->prio) 257fb00aca4SPeter Zijlstra return 1; 258fb00aca4SPeter Zijlstra 2591696a8beSPeter Zijlstra /* 2602d3d891dSDario Faggioli * If both waiters have dl_prio(), we check the deadlines of the 2612d3d891dSDario Faggioli * associated tasks. 2622d3d891dSDario Faggioli * If left waiter has a dl_prio(), and we didn't return 1 above, 2632d3d891dSDario Faggioli * then right waiter has a dl_prio() too. 264fb00aca4SPeter Zijlstra */ 2652d3d891dSDario Faggioli if (dl_prio(left->prio)) 266e0aad5b4SPeter Zijlstra return dl_time_before(left->deadline, right->deadline); 267fb00aca4SPeter Zijlstra 268fb00aca4SPeter Zijlstra return 0; 269fb00aca4SPeter Zijlstra } 270fb00aca4SPeter Zijlstra 271d7a2edb8SThomas Gleixner static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, 27219830e55SPeter Zijlstra struct rt_mutex_waiter *right) 27319830e55SPeter Zijlstra { 27419830e55SPeter Zijlstra if (left->prio != right->prio) 27519830e55SPeter Zijlstra return 0; 27619830e55SPeter Zijlstra 27719830e55SPeter Zijlstra /* 27819830e55SPeter Zijlstra * If both waiters have dl_prio(), we check the deadlines of the 27919830e55SPeter Zijlstra * associated tasks. 28019830e55SPeter Zijlstra * If left waiter has a dl_prio(), and we didn't return 0 above, 28119830e55SPeter Zijlstra * then right waiter has a dl_prio() too. 
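 *
 * Worked example for these two helpers (values assumed; kernel prio
 * view, smaller number means higher priority): an RT waiter with
 * prio 20 is "less" than one with prio 40 and sorts in front of it.
 * Two SCHED_DEADLINE waiters both carry prio -1, so dl_prio() is true
 * and _less() falls back to dl_time_before(): the earlier absolute
 * deadline wins, while _equal() only reports a match when the two
 * deadlines are identical.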
28219830e55SPeter Zijlstra */ 28319830e55SPeter Zijlstra if (dl_prio(left->prio)) 28419830e55SPeter Zijlstra return left->deadline == right->deadline; 28519830e55SPeter Zijlstra 28619830e55SPeter Zijlstra return 1; 28719830e55SPeter Zijlstra } 28819830e55SPeter Zijlstra 2895a798725SPeter Zijlstra #define __node_2_waiter(node) \ 2905a798725SPeter Zijlstra rb_entry((node), struct rt_mutex_waiter, tree_entry) 2915a798725SPeter Zijlstra 292d7a2edb8SThomas Gleixner static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b) 2935a798725SPeter Zijlstra { 2945a798725SPeter Zijlstra return rt_mutex_waiter_less(__node_2_waiter(a), __node_2_waiter(b)); 2955a798725SPeter Zijlstra } 2965a798725SPeter Zijlstra 297d7a2edb8SThomas Gleixner static __always_inline void 298fb00aca4SPeter Zijlstra rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 299fb00aca4SPeter Zijlstra { 3005a798725SPeter Zijlstra rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less); 301fb00aca4SPeter Zijlstra } 302fb00aca4SPeter Zijlstra 303d7a2edb8SThomas Gleixner static __always_inline void 304fb00aca4SPeter Zijlstra rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 305fb00aca4SPeter Zijlstra { 306fb00aca4SPeter Zijlstra if (RB_EMPTY_NODE(&waiter->tree_entry)) 307fb00aca4SPeter Zijlstra return; 308fb00aca4SPeter Zijlstra 309a23ba907SDavidlohr Bueso rb_erase_cached(&waiter->tree_entry, &lock->waiters); 310fb00aca4SPeter Zijlstra RB_CLEAR_NODE(&waiter->tree_entry); 311fb00aca4SPeter Zijlstra } 312fb00aca4SPeter Zijlstra 3135a798725SPeter Zijlstra #define __node_2_pi_waiter(node) \ 3145a798725SPeter Zijlstra rb_entry((node), struct rt_mutex_waiter, pi_tree_entry) 3155a798725SPeter Zijlstra 316d7a2edb8SThomas Gleixner static __always_inline bool 317d7a2edb8SThomas Gleixner __pi_waiter_less(struct rb_node *a, const struct rb_node *b) 3185a798725SPeter Zijlstra { 3195a798725SPeter Zijlstra return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b)); 3205a798725SPeter Zijlstra } 3215a798725SPeter Zijlstra 322d7a2edb8SThomas Gleixner static __always_inline void 323fb00aca4SPeter Zijlstra rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 324fb00aca4SPeter Zijlstra { 3255a798725SPeter Zijlstra rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less); 326fb00aca4SPeter Zijlstra } 327fb00aca4SPeter Zijlstra 328d7a2edb8SThomas Gleixner static __always_inline void 329fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 330fb00aca4SPeter Zijlstra { 331fb00aca4SPeter Zijlstra if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) 332fb00aca4SPeter Zijlstra return; 333fb00aca4SPeter Zijlstra 334a23ba907SDavidlohr Bueso rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters); 335fb00aca4SPeter Zijlstra RB_CLEAR_NODE(&waiter->pi_tree_entry); 336fb00aca4SPeter Zijlstra } 337fb00aca4SPeter Zijlstra 338d7a2edb8SThomas Gleixner static __always_inline void rt_mutex_adjust_prio(struct task_struct *p) 339e96a7705SXunlei Pang { 340acd58620SPeter Zijlstra struct task_struct *pi_task = NULL; 341e96a7705SXunlei Pang 342acd58620SPeter Zijlstra lockdep_assert_held(&p->pi_lock); 343e96a7705SXunlei Pang 344acd58620SPeter Zijlstra if (task_has_pi_waiters(p)) 345acd58620SPeter Zijlstra pi_task = task_top_pi_waiter(p)->task; 3461696a8beSPeter Zijlstra 347acd58620SPeter Zijlstra rt_mutex_setprio(p, pi_task); 3481696a8beSPeter Zijlstra } 3491696a8beSPeter Zijlstra 3501696a8beSPeter Zijlstra /* 
3518930ed80SThomas Gleixner * Deadlock detection is conditional: 3528930ed80SThomas Gleixner * 3538930ed80SThomas Gleixner * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted 3548930ed80SThomas Gleixner * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. 3558930ed80SThomas Gleixner * 3568930ed80SThomas Gleixner * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always 3578930ed80SThomas Gleixner * conducted independent of the detect argument. 3588930ed80SThomas Gleixner * 3598930ed80SThomas Gleixner * If the waiter argument is NULL this indicates the deboost path and 3608930ed80SThomas Gleixner * deadlock detection is disabled independent of the detect argument 3618930ed80SThomas Gleixner * and the config settings. 3628930ed80SThomas Gleixner */ 363d7a2edb8SThomas Gleixner static __always_inline bool 364d7a2edb8SThomas Gleixner rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, 3658930ed80SThomas Gleixner enum rtmutex_chainwalk chwalk) 3668930ed80SThomas Gleixner { 36707d25971SZhen Lei if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) 368f7efc479SThomas Gleixner return waiter != NULL; 369f7efc479SThomas Gleixner return chwalk == RT_MUTEX_FULL_CHAINWALK; 3708930ed80SThomas Gleixner } 3718930ed80SThomas Gleixner 372d7a2edb8SThomas Gleixner static __always_inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) 37382084984SThomas Gleixner { 37482084984SThomas Gleixner return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; 37582084984SThomas Gleixner } 37682084984SThomas Gleixner 3771696a8beSPeter Zijlstra /* 3781696a8beSPeter Zijlstra * Adjust the priority chain. Also used for deadlock detection. 3791696a8beSPeter Zijlstra * Decreases task's usage by one - may thus free the task. 3801696a8beSPeter Zijlstra * 38182084984SThomas Gleixner * @task: the task owning the mutex (owner) for which a chain walk is 38282084984SThomas Gleixner * probably needed 383e6beaa36STom(JeHyeon) Yeon * @chwalk: do we have to carry out deadlock detection? 3841696a8beSPeter Zijlstra * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 3851696a8beSPeter Zijlstra * things for a task that has just got its priority adjusted, and 3861696a8beSPeter Zijlstra * is waiting on a mutex) 38782084984SThomas Gleixner * @next_lock: the mutex on which the owner of @orig_lock was blocked before 38882084984SThomas Gleixner * we dropped its pi_lock. Is never dereferenced, only used for 38982084984SThomas Gleixner * comparison to detect lock chain changes. 3901696a8beSPeter Zijlstra * @orig_waiter: rt_mutex_waiter struct for the task that has just donated 3911696a8beSPeter Zijlstra * its priority to the mutex owner (can be NULL in the case 3921696a8beSPeter Zijlstra * depicted above or if the top waiter is gone away and we are 3931696a8beSPeter Zijlstra * actually deboosting the owner) 3941696a8beSPeter Zijlstra * @top_task: the current top waiter 3951696a8beSPeter Zijlstra * 3961696a8beSPeter Zijlstra * Returns 0 or -EDEADLK. 
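 *
 * Illustration (tasks, locks and priorities assumed): T1 (prio 30) owns
 * L1, T2 (prio 20) owns L2 and is blocked on L1, and T3 (prio 10) now
 * blocks on L2. T2 inherits prio 10 because T3 became its top pi
 * waiter; the chain walk then follows T2->pi_blocked_on to L1, requeues
 * T2 in L1's waiter tree with the boosted priority and lets T1 inherit
 * prio 10 as well. The walk ends at T1, which is not blocked on
 * anything.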
3973eb65aeaSThomas Gleixner * 3983eb65aeaSThomas Gleixner * Chain walk basics and protection scope 3993eb65aeaSThomas Gleixner * 4003eb65aeaSThomas Gleixner * [R] refcount on task 4013eb65aeaSThomas Gleixner * [P] task->pi_lock held 4023eb65aeaSThomas Gleixner * [L] rtmutex->wait_lock held 4033eb65aeaSThomas Gleixner * 4043eb65aeaSThomas Gleixner * Step Description Protected by 4053eb65aeaSThomas Gleixner * function arguments: 4063eb65aeaSThomas Gleixner * @task [R] 4073eb65aeaSThomas Gleixner * @orig_lock if != NULL @top_task is blocked on it 4083eb65aeaSThomas Gleixner * @next_lock Unprotected. Cannot be 4093eb65aeaSThomas Gleixner * dereferenced. Only used for 4103eb65aeaSThomas Gleixner * comparison. 4113eb65aeaSThomas Gleixner * @orig_waiter if != NULL @top_task is blocked on it 4123eb65aeaSThomas Gleixner * @top_task current, or in case of proxy 4133eb65aeaSThomas Gleixner * locking protected by calling 4143eb65aeaSThomas Gleixner * code 4153eb65aeaSThomas Gleixner * again: 4163eb65aeaSThomas Gleixner * loop_sanity_check(); 4173eb65aeaSThomas Gleixner * retry: 4183eb65aeaSThomas Gleixner * [1] lock(task->pi_lock); [R] acquire [P] 4193eb65aeaSThomas Gleixner * [2] waiter = task->pi_blocked_on; [P] 4203eb65aeaSThomas Gleixner * [3] check_exit_conditions_1(); [P] 4213eb65aeaSThomas Gleixner * [4] lock = waiter->lock; [P] 4223eb65aeaSThomas Gleixner * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L] 4233eb65aeaSThomas Gleixner * unlock(task->pi_lock); release [P] 4243eb65aeaSThomas Gleixner * goto retry; 4253eb65aeaSThomas Gleixner * } 4263eb65aeaSThomas Gleixner * [6] check_exit_conditions_2(); [P] + [L] 4273eb65aeaSThomas Gleixner * [7] requeue_lock_waiter(lock, waiter); [P] + [L] 4283eb65aeaSThomas Gleixner * [8] unlock(task->pi_lock); release [P] 4293eb65aeaSThomas Gleixner * put_task_struct(task); release [R] 4303eb65aeaSThomas Gleixner * [9] check_exit_conditions_3(); [L] 4313eb65aeaSThomas Gleixner * [10] task = owner(lock); [L] 4323eb65aeaSThomas Gleixner * get_task_struct(task); [L] acquire [R] 4333eb65aeaSThomas Gleixner * lock(task->pi_lock); [L] acquire [P] 4343eb65aeaSThomas Gleixner * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L] 4353eb65aeaSThomas Gleixner * [12] check_exit_conditions_4(); [P] + [L] 4363eb65aeaSThomas Gleixner * [13] unlock(task->pi_lock); release [P] 4373eb65aeaSThomas Gleixner * unlock(lock->wait_lock); release [L] 4383eb65aeaSThomas Gleixner * goto again; 4391696a8beSPeter Zijlstra */ 440d7a2edb8SThomas Gleixner static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, 4418930ed80SThomas Gleixner enum rtmutex_chainwalk chwalk, 4421696a8beSPeter Zijlstra struct rt_mutex *orig_lock, 44382084984SThomas Gleixner struct rt_mutex *next_lock, 4441696a8beSPeter Zijlstra struct rt_mutex_waiter *orig_waiter, 4451696a8beSPeter Zijlstra struct task_struct *top_task) 4461696a8beSPeter Zijlstra { 4471696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 448a57594a1SThomas Gleixner struct rt_mutex_waiter *prerequeue_top_waiter; 4498930ed80SThomas Gleixner int ret = 0, depth = 0; 450a57594a1SThomas Gleixner struct rt_mutex *lock; 4518930ed80SThomas Gleixner bool detect_deadlock; 45267792e2cSThomas Gleixner bool requeue = true; 4531696a8beSPeter Zijlstra 4548930ed80SThomas Gleixner detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); 4551696a8beSPeter Zijlstra 4561696a8beSPeter Zijlstra /* 4571696a8beSPeter Zijlstra * The (de)boosting is a step by step approach with a lot of 4581696a8beSPeter 
Zijlstra * pitfalls. We want this to be preemptible and we want hold a 4591696a8beSPeter Zijlstra * maximum of two locks per step. So we have to check 4601696a8beSPeter Zijlstra * carefully whether things change under us. 4611696a8beSPeter Zijlstra */ 4621696a8beSPeter Zijlstra again: 4633eb65aeaSThomas Gleixner /* 4643eb65aeaSThomas Gleixner * We limit the lock chain length for each invocation. 4653eb65aeaSThomas Gleixner */ 4661696a8beSPeter Zijlstra if (++depth > max_lock_depth) { 4671696a8beSPeter Zijlstra static int prev_max; 4681696a8beSPeter Zijlstra 4691696a8beSPeter Zijlstra /* 4701696a8beSPeter Zijlstra * Print this only once. If the admin changes the limit, 4711696a8beSPeter Zijlstra * print a new message when reaching the limit again. 4721696a8beSPeter Zijlstra */ 4731696a8beSPeter Zijlstra if (prev_max != max_lock_depth) { 4741696a8beSPeter Zijlstra prev_max = max_lock_depth; 4751696a8beSPeter Zijlstra printk(KERN_WARNING "Maximum lock depth %d reached " 4761696a8beSPeter Zijlstra "task: %s (%d)\n", max_lock_depth, 4771696a8beSPeter Zijlstra top_task->comm, task_pid_nr(top_task)); 4781696a8beSPeter Zijlstra } 4791696a8beSPeter Zijlstra put_task_struct(task); 4801696a8beSPeter Zijlstra 4813d5c9340SThomas Gleixner return -EDEADLK; 4821696a8beSPeter Zijlstra } 4833eb65aeaSThomas Gleixner 4843eb65aeaSThomas Gleixner /* 4853eb65aeaSThomas Gleixner * We are fully preemptible here and only hold the refcount on 4863eb65aeaSThomas Gleixner * @task. So everything can have changed under us since the 4873eb65aeaSThomas Gleixner * caller or our own code below (goto retry/again) dropped all 4883eb65aeaSThomas Gleixner * locks. 4893eb65aeaSThomas Gleixner */ 4901696a8beSPeter Zijlstra retry: 4911696a8beSPeter Zijlstra /* 4923eb65aeaSThomas Gleixner * [1] Task cannot go away as we did a get_task() before ! 4931696a8beSPeter Zijlstra */ 494b4abf910SThomas Gleixner raw_spin_lock_irq(&task->pi_lock); 4951696a8beSPeter Zijlstra 4963eb65aeaSThomas Gleixner /* 4973eb65aeaSThomas Gleixner * [2] Get the waiter on which @task is blocked on. 4983eb65aeaSThomas Gleixner */ 4991696a8beSPeter Zijlstra waiter = task->pi_blocked_on; 5003eb65aeaSThomas Gleixner 5013eb65aeaSThomas Gleixner /* 5023eb65aeaSThomas Gleixner * [3] check_exit_conditions_1() protected by task->pi_lock. 5033eb65aeaSThomas Gleixner */ 5043eb65aeaSThomas Gleixner 5051696a8beSPeter Zijlstra /* 5061696a8beSPeter Zijlstra * Check whether the end of the boosting chain has been 5071696a8beSPeter Zijlstra * reached or the state of the chain has changed while we 5081696a8beSPeter Zijlstra * dropped the locks. 5091696a8beSPeter Zijlstra */ 5101696a8beSPeter Zijlstra if (!waiter) 5111696a8beSPeter Zijlstra goto out_unlock_pi; 5121696a8beSPeter Zijlstra 5131696a8beSPeter Zijlstra /* 5141696a8beSPeter Zijlstra * Check the orig_waiter state. After we dropped the locks, 5151696a8beSPeter Zijlstra * the previous owner of the lock might have released the lock. 5161696a8beSPeter Zijlstra */ 5171696a8beSPeter Zijlstra if (orig_waiter && !rt_mutex_owner(orig_lock)) 5181696a8beSPeter Zijlstra goto out_unlock_pi; 5191696a8beSPeter Zijlstra 5201696a8beSPeter Zijlstra /* 52182084984SThomas Gleixner * We dropped all locks after taking a refcount on @task, so 52282084984SThomas Gleixner * the task might have moved on in the lock chain or even left 52382084984SThomas Gleixner * the chain completely and blocks now on an unrelated lock or 52482084984SThomas Gleixner * on @orig_lock. 
52582084984SThomas Gleixner * 52682084984SThomas Gleixner * We stored the lock on which @task was blocked in @next_lock, 52782084984SThomas Gleixner * so we can detect the chain change. 52882084984SThomas Gleixner */ 52982084984SThomas Gleixner if (next_lock != waiter->lock) 53082084984SThomas Gleixner goto out_unlock_pi; 53182084984SThomas Gleixner 53282084984SThomas Gleixner /* 5331696a8beSPeter Zijlstra * Drop out, when the task has no waiters. Note, 5341696a8beSPeter Zijlstra * top_waiter can be NULL, when we are in the deboosting 5351696a8beSPeter Zijlstra * mode! 5361696a8beSPeter Zijlstra */ 537397335f0SThomas Gleixner if (top_waiter) { 538397335f0SThomas Gleixner if (!task_has_pi_waiters(task)) 5391696a8beSPeter Zijlstra goto out_unlock_pi; 540397335f0SThomas Gleixner /* 541397335f0SThomas Gleixner * If deadlock detection is off, we stop here if we 54267792e2cSThomas Gleixner * are not the top pi waiter of the task. If deadlock 54367792e2cSThomas Gleixner * detection is enabled we continue, but stop the 54467792e2cSThomas Gleixner * requeueing in the chain walk. 545397335f0SThomas Gleixner */ 54667792e2cSThomas Gleixner if (top_waiter != task_top_pi_waiter(task)) { 54767792e2cSThomas Gleixner if (!detect_deadlock) 548397335f0SThomas Gleixner goto out_unlock_pi; 54967792e2cSThomas Gleixner else 55067792e2cSThomas Gleixner requeue = false; 55167792e2cSThomas Gleixner } 552397335f0SThomas Gleixner } 5531696a8beSPeter Zijlstra 5541696a8beSPeter Zijlstra /* 55567792e2cSThomas Gleixner * If the waiter priority is the same as the task priority 55667792e2cSThomas Gleixner * then there is no further priority adjustment necessary. If 55767792e2cSThomas Gleixner * deadlock detection is off, we stop the chain walk. If its 55867792e2cSThomas Gleixner * enabled we continue, but stop the requeueing in the chain 55967792e2cSThomas Gleixner * walk. 5601696a8beSPeter Zijlstra */ 56119830e55SPeter Zijlstra if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { 56267792e2cSThomas Gleixner if (!detect_deadlock) 5631696a8beSPeter Zijlstra goto out_unlock_pi; 56467792e2cSThomas Gleixner else 56567792e2cSThomas Gleixner requeue = false; 56667792e2cSThomas Gleixner } 5671696a8beSPeter Zijlstra 5683eb65aeaSThomas Gleixner /* 5693eb65aeaSThomas Gleixner * [4] Get the next lock 5703eb65aeaSThomas Gleixner */ 5711696a8beSPeter Zijlstra lock = waiter->lock; 5723eb65aeaSThomas Gleixner /* 5733eb65aeaSThomas Gleixner * [5] We need to trylock here as we are holding task->pi_lock, 5743eb65aeaSThomas Gleixner * which is the reverse lock order versus the other rtmutex 5753eb65aeaSThomas Gleixner * operations. 5763eb65aeaSThomas Gleixner */ 5771696a8beSPeter Zijlstra if (!raw_spin_trylock(&lock->wait_lock)) { 578b4abf910SThomas Gleixner raw_spin_unlock_irq(&task->pi_lock); 5791696a8beSPeter Zijlstra cpu_relax(); 5801696a8beSPeter Zijlstra goto retry; 5811696a8beSPeter Zijlstra } 5821696a8beSPeter Zijlstra 583397335f0SThomas Gleixner /* 5843eb65aeaSThomas Gleixner * [6] check_exit_conditions_2() protected by task->pi_lock and 5853eb65aeaSThomas Gleixner * lock->wait_lock. 5863eb65aeaSThomas Gleixner * 587397335f0SThomas Gleixner * Deadlock detection. If the lock is the same as the original 588397335f0SThomas Gleixner * lock which caused us to walk the lock chain or if the 589397335f0SThomas Gleixner * current lock is owned by the task which initiated the chain 590397335f0SThomas Gleixner * walk, we detected a deadlock. 
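 *
 * Assumed two-lock example: T1 owns L1 and is blocked on L2, while T2
 * owns L2 and now blocks on L1. The chain walk started on T2's behalf
 * follows T1 to L2 and finds rt_mutex_owner(L2) == top_task (T2); with
 * a longer cycle it would instead arrive back at @orig_lock. Either way
 * we return -EDEADLK here rather than walking the loop forever.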
591397335f0SThomas Gleixner */ 5921696a8beSPeter Zijlstra if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 5931696a8beSPeter Zijlstra raw_spin_unlock(&lock->wait_lock); 5943d5c9340SThomas Gleixner ret = -EDEADLK; 5951696a8beSPeter Zijlstra goto out_unlock_pi; 5961696a8beSPeter Zijlstra } 5971696a8beSPeter Zijlstra 598a57594a1SThomas Gleixner /* 59967792e2cSThomas Gleixner * If we just follow the lock chain for deadlock detection, no 60067792e2cSThomas Gleixner * need to do all the requeue operations. To avoid a truckload 60167792e2cSThomas Gleixner * of conditionals around the various places below, just do the 60267792e2cSThomas Gleixner * minimum chain walk checks. 60367792e2cSThomas Gleixner */ 60467792e2cSThomas Gleixner if (!requeue) { 60567792e2cSThomas Gleixner /* 60667792e2cSThomas Gleixner * No requeue[7] here. Just release @task [8] 60767792e2cSThomas Gleixner */ 608b4abf910SThomas Gleixner raw_spin_unlock(&task->pi_lock); 60967792e2cSThomas Gleixner put_task_struct(task); 61067792e2cSThomas Gleixner 61167792e2cSThomas Gleixner /* 61267792e2cSThomas Gleixner * [9] check_exit_conditions_3 protected by lock->wait_lock. 61367792e2cSThomas Gleixner * If there is no owner of the lock, end of chain. 61467792e2cSThomas Gleixner */ 61567792e2cSThomas Gleixner if (!rt_mutex_owner(lock)) { 616b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock); 61767792e2cSThomas Gleixner return 0; 61867792e2cSThomas Gleixner } 61967792e2cSThomas Gleixner 62067792e2cSThomas Gleixner /* [10] Grab the next task, i.e. owner of @lock */ 6217b3c92b8SMatthew Wilcox (Oracle) task = get_task_struct(rt_mutex_owner(lock)); 622b4abf910SThomas Gleixner raw_spin_lock(&task->pi_lock); 62367792e2cSThomas Gleixner 62467792e2cSThomas Gleixner /* 62567792e2cSThomas Gleixner * No requeue [11] here. We just do deadlock detection. 62667792e2cSThomas Gleixner * 62767792e2cSThomas Gleixner * [12] Store whether owner is blocked 62867792e2cSThomas Gleixner * itself. Decision is made after dropping the locks 62967792e2cSThomas Gleixner */ 63067792e2cSThomas Gleixner next_lock = task_blocked_on_lock(task); 63167792e2cSThomas Gleixner /* 63267792e2cSThomas Gleixner * Get the top waiter for the next iteration 63367792e2cSThomas Gleixner */ 63467792e2cSThomas Gleixner top_waiter = rt_mutex_top_waiter(lock); 63567792e2cSThomas Gleixner 63667792e2cSThomas Gleixner /* [13] Drop locks */ 637b4abf910SThomas Gleixner raw_spin_unlock(&task->pi_lock); 638b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock); 63967792e2cSThomas Gleixner 64067792e2cSThomas Gleixner /* If owner is not blocked, end of chain. */ 64167792e2cSThomas Gleixner if (!next_lock) 64267792e2cSThomas Gleixner goto out_put_task; 64367792e2cSThomas Gleixner goto again; 64467792e2cSThomas Gleixner } 64567792e2cSThomas Gleixner 64667792e2cSThomas Gleixner /* 647a57594a1SThomas Gleixner * Store the current top waiter before doing the requeue 648a57594a1SThomas Gleixner * operation on @lock. We need it for the boost/deboost 649a57594a1SThomas Gleixner * decision below. 650a57594a1SThomas Gleixner */ 651a57594a1SThomas Gleixner prerequeue_top_waiter = rt_mutex_top_waiter(lock); 6521696a8beSPeter Zijlstra 6539f40a51aSDavidlohr Bueso /* [7] Requeue the waiter in the lock waiter tree. */ 654fb00aca4SPeter Zijlstra rt_mutex_dequeue(lock, waiter); 655e0aad5b4SPeter Zijlstra 656e0aad5b4SPeter Zijlstra /* 657e0aad5b4SPeter Zijlstra * Update the waiter prio fields now that we're dequeued. 
658e0aad5b4SPeter Zijlstra * 659e0aad5b4SPeter Zijlstra * These values can have changed through either: 660e0aad5b4SPeter Zijlstra * 661e0aad5b4SPeter Zijlstra * sys_sched_set_scheduler() / sys_sched_setattr() 662e0aad5b4SPeter Zijlstra * 663e0aad5b4SPeter Zijlstra * or 664e0aad5b4SPeter Zijlstra * 665e0aad5b4SPeter Zijlstra * DL CBS enforcement advancing the effective deadline. 666e0aad5b4SPeter Zijlstra * 667e0aad5b4SPeter Zijlstra * Even though pi_waiters also uses these fields, and that tree is only 668e0aad5b4SPeter Zijlstra * updated in [11], we can do this here, since we hold [L], which 669e0aad5b4SPeter Zijlstra * serializes all pi_waiters access and rb_erase() does not care about 670e0aad5b4SPeter Zijlstra * the values of the node being removed. 671e0aad5b4SPeter Zijlstra */ 6722d3d891dSDario Faggioli waiter->prio = task->prio; 673e0aad5b4SPeter Zijlstra waiter->deadline = task->dl.deadline; 674e0aad5b4SPeter Zijlstra 675fb00aca4SPeter Zijlstra rt_mutex_enqueue(lock, waiter); 6761696a8beSPeter Zijlstra 6773eb65aeaSThomas Gleixner /* [8] Release the task */ 678b4abf910SThomas Gleixner raw_spin_unlock(&task->pi_lock); 6792ffa5a5cSThomas Gleixner put_task_struct(task); 6802ffa5a5cSThomas Gleixner 681a57594a1SThomas Gleixner /* 6823eb65aeaSThomas Gleixner * [9] check_exit_conditions_3 protected by lock->wait_lock. 6833eb65aeaSThomas Gleixner * 684a57594a1SThomas Gleixner * We must abort the chain walk if there is no lock owner even 685a57594a1SThomas Gleixner * in the dead lock detection case, as we have nothing to 686a57594a1SThomas Gleixner * follow here. This is the end of the chain we are walking. 687a57594a1SThomas Gleixner */ 6881696a8beSPeter Zijlstra if (!rt_mutex_owner(lock)) { 6891696a8beSPeter Zijlstra /* 6903eb65aeaSThomas Gleixner * If the requeue [7] above changed the top waiter, 6913eb65aeaSThomas Gleixner * then we need to wake the new top waiter up to try 6923eb65aeaSThomas Gleixner * to get the lock. 6931696a8beSPeter Zijlstra */ 694a57594a1SThomas Gleixner if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) 6951696a8beSPeter Zijlstra wake_up_process(rt_mutex_top_waiter(lock)->task); 696b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock); 6972ffa5a5cSThomas Gleixner return 0; 6981696a8beSPeter Zijlstra } 6991696a8beSPeter Zijlstra 7003eb65aeaSThomas Gleixner /* [10] Grab the next task, i.e. the owner of @lock */ 7017b3c92b8SMatthew Wilcox (Oracle) task = get_task_struct(rt_mutex_owner(lock)); 702b4abf910SThomas Gleixner raw_spin_lock(&task->pi_lock); 7031696a8beSPeter Zijlstra 7043eb65aeaSThomas Gleixner /* [11] requeue the pi waiters if necessary */ 7051696a8beSPeter Zijlstra if (waiter == rt_mutex_top_waiter(lock)) { 706a57594a1SThomas Gleixner /* 707a57594a1SThomas Gleixner * The waiter became the new top (highest priority) 708a57594a1SThomas Gleixner * waiter on the lock. Replace the previous top waiter 7099f40a51aSDavidlohr Bueso * in the owner tasks pi waiters tree with this waiter 710a57594a1SThomas Gleixner * and adjust the priority of the owner. 711a57594a1SThomas Gleixner */ 712a57594a1SThomas Gleixner rt_mutex_dequeue_pi(task, prerequeue_top_waiter); 713fb00aca4SPeter Zijlstra rt_mutex_enqueue_pi(task, waiter); 714acd58620SPeter Zijlstra rt_mutex_adjust_prio(task); 7151696a8beSPeter Zijlstra 716a57594a1SThomas Gleixner } else if (prerequeue_top_waiter == waiter) { 717a57594a1SThomas Gleixner /* 718a57594a1SThomas Gleixner * The waiter was the top waiter on the lock, but is 719e2db7592SIngo Molnar * no longer the top priority waiter. 
Replace waiter in 7209f40a51aSDavidlohr Bueso * the owner tasks pi waiters tree with the new top 721a57594a1SThomas Gleixner * (highest priority) waiter and adjust the priority 722a57594a1SThomas Gleixner * of the owner. 723a57594a1SThomas Gleixner * The new top waiter is stored in @waiter so that 724a57594a1SThomas Gleixner * @waiter == @top_waiter evaluates to true below and 725a57594a1SThomas Gleixner * we continue to deboost the rest of the chain. 726a57594a1SThomas Gleixner */ 727fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(task, waiter); 7281696a8beSPeter Zijlstra waiter = rt_mutex_top_waiter(lock); 729fb00aca4SPeter Zijlstra rt_mutex_enqueue_pi(task, waiter); 730acd58620SPeter Zijlstra rt_mutex_adjust_prio(task); 731a57594a1SThomas Gleixner } else { 732a57594a1SThomas Gleixner /* 733a57594a1SThomas Gleixner * Nothing changed. No need to do any priority 734a57594a1SThomas Gleixner * adjustment. 735a57594a1SThomas Gleixner */ 7361696a8beSPeter Zijlstra } 7371696a8beSPeter Zijlstra 73882084984SThomas Gleixner /* 7393eb65aeaSThomas Gleixner * [12] check_exit_conditions_4() protected by task->pi_lock 7403eb65aeaSThomas Gleixner * and lock->wait_lock. The actual decisions are made after we 7413eb65aeaSThomas Gleixner * dropped the locks. 7423eb65aeaSThomas Gleixner * 74382084984SThomas Gleixner * Check whether the task which owns the current lock is pi 74482084984SThomas Gleixner * blocked itself. If yes we store a pointer to the lock for 74582084984SThomas Gleixner * the lock chain change detection above. After we dropped 74682084984SThomas Gleixner * task->pi_lock next_lock cannot be dereferenced anymore. 74782084984SThomas Gleixner */ 74882084984SThomas Gleixner next_lock = task_blocked_on_lock(task); 749a57594a1SThomas Gleixner /* 750a57594a1SThomas Gleixner * Store the top waiter of @lock for the end of chain walk 751a57594a1SThomas Gleixner * decision below. 752a57594a1SThomas Gleixner */ 7531696a8beSPeter Zijlstra top_waiter = rt_mutex_top_waiter(lock); 7543eb65aeaSThomas Gleixner 7553eb65aeaSThomas Gleixner /* [13] Drop the locks */ 756b4abf910SThomas Gleixner raw_spin_unlock(&task->pi_lock); 757b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock); 7581696a8beSPeter Zijlstra 75982084984SThomas Gleixner /* 7603eb65aeaSThomas Gleixner * Make the actual exit decisions [12], based on the stored 7613eb65aeaSThomas Gleixner * values. 7623eb65aeaSThomas Gleixner * 76382084984SThomas Gleixner * We reached the end of the lock chain. Stop right here. No 76482084984SThomas Gleixner * point to go back just to figure that out. 76582084984SThomas Gleixner */ 76682084984SThomas Gleixner if (!next_lock) 76782084984SThomas Gleixner goto out_put_task; 76882084984SThomas Gleixner 769a57594a1SThomas Gleixner /* 770a57594a1SThomas Gleixner * If the current waiter is not the top waiter on the lock, 771a57594a1SThomas Gleixner * then we can stop the chain walk here if we are not in full 772a57594a1SThomas Gleixner * deadlock detection mode. 
773a57594a1SThomas Gleixner */ 7741696a8beSPeter Zijlstra if (!detect_deadlock && waiter != top_waiter) 7751696a8beSPeter Zijlstra goto out_put_task; 7761696a8beSPeter Zijlstra 7771696a8beSPeter Zijlstra goto again; 7781696a8beSPeter Zijlstra 7791696a8beSPeter Zijlstra out_unlock_pi: 780b4abf910SThomas Gleixner raw_spin_unlock_irq(&task->pi_lock); 7811696a8beSPeter Zijlstra out_put_task: 7821696a8beSPeter Zijlstra put_task_struct(task); 7831696a8beSPeter Zijlstra 7841696a8beSPeter Zijlstra return ret; 7851696a8beSPeter Zijlstra } 7861696a8beSPeter Zijlstra 7871696a8beSPeter Zijlstra /* 7881696a8beSPeter Zijlstra * Try to take an rt-mutex 7891696a8beSPeter Zijlstra * 790b4abf910SThomas Gleixner * Must be called with lock->wait_lock held and interrupts disabled 7911696a8beSPeter Zijlstra * 792358c331fSThomas Gleixner * @lock: The lock to be acquired. 793358c331fSThomas Gleixner * @task: The task which wants to acquire the lock 7949f40a51aSDavidlohr Bueso * @waiter: The waiter that is queued to the lock's wait tree if the 795358c331fSThomas Gleixner * callsite called task_blocked_on_lock(), otherwise NULL 7961696a8beSPeter Zijlstra */ 797d7a2edb8SThomas Gleixner static int __sched 798d7a2edb8SThomas Gleixner try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, 7991696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter) 8001696a8beSPeter Zijlstra { 801e0aad5b4SPeter Zijlstra lockdep_assert_held(&lock->wait_lock); 802e0aad5b4SPeter Zijlstra 8031696a8beSPeter Zijlstra /* 804358c331fSThomas Gleixner * Before testing whether we can acquire @lock, we set the 805358c331fSThomas Gleixner * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all 806358c331fSThomas Gleixner * other tasks which try to modify @lock into the slow path 807358c331fSThomas Gleixner * and they serialize on @lock->wait_lock. 8081696a8beSPeter Zijlstra * 809358c331fSThomas Gleixner * The RT_MUTEX_HAS_WAITERS bit can have a transitional state 810358c331fSThomas Gleixner * as explained at the top of this file if and only if: 8111696a8beSPeter Zijlstra * 812358c331fSThomas Gleixner * - There is a lock owner. The caller must fixup the 813358c331fSThomas Gleixner * transient state if it does a trylock or leaves the lock 814358c331fSThomas Gleixner * function due to a signal or timeout. 815358c331fSThomas Gleixner * 816358c331fSThomas Gleixner * - @task acquires the lock and there are no other 817358c331fSThomas Gleixner * waiters. This is undone in rt_mutex_set_owner(@task) at 818358c331fSThomas Gleixner * the end of this function. 8191696a8beSPeter Zijlstra */ 8201696a8beSPeter Zijlstra mark_rt_mutex_waiters(lock); 8211696a8beSPeter Zijlstra 822358c331fSThomas Gleixner /* 823358c331fSThomas Gleixner * If @lock has an owner, give up. 824358c331fSThomas Gleixner */ 8251696a8beSPeter Zijlstra if (rt_mutex_owner(lock)) 8261696a8beSPeter Zijlstra return 0; 8271696a8beSPeter Zijlstra 8281696a8beSPeter Zijlstra /* 829358c331fSThomas Gleixner * If @waiter != NULL, @task has already enqueued the waiter 8309f40a51aSDavidlohr Bueso * into @lock waiter tree. If @waiter == NULL then this is a 831358c331fSThomas Gleixner * trylock attempt. 832358c331fSThomas Gleixner */ 833358c331fSThomas Gleixner if (waiter) { 834358c331fSThomas Gleixner /* 835358c331fSThomas Gleixner * If waiter is not the highest priority waiter of 836358c331fSThomas Gleixner * @lock, give up. 
837358c331fSThomas Gleixner */ 838358c331fSThomas Gleixner if (waiter != rt_mutex_top_waiter(lock)) 839358c331fSThomas Gleixner return 0; 840358c331fSThomas Gleixner 841358c331fSThomas Gleixner /* 842358c331fSThomas Gleixner * We can acquire the lock. Remove the waiter from the 8439f40a51aSDavidlohr Bueso * lock waiters tree. 844358c331fSThomas Gleixner */ 845358c331fSThomas Gleixner rt_mutex_dequeue(lock, waiter); 846358c331fSThomas Gleixner 847358c331fSThomas Gleixner } else { 848358c331fSThomas Gleixner /* 849358c331fSThomas Gleixner * If the lock has waiters already we check whether @task is 850358c331fSThomas Gleixner * eligible to take over the lock. 851358c331fSThomas Gleixner * 852358c331fSThomas Gleixner * If there are no other waiters, @task can acquire 853358c331fSThomas Gleixner * the lock. @task->pi_blocked_on is NULL, so it does 854358c331fSThomas Gleixner * not need to be dequeued. 8551696a8beSPeter Zijlstra */ 8561696a8beSPeter Zijlstra if (rt_mutex_has_waiters(lock)) { 857358c331fSThomas Gleixner /* 858358c331fSThomas Gleixner * If @task->prio is greater than or equal to 859358c331fSThomas Gleixner * the top waiter priority (kernel view), 860358c331fSThomas Gleixner * @task lost. 861358c331fSThomas Gleixner */ 86219830e55SPeter Zijlstra if (!rt_mutex_waiter_less(task_to_waiter(task), 86319830e55SPeter Zijlstra rt_mutex_top_waiter(lock))) 8641696a8beSPeter Zijlstra return 0; 865358c331fSThomas Gleixner 866358c331fSThomas Gleixner /* 867358c331fSThomas Gleixner * The current top waiter stays enqueued. We 868358c331fSThomas Gleixner * don't have to change anything in the lock 869358c331fSThomas Gleixner * waiters order. 870358c331fSThomas Gleixner */ 871358c331fSThomas Gleixner } else { 872358c331fSThomas Gleixner /* 873358c331fSThomas Gleixner * No waiters. Take the lock without the 874358c331fSThomas Gleixner * pi_lock dance.@task->pi_blocked_on is NULL 875358c331fSThomas Gleixner * and we have no waiters to enqueue in @task 8769f40a51aSDavidlohr Bueso * pi waiters tree. 877358c331fSThomas Gleixner */ 878358c331fSThomas Gleixner goto takeit; 8791696a8beSPeter Zijlstra } 8801696a8beSPeter Zijlstra } 8811696a8beSPeter Zijlstra 8821696a8beSPeter Zijlstra /* 883358c331fSThomas Gleixner * Clear @task->pi_blocked_on. Requires protection by 884358c331fSThomas Gleixner * @task->pi_lock. Redundant operation for the @waiter == NULL 885358c331fSThomas Gleixner * case, but conditionals are more expensive than a redundant 886358c331fSThomas Gleixner * store. 8871696a8beSPeter Zijlstra */ 888b4abf910SThomas Gleixner raw_spin_lock(&task->pi_lock); 889358c331fSThomas Gleixner task->pi_blocked_on = NULL; 890358c331fSThomas Gleixner /* 891358c331fSThomas Gleixner * Finish the lock acquisition. @task is the new owner. If 892358c331fSThomas Gleixner * other waiters exist we have to insert the highest priority 8939f40a51aSDavidlohr Bueso * waiter into @task->pi_waiters tree. 894358c331fSThomas Gleixner */ 895358c331fSThomas Gleixner if (rt_mutex_has_waiters(lock)) 896358c331fSThomas Gleixner rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); 897b4abf910SThomas Gleixner raw_spin_unlock(&task->pi_lock); 8981696a8beSPeter Zijlstra 899358c331fSThomas Gleixner takeit: 900358c331fSThomas Gleixner /* 901358c331fSThomas Gleixner * This either preserves the RT_MUTEX_HAS_WAITERS bit if there 902358c331fSThomas Gleixner * are still waiters or clears it. 
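 *
 * For illustration: if @task lives at address A and waiters remain
 * queued, the word stored below is A | RT_MUTEX_HAS_WAITERS, i.e. the
 * task pointer with bit 0 set. Once the last waiter has been dequeued,
 * fixup_rt_mutex_waiters() clears bit 0 again, so the plain pointer
 * becomes visible and the cmpxchg fast path described at the top of
 * this file can be used for the unlock.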
903358c331fSThomas Gleixner */ 9041696a8beSPeter Zijlstra rt_mutex_set_owner(lock, task); 9051696a8beSPeter Zijlstra 9061696a8beSPeter Zijlstra return 1; 9071696a8beSPeter Zijlstra } 9081696a8beSPeter Zijlstra 9091696a8beSPeter Zijlstra /* 9101696a8beSPeter Zijlstra * Task blocks on lock. 9111696a8beSPeter Zijlstra * 9121696a8beSPeter Zijlstra * Prepare waiter and propagate pi chain 9131696a8beSPeter Zijlstra * 914b4abf910SThomas Gleixner * This must be called with lock->wait_lock held and interrupts disabled 9151696a8beSPeter Zijlstra */ 916d7a2edb8SThomas Gleixner static int __sched task_blocks_on_rt_mutex(struct rt_mutex *lock, 9171696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter, 9181696a8beSPeter Zijlstra struct task_struct *task, 9198930ed80SThomas Gleixner enum rtmutex_chainwalk chwalk) 9201696a8beSPeter Zijlstra { 9211696a8beSPeter Zijlstra struct task_struct *owner = rt_mutex_owner(lock); 9221696a8beSPeter Zijlstra struct rt_mutex_waiter *top_waiter = waiter; 92382084984SThomas Gleixner struct rt_mutex *next_lock; 9241696a8beSPeter Zijlstra int chain_walk = 0, res; 9251696a8beSPeter Zijlstra 926e0aad5b4SPeter Zijlstra lockdep_assert_held(&lock->wait_lock); 927e0aad5b4SPeter Zijlstra 928397335f0SThomas Gleixner /* 929397335f0SThomas Gleixner * Early deadlock detection. We really don't want the task to 930397335f0SThomas Gleixner * enqueue on itself just to untangle the mess later. It's not 931397335f0SThomas Gleixner * only an optimization. We drop the locks, so another waiter 932397335f0SThomas Gleixner * can come in before the chain walk detects the deadlock. So 933397335f0SThomas Gleixner * the other will detect the deadlock and return -EDEADLOCK, 934397335f0SThomas Gleixner * which is wrong, as the other waiter is not in a deadlock 935397335f0SThomas Gleixner * situation. 
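 *
 * The simplest (assumed) trigger is self-deadlock: a task that already
 * owns @lock ends up in the slow path for that same lock and would
 * enqueue behind itself. The owner == task check below catches this
 * right away, before any waiter is queued and before any chain walk is
 * started.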
936397335f0SThomas Gleixner */ 9373d5c9340SThomas Gleixner if (owner == task) 938397335f0SThomas Gleixner return -EDEADLK; 939397335f0SThomas Gleixner 940b4abf910SThomas Gleixner raw_spin_lock(&task->pi_lock); 9411696a8beSPeter Zijlstra waiter->task = task; 9421696a8beSPeter Zijlstra waiter->lock = lock; 9432d3d891dSDario Faggioli waiter->prio = task->prio; 944e0aad5b4SPeter Zijlstra waiter->deadline = task->dl.deadline; 9451696a8beSPeter Zijlstra 9461696a8beSPeter Zijlstra /* Get the top priority waiter on the lock */ 9471696a8beSPeter Zijlstra if (rt_mutex_has_waiters(lock)) 9481696a8beSPeter Zijlstra top_waiter = rt_mutex_top_waiter(lock); 949fb00aca4SPeter Zijlstra rt_mutex_enqueue(lock, waiter); 9501696a8beSPeter Zijlstra 9511696a8beSPeter Zijlstra task->pi_blocked_on = waiter; 9521696a8beSPeter Zijlstra 953b4abf910SThomas Gleixner raw_spin_unlock(&task->pi_lock); 9541696a8beSPeter Zijlstra 9551696a8beSPeter Zijlstra if (!owner) 9561696a8beSPeter Zijlstra return 0; 9571696a8beSPeter Zijlstra 958b4abf910SThomas Gleixner raw_spin_lock(&owner->pi_lock); 95982084984SThomas Gleixner if (waiter == rt_mutex_top_waiter(lock)) { 960fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(owner, top_waiter); 961fb00aca4SPeter Zijlstra rt_mutex_enqueue_pi(owner, waiter); 9621696a8beSPeter Zijlstra 963acd58620SPeter Zijlstra rt_mutex_adjust_prio(owner); 9641696a8beSPeter Zijlstra if (owner->pi_blocked_on) 9651696a8beSPeter Zijlstra chain_walk = 1; 9668930ed80SThomas Gleixner } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { 9671696a8beSPeter Zijlstra chain_walk = 1; 96882084984SThomas Gleixner } 9691696a8beSPeter Zijlstra 97082084984SThomas Gleixner /* Store the lock on which owner is blocked or NULL */ 97182084984SThomas Gleixner next_lock = task_blocked_on_lock(owner); 97282084984SThomas Gleixner 973b4abf910SThomas Gleixner raw_spin_unlock(&owner->pi_lock); 97482084984SThomas Gleixner /* 97582084984SThomas Gleixner * Even if full deadlock detection is on, if the owner is not 97682084984SThomas Gleixner * blocked itself, we can avoid finding this out in the chain 97782084984SThomas Gleixner * walk. 97882084984SThomas Gleixner */ 97982084984SThomas Gleixner if (!chain_walk || !next_lock) 9801696a8beSPeter Zijlstra return 0; 9811696a8beSPeter Zijlstra 9821696a8beSPeter Zijlstra /* 9831696a8beSPeter Zijlstra * The owner can't disappear while holding a lock, 9841696a8beSPeter Zijlstra * so the owner struct is protected by wait_lock. 9851696a8beSPeter Zijlstra * Gets dropped in rt_mutex_adjust_prio_chain()! 9861696a8beSPeter Zijlstra */ 9871696a8beSPeter Zijlstra get_task_struct(owner); 9881696a8beSPeter Zijlstra 989b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock); 9901696a8beSPeter Zijlstra 9918930ed80SThomas Gleixner res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, 99282084984SThomas Gleixner next_lock, waiter, task); 9931696a8beSPeter Zijlstra 994b4abf910SThomas Gleixner raw_spin_lock_irq(&lock->wait_lock); 9951696a8beSPeter Zijlstra 9961696a8beSPeter Zijlstra return res; 9971696a8beSPeter Zijlstra } 9981696a8beSPeter Zijlstra 9991696a8beSPeter Zijlstra /* 10009f40a51aSDavidlohr Bueso * Remove the top waiter from the current tasks pi waiter tree and 100145ab4effSDavidlohr Bueso * queue it up. 10021696a8beSPeter Zijlstra * 1003b4abf910SThomas Gleixner * Called with lock->wait_lock held and interrupts disabled. 
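 *
 * Sketch of the expected calling pattern (assumed here; the unlock slow
 * path itself is outside this excerpt). The wakeup is deferred through
 * the wake_q until wait_lock has been dropped, and the preempt_disable()
 * taken below is paired with a preempt_enable() after the wakeup:
 *
 *	DEFINE_WAKE_Q(wake_q);
 *
 *	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 *	...
 *	mark_wakeup_next_waiter(&wake_q, lock);
 *	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 *
 *	wake_up_q(&wake_q);
 *	preempt_enable();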
10041696a8beSPeter Zijlstra */ 1005d7a2edb8SThomas Gleixner static void __sched mark_wakeup_next_waiter(struct wake_q_head *wake_q, 100645ab4effSDavidlohr Bueso struct rt_mutex *lock) 10071696a8beSPeter Zijlstra { 10081696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter; 10091696a8beSPeter Zijlstra 1010b4abf910SThomas Gleixner raw_spin_lock(¤t->pi_lock); 10111696a8beSPeter Zijlstra 10121696a8beSPeter Zijlstra waiter = rt_mutex_top_waiter(lock); 10131696a8beSPeter Zijlstra 10141696a8beSPeter Zijlstra /* 1015acd58620SPeter Zijlstra * Remove it from current->pi_waiters and deboost. 1016acd58620SPeter Zijlstra * 1017acd58620SPeter Zijlstra * We must in fact deboost here in order to ensure we call 1018acd58620SPeter Zijlstra * rt_mutex_setprio() to update p->pi_top_task before the 1019acd58620SPeter Zijlstra * task unblocks. 10201696a8beSPeter Zijlstra */ 1021fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(current, waiter); 1022acd58620SPeter Zijlstra rt_mutex_adjust_prio(current); 10231696a8beSPeter Zijlstra 102427e35715SThomas Gleixner /* 102527e35715SThomas Gleixner * As we are waking up the top waiter, and the waiter stays 102627e35715SThomas Gleixner * queued on the lock until it gets the lock, this lock 102727e35715SThomas Gleixner * obviously has waiters. Just set the bit here and this has 102827e35715SThomas Gleixner * the added benefit of forcing all new tasks into the 102927e35715SThomas Gleixner * slow path making sure no task of lower priority than 103027e35715SThomas Gleixner * the top waiter can steal this lock. 103127e35715SThomas Gleixner */ 103227e35715SThomas Gleixner lock->owner = (void *) RT_MUTEX_HAS_WAITERS; 10331696a8beSPeter Zijlstra 1034acd58620SPeter Zijlstra /* 1035acd58620SPeter Zijlstra * We deboosted before waking the top waiter task such that we don't 1036acd58620SPeter Zijlstra * run two tasks with the 'same' priority (and ensure the 1037acd58620SPeter Zijlstra * p->pi_top_task pointer points to a blocked task). This however can 1038acd58620SPeter Zijlstra * lead to priority inversion if we would get preempted after the 1039acd58620SPeter Zijlstra * deboost but before waking our donor task, hence the preempt_disable() 1040acd58620SPeter Zijlstra * before unlock. 1041acd58620SPeter Zijlstra * 1042acd58620SPeter Zijlstra * Pairs with preempt_enable() in rt_mutex_postunlock(); 1043acd58620SPeter Zijlstra */ 1044acd58620SPeter Zijlstra preempt_disable(); 104545ab4effSDavidlohr Bueso wake_q_add(wake_q, waiter->task); 1046acd58620SPeter Zijlstra raw_spin_unlock(¤t->pi_lock); 10471696a8beSPeter Zijlstra } 10481696a8beSPeter Zijlstra 10491696a8beSPeter Zijlstra /* 10501696a8beSPeter Zijlstra * Remove a waiter from a lock and give up 10511696a8beSPeter Zijlstra * 1052b4abf910SThomas Gleixner * Must be called with lock->wait_lock held and interrupts disabled. I must 10531696a8beSPeter Zijlstra * have just failed to try_to_take_rt_mutex(). 
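 *
 * Sketch of the intended use (assumed; the slow-lock caller is outside
 * this excerpt): if __rt_mutex_slowlock() comes back with -EINTR or
 * -ETIMEDOUT the waiter is still enqueued, so the caller, still holding
 * wait_lock, cleans up roughly like
 *
 *	if (ret) {
 *		remove_waiter(lock, waiter);
 *		rt_mutex_handle_deadlock(ret, chwalk, waiter);
 *	}
 *
 * which dequeues the waiter, clears current->pi_blocked_on and, if it
 * was the top waiter, re-adjusts the owner's inherited priority.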
10541696a8beSPeter Zijlstra */
1055d7a2edb8SThomas Gleixner static void __sched remove_waiter(struct rt_mutex *lock,
10561696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter)
10571696a8beSPeter Zijlstra {
10581ca7b860SThomas Gleixner bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
10591696a8beSPeter Zijlstra struct task_struct *owner = rt_mutex_owner(lock);
10601ca7b860SThomas Gleixner struct rt_mutex *next_lock;
10611696a8beSPeter Zijlstra
1062e0aad5b4SPeter Zijlstra lockdep_assert_held(&lock->wait_lock);
1063e0aad5b4SPeter Zijlstra
1064b4abf910SThomas Gleixner raw_spin_lock(&current->pi_lock);
1065fb00aca4SPeter Zijlstra rt_mutex_dequeue(lock, waiter);
10661696a8beSPeter Zijlstra current->pi_blocked_on = NULL;
1067b4abf910SThomas Gleixner raw_spin_unlock(&current->pi_lock);
10681696a8beSPeter Zijlstra
10691ca7b860SThomas Gleixner /*
10701ca7b860SThomas Gleixner * Only update priority if the waiter was the highest priority
10711ca7b860SThomas Gleixner * waiter of the lock and there is an owner to update.
10721ca7b860SThomas Gleixner */
10731ca7b860SThomas Gleixner if (!owner || !is_top_waiter)
10741696a8beSPeter Zijlstra return;
10751696a8beSPeter Zijlstra
1076b4abf910SThomas Gleixner raw_spin_lock(&owner->pi_lock);
10771696a8beSPeter Zijlstra
1078fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(owner, waiter);
10791696a8beSPeter Zijlstra
10801ca7b860SThomas Gleixner if (rt_mutex_has_waiters(lock))
10811ca7b860SThomas Gleixner rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
10821696a8beSPeter Zijlstra
1083acd58620SPeter Zijlstra rt_mutex_adjust_prio(owner);
10841696a8beSPeter Zijlstra
108582084984SThomas Gleixner /* Store the lock on which owner is blocked or NULL */
108682084984SThomas Gleixner next_lock = task_blocked_on_lock(owner);
10871696a8beSPeter Zijlstra
1088b4abf910SThomas Gleixner raw_spin_unlock(&owner->pi_lock);
10891696a8beSPeter Zijlstra
10901ca7b860SThomas Gleixner /*
10911ca7b860SThomas Gleixner * Don't walk the chain if the owner task is not blocked
10921ca7b860SThomas Gleixner * itself.
10931ca7b860SThomas Gleixner */
109482084984SThomas Gleixner if (!next_lock)
10951696a8beSPeter Zijlstra return;
10961696a8beSPeter Zijlstra
10971696a8beSPeter Zijlstra /* gets dropped in rt_mutex_adjust_prio_chain()! */
10981696a8beSPeter Zijlstra get_task_struct(owner);
10991696a8beSPeter Zijlstra
1100b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock);
11011696a8beSPeter Zijlstra
11028930ed80SThomas Gleixner rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
11038930ed80SThomas Gleixner next_lock, NULL, current);
11041696a8beSPeter Zijlstra
1105b4abf910SThomas Gleixner raw_spin_lock_irq(&lock->wait_lock);
11061696a8beSPeter Zijlstra }
11071696a8beSPeter Zijlstra
11081696a8beSPeter Zijlstra /**
11091696a8beSPeter Zijlstra * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
11101696a8beSPeter Zijlstra * @lock: the rt_mutex to take
11111696a8beSPeter Zijlstra * @state: the state the task should block in (TASK_INTERRUPTIBLE
11121696a8beSPeter Zijlstra * or TASK_UNINTERRUPTIBLE)
11131696a8beSPeter Zijlstra * @timeout: the pre-initialized and started timer, or NULL for none
11141696a8beSPeter Zijlstra * @waiter: the pre-initialized rt_mutex_waiter
11151696a8beSPeter Zijlstra *
1116b4abf910SThomas Gleixner * Must be called with lock->wait_lock held and interrupts disabled
11171696a8beSPeter Zijlstra */
11182f064a59SPeter Zijlstra static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state,
11191696a8beSPeter Zijlstra struct hrtimer_sleeper *timeout,
11201696a8beSPeter Zijlstra struct rt_mutex_waiter *waiter)
11211696a8beSPeter Zijlstra {
11221696a8beSPeter Zijlstra int ret = 0;
11231696a8beSPeter Zijlstra
11241696a8beSPeter Zijlstra for (;;) {
11251696a8beSPeter Zijlstra /* Try to acquire the lock: */
11261696a8beSPeter Zijlstra if (try_to_take_rt_mutex(lock, current, waiter))
11271696a8beSPeter Zijlstra break;
11281696a8beSPeter Zijlstra
1129a51a327fSThomas Gleixner if (timeout && !timeout->task) {
11301696a8beSPeter Zijlstra ret = -ETIMEDOUT;
1131a51a327fSThomas Gleixner break;
1132a51a327fSThomas Gleixner }
1133a51a327fSThomas Gleixner if (signal_pending_state(state, current)) {
1134a51a327fSThomas Gleixner ret = -EINTR;
11351696a8beSPeter Zijlstra break;
11361696a8beSPeter Zijlstra }
11371696a8beSPeter Zijlstra
1138b4abf910SThomas Gleixner raw_spin_unlock_irq(&lock->wait_lock);
11391696a8beSPeter Zijlstra
11401b0b7c17SDavidlohr Bueso schedule();
11411696a8beSPeter Zijlstra
1142b4abf910SThomas Gleixner raw_spin_lock_irq(&lock->wait_lock);
11431696a8beSPeter Zijlstra set_current_state(state);
11441696a8beSPeter Zijlstra }
11451696a8beSPeter Zijlstra
1146afffc6c1SDavidlohr Bueso __set_current_state(TASK_RUNNING);
11471696a8beSPeter Zijlstra return ret;
11481696a8beSPeter Zijlstra }
11491696a8beSPeter Zijlstra
1150d7a2edb8SThomas Gleixner static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
11513d5c9340SThomas Gleixner struct rt_mutex_waiter *w)
11523d5c9340SThomas Gleixner {
11533d5c9340SThomas Gleixner /*
11543d5c9340SThomas Gleixner * If the result is not -EDEADLOCK or the caller requested
11553d5c9340SThomas Gleixner * deadlock detection, nothing to do here.
11563d5c9340SThomas Gleixner */
11573d5c9340SThomas Gleixner if (res != -EDEADLOCK || detect_deadlock)
11583d5c9340SThomas Gleixner return;
11593d5c9340SThomas Gleixner
11603d5c9340SThomas Gleixner /*
1161e2db7592SIngo Molnar * Yell loudly and stop the task right here.
11623d5c9340SThomas Gleixner */
11636d41c675SSebastian Andrzej Siewior WARN(1, "rtmutex deadlock detected\n");
11643d5c9340SThomas Gleixner while (1) {
11653d5c9340SThomas Gleixner set_current_state(TASK_INTERRUPTIBLE);
11663d5c9340SThomas Gleixner schedule();
11673d5c9340SThomas Gleixner }
11683d5c9340SThomas Gleixner }
11693d5c9340SThomas Gleixner
11701696a8beSPeter Zijlstra /*
11711696a8beSPeter Zijlstra * Slow path lock function:
11721696a8beSPeter Zijlstra */
11732f064a59SPeter Zijlstra static int __sched rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state,
11741696a8beSPeter Zijlstra struct hrtimer_sleeper *timeout,
11758930ed80SThomas Gleixner enum rtmutex_chainwalk chwalk)
11761696a8beSPeter Zijlstra {
11771696a8beSPeter Zijlstra struct rt_mutex_waiter waiter;
1178b4abf910SThomas Gleixner unsigned long flags;
11791696a8beSPeter Zijlstra int ret = 0;
11801696a8beSPeter Zijlstra
118150809358SPeter Zijlstra rt_mutex_init_waiter(&waiter);
11821696a8beSPeter Zijlstra
1183b4abf910SThomas Gleixner /*
1184b4abf910SThomas Gleixner * Technically we could use raw_spin_[un]lock_irq() here, but this can
1185b4abf910SThomas Gleixner * be called in early boot if the cmpxchg() fast path is disabled
1186b4abf910SThomas Gleixner * (debug, no architecture support). In this case we will acquire the
1187b4abf910SThomas Gleixner * rtmutex with lock->wait_lock held. But we cannot unconditionally
1188b4abf910SThomas Gleixner * enable interrupts in that early boot case. So we need to use the
1189b4abf910SThomas Gleixner * irqsave/restore variants.
1190b4abf910SThomas Gleixner */
1191b4abf910SThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags);
11921696a8beSPeter Zijlstra
11931696a8beSPeter Zijlstra /* Try to acquire the lock again: */
11941696a8beSPeter Zijlstra if (try_to_take_rt_mutex(lock, current, NULL)) {
1195b4abf910SThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
11961696a8beSPeter Zijlstra return 0;
11971696a8beSPeter Zijlstra }
11981696a8beSPeter Zijlstra
11991696a8beSPeter Zijlstra set_current_state(state);
12001696a8beSPeter Zijlstra
12011696a8beSPeter Zijlstra /* Set up the timer when timeout != NULL */
1202ccdd92c1SThomas Gleixner if (unlikely(timeout))
12031696a8beSPeter Zijlstra hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
12041696a8beSPeter Zijlstra
12058930ed80SThomas Gleixner ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
12061696a8beSPeter Zijlstra
12071696a8beSPeter Zijlstra if (likely(!ret))
1208afffc6c1SDavidlohr Bueso /* sleep on the mutex */
12091696a8beSPeter Zijlstra ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
12101696a8beSPeter Zijlstra
12113d5c9340SThomas Gleixner if (unlikely(ret)) {
12129d3e2d02SSebastian Andrzej Siewior __set_current_state(TASK_RUNNING);
12131696a8beSPeter Zijlstra remove_waiter(lock, &waiter);
12148930ed80SThomas Gleixner rt_mutex_handle_deadlock(ret, chwalk, &waiter);
12153d5c9340SThomas Gleixner }
12161696a8beSPeter Zijlstra
12171696a8beSPeter Zijlstra /*
12181696a8beSPeter Zijlstra * try_to_take_rt_mutex() sets the waiter bit
12191696a8beSPeter Zijlstra * unconditionally. We might have to fix that up.
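*
* A minimal sketch of that fixup, assuming we now own the lock and no
* other waiters remain queued (the owner values are illustrative):
*
*	lock->owner == (current | RT_MUTEX_HAS_WAITERS)
*	fixup_rt_mutex_waiters(lock);
*	lock->owner == current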
12201696a8beSPeter Zijlstra */
12211696a8beSPeter Zijlstra fixup_rt_mutex_waiters(lock);
12221696a8beSPeter Zijlstra
1223b4abf910SThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
12241696a8beSPeter Zijlstra
12251696a8beSPeter Zijlstra /* Remove pending timer: */
12261696a8beSPeter Zijlstra if (unlikely(timeout))
12271696a8beSPeter Zijlstra hrtimer_cancel(&timeout->timer);
12281696a8beSPeter Zijlstra
12291696a8beSPeter Zijlstra debug_rt_mutex_free_waiter(&waiter);
12301696a8beSPeter Zijlstra
12311696a8beSPeter Zijlstra return ret;
12321696a8beSPeter Zijlstra }
12331696a8beSPeter Zijlstra
1234*531ae4b0SThomas Gleixner static __always_inline int __rt_mutex_lock(struct rt_mutex *lock,
1235*531ae4b0SThomas Gleixner unsigned int state)
1236*531ae4b0SThomas Gleixner {
1237*531ae4b0SThomas Gleixner if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1238*531ae4b0SThomas Gleixner return 0;
1239*531ae4b0SThomas Gleixner
1240*531ae4b0SThomas Gleixner return rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
1241*531ae4b0SThomas Gleixner }
1242*531ae4b0SThomas Gleixner
1243d7a2edb8SThomas Gleixner static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock)
1244c1e2f0eaSPeter Zijlstra {
1245c1e2f0eaSPeter Zijlstra int ret = try_to_take_rt_mutex(lock, current, NULL);
1246c1e2f0eaSPeter Zijlstra
1247c1e2f0eaSPeter Zijlstra /*
1248c1e2f0eaSPeter Zijlstra * try_to_take_rt_mutex() sets the lock waiters bit
1249c1e2f0eaSPeter Zijlstra * unconditionally. Clean this up.
1250c1e2f0eaSPeter Zijlstra */
1251c1e2f0eaSPeter Zijlstra fixup_rt_mutex_waiters(lock);
1252c1e2f0eaSPeter Zijlstra
1253c1e2f0eaSPeter Zijlstra return ret;
1254c1e2f0eaSPeter Zijlstra }
1255c1e2f0eaSPeter Zijlstra
12561696a8beSPeter Zijlstra /*
12571696a8beSPeter Zijlstra * Slow path try-lock function:
12581696a8beSPeter Zijlstra */
1259d7a2edb8SThomas Gleixner static int __sched rt_mutex_slowtrylock(struct rt_mutex *lock)
12601696a8beSPeter Zijlstra {
1261b4abf910SThomas Gleixner unsigned long flags;
126288f2b4c1SThomas Gleixner int ret;
12631696a8beSPeter Zijlstra
126488f2b4c1SThomas Gleixner /*
126588f2b4c1SThomas Gleixner * If the lock already has an owner we fail to get the lock.
126688f2b4c1SThomas Gleixner * This can be done without taking the @lock->wait_lock as
126788f2b4c1SThomas Gleixner * it is only being read, and this is a trylock anyway.
126888f2b4c1SThomas Gleixner */
126988f2b4c1SThomas Gleixner if (rt_mutex_owner(lock))
127088f2b4c1SThomas Gleixner return 0;
127188f2b4c1SThomas Gleixner
127288f2b4c1SThomas Gleixner /*
1273b4abf910SThomas Gleixner * The mutex currently has no owner. Lock the wait lock and try to
1274b4abf910SThomas Gleixner * acquire the lock. We use irqsave here to support early boot calls.
127588f2b4c1SThomas Gleixner */
1276b4abf910SThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags);
12771696a8beSPeter Zijlstra
1278c1e2f0eaSPeter Zijlstra ret = __rt_mutex_slowtrylock(lock);
12791696a8beSPeter Zijlstra
1280b4abf910SThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
12811696a8beSPeter Zijlstra
12821696a8beSPeter Zijlstra return ret;
12831696a8beSPeter Zijlstra }
12841696a8beSPeter Zijlstra
1285*531ae4b0SThomas Gleixner static __always_inline int __rt_mutex_trylock(struct rt_mutex *lock)
128670c80103SThomas Gleixner {
1287*531ae4b0SThomas Gleixner if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1288*531ae4b0SThomas Gleixner return 1;
128970c80103SThomas Gleixner
1290*531ae4b0SThomas Gleixner return rt_mutex_slowtrylock(lock);
129170c80103SThomas Gleixner }
129270c80103SThomas Gleixner
129370c80103SThomas Gleixner /*
1294802ab58dSSebastian Andrzej Siewior * Slow path to release an rt-mutex.
12951696a8beSPeter Zijlstra */
129670c80103SThomas Gleixner static void __sched rt_mutex_slowunlock(struct rt_mutex *lock)
12971696a8beSPeter Zijlstra {
129870c80103SThomas Gleixner DEFINE_WAKE_Q(wake_q);
1299b4abf910SThomas Gleixner unsigned long flags;
1300b4abf910SThomas Gleixner
1301b4abf910SThomas Gleixner /* irqsave required to support early boot calls */
1302b4abf910SThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags);
13031696a8beSPeter Zijlstra
13041696a8beSPeter Zijlstra debug_rt_mutex_unlock(lock);
13051696a8beSPeter Zijlstra
130627e35715SThomas Gleixner /*
130727e35715SThomas Gleixner * We must be careful here if the fast path is enabled. If we
130827e35715SThomas Gleixner * have no waiters queued we cannot set owner to NULL here
130927e35715SThomas Gleixner * because of:
131027e35715SThomas Gleixner *
131127e35715SThomas Gleixner * foo->lock->owner = NULL;
131227e35715SThomas Gleixner * rtmutex_lock(foo->lock); <- fast path
131327e35715SThomas Gleixner * free = atomic_dec_and_test(foo->refcnt);
131427e35715SThomas Gleixner * rtmutex_unlock(foo->lock); <- fast path
131527e35715SThomas Gleixner * if (free)
131627e35715SThomas Gleixner * kfree(foo);
131727e35715SThomas Gleixner * raw_spin_unlock(foo->lock->wait_lock);
131827e35715SThomas Gleixner *
131927e35715SThomas Gleixner * So for the fastpath enabled kernel:
132027e35715SThomas Gleixner *
132127e35715SThomas Gleixner * Nothing can set the waiters bit as long as we hold
132227e35715SThomas Gleixner * lock->wait_lock. So we do the following sequence:
132327e35715SThomas Gleixner *
132427e35715SThomas Gleixner * owner = rt_mutex_owner(lock);
132527e35715SThomas Gleixner * clear_rt_mutex_waiters(lock);
132627e35715SThomas Gleixner * raw_spin_unlock(&lock->wait_lock);
132727e35715SThomas Gleixner * if (cmpxchg(&lock->owner, owner, 0) == owner)
132827e35715SThomas Gleixner * return;
132927e35715SThomas Gleixner * goto retry;
133027e35715SThomas Gleixner *
133127e35715SThomas Gleixner * The fastpath disabled variant is simple as all access to
133227e35715SThomas Gleixner * lock->owner is serialized by lock->wait_lock:
133327e35715SThomas Gleixner *
133427e35715SThomas Gleixner * lock->owner = NULL;
133527e35715SThomas Gleixner * raw_spin_unlock(&lock->wait_lock);
133627e35715SThomas Gleixner */
133727e35715SThomas Gleixner while (!rt_mutex_has_waiters(lock)) {
133827e35715SThomas Gleixner /* Drops lock->wait_lock ! */
1339b4abf910SThomas Gleixner if (unlock_rt_mutex_safe(lock, flags) == true)
134070c80103SThomas Gleixner return;
134127e35715SThomas Gleixner /* Relock the rtmutex and try again */
1342b4abf910SThomas Gleixner raw_spin_lock_irqsave(&lock->wait_lock, flags);
13431696a8beSPeter Zijlstra }
13441696a8beSPeter Zijlstra
134527e35715SThomas Gleixner /*
134627e35715SThomas Gleixner * The wakeup next waiter path does not suffer from the above
134727e35715SThomas Gleixner * race. See the comments there.
134845ab4effSDavidlohr Bueso *
134945ab4effSDavidlohr Bueso * Queue the next waiter for wakeup once we release the wait_lock.
135027e35715SThomas Gleixner */
135170c80103SThomas Gleixner mark_wakeup_next_waiter(&wake_q, lock);
1352b4abf910SThomas Gleixner raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
13531696a8beSPeter Zijlstra
135470c80103SThomas Gleixner rt_mutex_postunlock(&wake_q);
13551696a8beSPeter Zijlstra }
13561696a8beSPeter Zijlstra
1357*531ae4b0SThomas Gleixner static __always_inline void __rt_mutex_unlock(struct rt_mutex *lock)
13581696a8beSPeter Zijlstra {
135970c80103SThomas Gleixner if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
136070c80103SThomas Gleixner return;
136170c80103SThomas Gleixner
136270c80103SThomas Gleixner rt_mutex_slowunlock(lock);
13631696a8beSPeter Zijlstra }
1364
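/*
 * An illustrative sketch of how the __rt_mutex_lock(), __rt_mutex_trylock()
 * and __rt_mutex_unlock() helpers above are expected to be combined by the
 * public API wrappers. The wrapper bodies and the TASK_UNINTERRUPTIBLE
 * state shown here are assumptions for illustration only; the actual
 * wrappers (and their lockdep annotations) are not part of this excerpt
 * and may differ:
 *
 *	void rt_mutex_lock(struct rt_mutex *lock)
 *	{
 *		__rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE);
 *	}
 *
 *	int rt_mutex_trylock(struct rt_mutex *lock)
 *	{
 *		return __rt_mutex_trylock(lock);
 *	}
 *
 *	void rt_mutex_unlock(struct rt_mutex *lock)
 *	{
 *		__rt_mutex_unlock(lock);
 *	}
 */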