xref: /linux/kernel/locking/rtmutex.c (revision f7853c34241807bb97673a5e97719123be39a09e)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21696a8beSPeter Zijlstra /*
31696a8beSPeter Zijlstra  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
41696a8beSPeter Zijlstra  *
51696a8beSPeter Zijlstra  * started by Ingo Molnar and Thomas Gleixner.
61696a8beSPeter Zijlstra  *
71696a8beSPeter Zijlstra  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
81696a8beSPeter Zijlstra  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
91696a8beSPeter Zijlstra  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
101696a8beSPeter Zijlstra  *  Copyright (C) 2006 Esben Nielsen
11992caf7fSSteven Rostedt  * Adaptive Spinlocks:
12992caf7fSSteven Rostedt  *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
13992caf7fSSteven Rostedt  *				     and Peter Morreale,
14992caf7fSSteven Rostedt  * Adaptive Spinlocks simplification:
15992caf7fSSteven Rostedt  *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
161696a8beSPeter Zijlstra  *
17387b1468SMauro Carvalho Chehab  *  See Documentation/locking/rt-mutex-design.rst for details.
181696a8beSPeter Zijlstra  */
19531ae4b0SThomas Gleixner #include <linux/sched.h>
20531ae4b0SThomas Gleixner #include <linux/sched/debug.h>
21531ae4b0SThomas Gleixner #include <linux/sched/deadline.h>
22174cd4b1SIngo Molnar #include <linux/sched/signal.h>
231696a8beSPeter Zijlstra #include <linux/sched/rt.h>
2484f001e1SIngo Molnar #include <linux/sched/wake_q.h>
25add46132SPeter Zijlstra #include <linux/ww_mutex.h>
261696a8beSPeter Zijlstra 
27ee042be1SNamhyung Kim #include <trace/events/lock.h>
28ee042be1SNamhyung Kim 
291696a8beSPeter Zijlstra #include "rtmutex_common.h"
301696a8beSPeter Zijlstra 
31add46132SPeter Zijlstra #ifndef WW_RT
32add46132SPeter Zijlstra # define build_ww_mutex()	(false)
33add46132SPeter Zijlstra # define ww_container_of(rtm)	NULL
34add46132SPeter Zijlstra 
35add46132SPeter Zijlstra static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
36add46132SPeter Zijlstra 					struct rt_mutex *lock,
37add46132SPeter Zijlstra 					struct ww_acquire_ctx *ww_ctx)
38add46132SPeter Zijlstra {
39add46132SPeter Zijlstra 	return 0;
40add46132SPeter Zijlstra }
41add46132SPeter Zijlstra 
42add46132SPeter Zijlstra static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
43add46132SPeter Zijlstra 					    struct ww_acquire_ctx *ww_ctx)
44add46132SPeter Zijlstra {
45add46132SPeter Zijlstra }
46add46132SPeter Zijlstra 
47add46132SPeter Zijlstra static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
48add46132SPeter Zijlstra 					  struct ww_acquire_ctx *ww_ctx)
49add46132SPeter Zijlstra {
50add46132SPeter Zijlstra }
51add46132SPeter Zijlstra 
52add46132SPeter Zijlstra static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
53add46132SPeter Zijlstra 					struct rt_mutex_waiter *waiter,
54add46132SPeter Zijlstra 					struct ww_acquire_ctx *ww_ctx)
55add46132SPeter Zijlstra {
56add46132SPeter Zijlstra 	return 0;
57add46132SPeter Zijlstra }
58add46132SPeter Zijlstra 
59add46132SPeter Zijlstra #else
60add46132SPeter Zijlstra # define build_ww_mutex()	(true)
61add46132SPeter Zijlstra # define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
62add46132SPeter Zijlstra # include "ww_mutex.h"
63add46132SPeter Zijlstra #endif
64add46132SPeter Zijlstra 
651696a8beSPeter Zijlstra /*
661696a8beSPeter Zijlstra  * lock->owner state tracking:
671696a8beSPeter Zijlstra  *
681696a8beSPeter Zijlstra  * lock->owner holds the task_struct pointer of the owner. Bit 0
691696a8beSPeter Zijlstra  * is used to keep track of the "lock has waiters" state.
701696a8beSPeter Zijlstra  *
711696a8beSPeter Zijlstra  * owner	bit0
721696a8beSPeter Zijlstra  * NULL		0	lock is free (fast acquire possible)
731696a8beSPeter Zijlstra  * NULL		1	lock is free and has waiters and the top waiter
741696a8beSPeter Zijlstra  *				is going to take the lock*
751696a8beSPeter Zijlstra  * taskpointer	0	lock is held (fast release possible)
761696a8beSPeter Zijlstra  * taskpointer	1	lock is held and has waiters**
771696a8beSPeter Zijlstra  *
781696a8beSPeter Zijlstra  * The fast atomic compare exchange based acquire and release is only
791696a8beSPeter Zijlstra  * possible when bit 0 of lock->owner is 0.
801696a8beSPeter Zijlstra  *
811696a8beSPeter Zijlstra  * (*) It also can be a transitional state when grabbing the lock
821696a8beSPeter Zijlstra  * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
831696a8beSPeter Zijlstra  * we need to set the bit0 before looking at the lock, and the owner may be
841696a8beSPeter Zijlstra  * NULL in this small time, hence this can be a transitional state.
851696a8beSPeter Zijlstra  *
861696a8beSPeter Zijlstra  * (**) There is a small time when bit 0 is set but there are no
871696a8beSPeter Zijlstra  * waiters. This can happen when grabbing the lock in the slow path.
881696a8beSPeter Zijlstra  * To prevent a cmpxchg of the owner releasing the lock, we need to
891696a8beSPeter Zijlstra  * set this bit before looking at the lock.
901696a8beSPeter Zijlstra  */
911696a8beSPeter Zijlstra 
921c0908d8SMel Gorman static __always_inline struct task_struct *
931c0908d8SMel Gorman rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
941696a8beSPeter Zijlstra {
951696a8beSPeter Zijlstra 	unsigned long val = (unsigned long)owner;
961696a8beSPeter Zijlstra 
971696a8beSPeter Zijlstra 	if (rt_mutex_has_waiters(lock))
981696a8beSPeter Zijlstra 		val |= RT_MUTEX_HAS_WAITERS;
991696a8beSPeter Zijlstra 
1001c0908d8SMel Gorman 	return (struct task_struct *)val;
1011c0908d8SMel Gorman }
1021c0908d8SMel Gorman 
1031c0908d8SMel Gorman static __always_inline void
1041c0908d8SMel Gorman rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
1051c0908d8SMel Gorman {
1061c0908d8SMel Gorman 	/*
1071c0908d8SMel Gorman 	 * lock->wait_lock is held but explicit acquire semantics are needed
1081c0908d8SMel Gorman 	 * for a new lock owner so WRITE_ONCE is insufficient.
1091c0908d8SMel Gorman 	 */
1101c0908d8SMel Gorman 	xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
1111c0908d8SMel Gorman }
1121c0908d8SMel Gorman 
1131c0908d8SMel Gorman static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
1141c0908d8SMel Gorman {
1151c0908d8SMel Gorman 	/* lock->wait_lock is held so the unlock provides release semantics. */
1161c0908d8SMel Gorman 	WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
1171696a8beSPeter Zijlstra }
1181696a8beSPeter Zijlstra 
119830e6accSPeter Zijlstra static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
1201696a8beSPeter Zijlstra {
1211696a8beSPeter Zijlstra 	lock->owner = (struct task_struct *)
1221696a8beSPeter Zijlstra 			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
1231696a8beSPeter Zijlstra }
1241696a8beSPeter Zijlstra 
1251c0908d8SMel Gorman static __always_inline void
1261c0908d8SMel Gorman fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
1271696a8beSPeter Zijlstra {
128dbb26055SThomas Gleixner 	unsigned long owner, *p = (unsigned long *) &lock->owner;
129dbb26055SThomas Gleixner 
130dbb26055SThomas Gleixner 	if (rt_mutex_has_waiters(lock))
131dbb26055SThomas Gleixner 		return;
132dbb26055SThomas Gleixner 
133dbb26055SThomas Gleixner 	/*
134dbb26055SThomas Gleixner 	 * The rbtree has no waiters enqueued, now make sure that the
135dbb26055SThomas Gleixner 	 * lock->owner still has the waiters bit set, otherwise the
136dbb26055SThomas Gleixner 	 * following can happen:
137dbb26055SThomas Gleixner 	 *
138dbb26055SThomas Gleixner 	 * CPU 0	CPU 1		CPU2
139dbb26055SThomas Gleixner 	 * l->owner=T1
140dbb26055SThomas Gleixner 	 *		rt_mutex_lock(l)
141dbb26055SThomas Gleixner 	 *		lock(l->lock)
142dbb26055SThomas Gleixner 	 *		l->owner = T1 | HAS_WAITERS;
143dbb26055SThomas Gleixner 	 *		enqueue(T2)
144dbb26055SThomas Gleixner 	 *		boost()
145dbb26055SThomas Gleixner 	 *		  unlock(l->lock)
146dbb26055SThomas Gleixner 	 *		block()
147dbb26055SThomas Gleixner 	 *
148dbb26055SThomas Gleixner 	 *				rt_mutex_lock(l)
149dbb26055SThomas Gleixner 	 *				lock(l->lock)
150dbb26055SThomas Gleixner 	 *				l->owner = T1 | HAS_WAITERS;
151dbb26055SThomas Gleixner 	 *				enqueue(T3)
152dbb26055SThomas Gleixner 	 *				boost()
153dbb26055SThomas Gleixner 	 *				  unlock(l->lock)
154dbb26055SThomas Gleixner 	 *				block()
155dbb26055SThomas Gleixner 	 *		signal(->T2)	signal(->T3)
156dbb26055SThomas Gleixner 	 *		lock(l->lock)
157dbb26055SThomas Gleixner 	 *		dequeue(T2)
158dbb26055SThomas Gleixner 	 *		deboost()
159dbb26055SThomas Gleixner 	 *		  unlock(l->lock)
160dbb26055SThomas Gleixner 	 *				lock(l->lock)
161dbb26055SThomas Gleixner 	 *				dequeue(T3)
162dbb26055SThomas Gleixner 	 *				 ==> wait list is empty
163dbb26055SThomas Gleixner 	 *				deboost()
164dbb26055SThomas Gleixner 	 *				 unlock(l->lock)
165dbb26055SThomas Gleixner 	 *		lock(l->lock)
166dbb26055SThomas Gleixner 	 *		fixup_rt_mutex_waiters()
167dbb26055SThomas Gleixner 	 *		  if (wait_list_empty(l) {
168dbb26055SThomas Gleixner 	 *		    l->owner = owner
169dbb26055SThomas Gleixner 	 *		    owner = l->owner & ~HAS_WAITERS;
170dbb26055SThomas Gleixner 	 *		      ==> l->owner = T1
171dbb26055SThomas Gleixner 	 *		  }
172dbb26055SThomas Gleixner 	 *				lock(l->lock)
173dbb26055SThomas Gleixner 	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
174dbb26055SThomas Gleixner 	 *				  if (wait_list_empty(l) {
175dbb26055SThomas Gleixner 	 *				    owner = l->owner & ~HAS_WAITERS;
176dbb26055SThomas Gleixner 	 * cmpxchg(l->owner, T1, NULL)
177dbb26055SThomas Gleixner 	 *  ===> Success (l->owner = NULL)
178dbb26055SThomas Gleixner 	 *
179dbb26055SThomas Gleixner 	 *				    l->owner = owner
180dbb26055SThomas Gleixner 	 *				      ==> l->owner = T1
181dbb26055SThomas Gleixner 	 *				  }
182dbb26055SThomas Gleixner 	 *
183dbb26055SThomas Gleixner 	 * With the check for the waiter bit in place T3 on CPU2 will not
184dbb26055SThomas Gleixner 	 * overwrite. All tasks fiddling with the waiters bit are
185dbb26055SThomas Gleixner 	 * serialized by l->lock, so nothing else can modify the waiters
186dbb26055SThomas Gleixner 	 * bit. If the bit is set then nothing can change l->owner either
187dbb26055SThomas Gleixner 	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
188dbb26055SThomas Gleixner 	 * happens in the middle of the RMW because the waiters bit is
189dbb26055SThomas Gleixner 	 * still set.
190dbb26055SThomas Gleixner 	 */
191dbb26055SThomas Gleixner 	owner = READ_ONCE(*p);
1921c0908d8SMel Gorman 	if (owner & RT_MUTEX_HAS_WAITERS) {
1931c0908d8SMel Gorman 		/*
1941c0908d8SMel Gorman 		 * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
1951c0908d8SMel Gorman 		 * why xchg_acquire() is used for updating owner for
1961c0908d8SMel Gorman 		 * locking and WRITE_ONCE() for unlocking.
1971c0908d8SMel Gorman 		 *
1981c0908d8SMel Gorman 		 * WRITE_ONCE() would work for the acquire case too, but
1991c0908d8SMel Gorman 		 * in case that the lock acquisition failed it might
2001c0908d8SMel Gorman 		 * force other lockers into the slow path unnecessarily.
2011c0908d8SMel Gorman 		 */
2021c0908d8SMel Gorman 		if (acquire_lock)
2031c0908d8SMel Gorman 			xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
2041c0908d8SMel Gorman 		else
205dbb26055SThomas Gleixner 			WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
2061696a8beSPeter Zijlstra 	}
2071c0908d8SMel Gorman }
2081696a8beSPeter Zijlstra 
2091696a8beSPeter Zijlstra /*
210cede8841SSebastian Andrzej Siewior  * We can speed up the acquire/release, if there's no debugging state to be
211cede8841SSebastian Andrzej Siewior  * set up.
2121696a8beSPeter Zijlstra  */
213cede8841SSebastian Andrzej Siewior #ifndef CONFIG_DEBUG_RT_MUTEXES
214830e6accSPeter Zijlstra static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
21578515930SSebastian Andrzej Siewior 						     struct task_struct *old,
21678515930SSebastian Andrzej Siewior 						     struct task_struct *new)
21778515930SSebastian Andrzej Siewior {
218709e0b62SThomas Gleixner 	return try_cmpxchg_acquire(&lock->owner, &old, new);
21978515930SSebastian Andrzej Siewior }
22078515930SSebastian Andrzej Siewior 
221830e6accSPeter Zijlstra static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
22278515930SSebastian Andrzej Siewior 						     struct task_struct *old,
22378515930SSebastian Andrzej Siewior 						     struct task_struct *new)
22478515930SSebastian Andrzej Siewior {
225709e0b62SThomas Gleixner 	return try_cmpxchg_release(&lock->owner, &old, new);
22678515930SSebastian Andrzej Siewior }
227700318d1SDavidlohr Bueso 
228700318d1SDavidlohr Bueso /*
229700318d1SDavidlohr Bueso  * Callers must hold the ->wait_lock -- which is the whole purpose as we force
230700318d1SDavidlohr Bueso  * all future threads that attempt to [Rmw] the lock to the slowpath. As such
231700318d1SDavidlohr Bueso  * relaxed semantics suffice.
232700318d1SDavidlohr Bueso  */
233830e6accSPeter Zijlstra static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
2341696a8beSPeter Zijlstra {
2351696a8beSPeter Zijlstra 	unsigned long owner, *p = (unsigned long *) &lock->owner;
2361696a8beSPeter Zijlstra 
2371696a8beSPeter Zijlstra 	do {
2381696a8beSPeter Zijlstra 		owner = *p;
239700318d1SDavidlohr Bueso 	} while (cmpxchg_relaxed(p, owner,
240700318d1SDavidlohr Bueso 				 owner | RT_MUTEX_HAS_WAITERS) != owner);
2411c0908d8SMel Gorman 
2421c0908d8SMel Gorman 	/*
2431c0908d8SMel Gorman 	 * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
2441c0908d8SMel Gorman 	 * operations in the event of contention. Ensure the successful
2451c0908d8SMel Gorman 	 * cmpxchg is visible.
2461c0908d8SMel Gorman 	 */
2471c0908d8SMel Gorman 	smp_mb__after_atomic();
2481696a8beSPeter Zijlstra }
24927e35715SThomas Gleixner 
25027e35715SThomas Gleixner /*
25127e35715SThomas Gleixner  * Safe fastpath aware unlock:
25227e35715SThomas Gleixner  * 1) Clear the waiters bit
25327e35715SThomas Gleixner  * 2) Drop lock->wait_lock
25427e35715SThomas Gleixner  * 3) Try to unlock the lock with cmpxchg
25527e35715SThomas Gleixner  */
256830e6accSPeter Zijlstra static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
257b4abf910SThomas Gleixner 						 unsigned long flags)
25827e35715SThomas Gleixner 	__releases(lock->wait_lock)
25927e35715SThomas Gleixner {
26027e35715SThomas Gleixner 	struct task_struct *owner = rt_mutex_owner(lock);
26127e35715SThomas Gleixner 
26227e35715SThomas Gleixner 	clear_rt_mutex_waiters(lock);
263b4abf910SThomas Gleixner 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
26427e35715SThomas Gleixner 	/*
26527e35715SThomas Gleixner 	 * If a new waiter comes in between the unlock and the cmpxchg
26627e35715SThomas Gleixner 	 * we have two situations:
26727e35715SThomas Gleixner 	 *
26827e35715SThomas Gleixner 	 * unlock(wait_lock);
26927e35715SThomas Gleixner 	 *					lock(wait_lock);
27027e35715SThomas Gleixner 	 * cmpxchg(p, owner, 0) == owner
27127e35715SThomas Gleixner 	 *					mark_rt_mutex_waiters(lock);
27227e35715SThomas Gleixner 	 *					acquire(lock);
27327e35715SThomas Gleixner 	 * or:
27427e35715SThomas Gleixner 	 *
27527e35715SThomas Gleixner 	 * unlock(wait_lock);
27627e35715SThomas Gleixner 	 *					lock(wait_lock);
27727e35715SThomas Gleixner 	 *					mark_rt_mutex_waiters(lock);
27827e35715SThomas Gleixner 	 *
27927e35715SThomas Gleixner 	 * cmpxchg(p, owner, 0) != owner
28027e35715SThomas Gleixner 	 *					enqueue_waiter();
28127e35715SThomas Gleixner 	 *					unlock(wait_lock);
28227e35715SThomas Gleixner 	 * lock(wait_lock);
28327e35715SThomas Gleixner 	 * wake waiter();
28427e35715SThomas Gleixner 	 * unlock(wait_lock);
28527e35715SThomas Gleixner 	 *					lock(wait_lock);
28627e35715SThomas Gleixner 	 *					acquire(lock);
28727e35715SThomas Gleixner 	 */
288700318d1SDavidlohr Bueso 	return rt_mutex_cmpxchg_release(lock, owner, NULL);
28927e35715SThomas Gleixner }
29027e35715SThomas Gleixner 
2911696a8beSPeter Zijlstra #else
292830e6accSPeter Zijlstra static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
29378515930SSebastian Andrzej Siewior 						     struct task_struct *old,
29478515930SSebastian Andrzej Siewior 						     struct task_struct *new)
29578515930SSebastian Andrzej Siewior {
29678515930SSebastian Andrzej Siewior 	return false;
29778515930SSebastian Andrzej Siewior 
29878515930SSebastian Andrzej Siewior }
29978515930SSebastian Andrzej Siewior 
300830e6accSPeter Zijlstra static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
30178515930SSebastian Andrzej Siewior 						     struct task_struct *old,
30278515930SSebastian Andrzej Siewior 						     struct task_struct *new)
30378515930SSebastian Andrzej Siewior {
30478515930SSebastian Andrzej Siewior 	return false;
30578515930SSebastian Andrzej Siewior }
306700318d1SDavidlohr Bueso 
307830e6accSPeter Zijlstra static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
3081696a8beSPeter Zijlstra {
3091696a8beSPeter Zijlstra 	lock->owner = (struct task_struct *)
3101696a8beSPeter Zijlstra 			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
3111696a8beSPeter Zijlstra }
31227e35715SThomas Gleixner 
31327e35715SThomas Gleixner /*
31427e35715SThomas Gleixner  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
31527e35715SThomas Gleixner  */
316830e6accSPeter Zijlstra static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
317b4abf910SThomas Gleixner 						 unsigned long flags)
31827e35715SThomas Gleixner 	__releases(lock->wait_lock)
31927e35715SThomas Gleixner {
32027e35715SThomas Gleixner 	lock->owner = NULL;
321b4abf910SThomas Gleixner 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
32227e35715SThomas Gleixner 	return true;
32327e35715SThomas Gleixner }
3241696a8beSPeter Zijlstra #endif
3251696a8beSPeter Zijlstra 
326715f7f9eSPeter Zijlstra static __always_inline int __waiter_prio(struct task_struct *task)
327715f7f9eSPeter Zijlstra {
328715f7f9eSPeter Zijlstra 	int prio = task->prio;
329715f7f9eSPeter Zijlstra 
330715f7f9eSPeter Zijlstra 	if (!rt_prio(prio))
331715f7f9eSPeter Zijlstra 		return DEFAULT_PRIO;
332715f7f9eSPeter Zijlstra 
333715f7f9eSPeter Zijlstra 	return prio;
334715f7f9eSPeter Zijlstra }
335715f7f9eSPeter Zijlstra 
336*f7853c34SPeter Zijlstra /*
337*f7853c34SPeter Zijlstra  * Update the waiter->tree copy of the sort keys.
338*f7853c34SPeter Zijlstra  */
339715f7f9eSPeter Zijlstra static __always_inline void
340715f7f9eSPeter Zijlstra waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
341715f7f9eSPeter Zijlstra {
342*f7853c34SPeter Zijlstra 	lockdep_assert_held(&waiter->lock->wait_lock);
343*f7853c34SPeter Zijlstra 	lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
344*f7853c34SPeter Zijlstra 
345*f7853c34SPeter Zijlstra 	waiter->tree.prio = __waiter_prio(task);
346*f7853c34SPeter Zijlstra 	waiter->tree.deadline = task->dl.deadline;
347715f7f9eSPeter Zijlstra }
348715f7f9eSPeter Zijlstra 
34919830e55SPeter Zijlstra /*
350*f7853c34SPeter Zijlstra  * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
35119830e55SPeter Zijlstra  */
352*f7853c34SPeter Zijlstra static __always_inline void
353*f7853c34SPeter Zijlstra waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
354*f7853c34SPeter Zijlstra {
355*f7853c34SPeter Zijlstra 	lockdep_assert_held(&waiter->lock->wait_lock);
356*f7853c34SPeter Zijlstra 	lockdep_assert_held(&task->pi_lock);
357*f7853c34SPeter Zijlstra 	lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
35819830e55SPeter Zijlstra 
359*f7853c34SPeter Zijlstra 	waiter->pi_tree.prio = waiter->tree.prio;
360*f7853c34SPeter Zijlstra 	waiter->pi_tree.deadline = waiter->tree.deadline;
361*f7853c34SPeter Zijlstra }
362*f7853c34SPeter Zijlstra 
363*f7853c34SPeter Zijlstra /*
364*f7853c34SPeter Zijlstra  * Only use with rt_waiter_node_{less,equal}()
365*f7853c34SPeter Zijlstra  */
366*f7853c34SPeter Zijlstra #define task_to_waiter_node(p)	\
367*f7853c34SPeter Zijlstra 	&(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
368*f7853c34SPeter Zijlstra #define task_to_waiter(p)	\
369*f7853c34SPeter Zijlstra 	&(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
370*f7853c34SPeter Zijlstra 
371*f7853c34SPeter Zijlstra static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
372*f7853c34SPeter Zijlstra 					       struct rt_waiter_node *right)
373fb00aca4SPeter Zijlstra {
3742d3d891dSDario Faggioli 	if (left->prio < right->prio)
375fb00aca4SPeter Zijlstra 		return 1;
376fb00aca4SPeter Zijlstra 
3771696a8beSPeter Zijlstra 	/*
3782d3d891dSDario Faggioli 	 * If both waiters have dl_prio(), we check the deadlines of the
3792d3d891dSDario Faggioli 	 * associated tasks.
3802d3d891dSDario Faggioli 	 * If left waiter has a dl_prio(), and we didn't return 1 above,
3812d3d891dSDario Faggioli 	 * then right waiter has a dl_prio() too.
382fb00aca4SPeter Zijlstra 	 */
3832d3d891dSDario Faggioli 	if (dl_prio(left->prio))
384e0aad5b4SPeter Zijlstra 		return dl_time_before(left->deadline, right->deadline);
385fb00aca4SPeter Zijlstra 
386fb00aca4SPeter Zijlstra 	return 0;
387fb00aca4SPeter Zijlstra }
388fb00aca4SPeter Zijlstra 
389*f7853c34SPeter Zijlstra static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
390*f7853c34SPeter Zijlstra 						 struct rt_waiter_node *right)
39119830e55SPeter Zijlstra {
39219830e55SPeter Zijlstra 	if (left->prio != right->prio)
39319830e55SPeter Zijlstra 		return 0;
39419830e55SPeter Zijlstra 
39519830e55SPeter Zijlstra 	/*
39619830e55SPeter Zijlstra 	 * If both waiters have dl_prio(), we check the deadlines of the
39719830e55SPeter Zijlstra 	 * associated tasks.
39819830e55SPeter Zijlstra 	 * If left waiter has a dl_prio(), and we didn't return 0 above,
39919830e55SPeter Zijlstra 	 * then right waiter has a dl_prio() too.
40019830e55SPeter Zijlstra 	 */
40119830e55SPeter Zijlstra 	if (dl_prio(left->prio))
40219830e55SPeter Zijlstra 		return left->deadline == right->deadline;
40319830e55SPeter Zijlstra 
40419830e55SPeter Zijlstra 	return 1;
40519830e55SPeter Zijlstra }
40619830e55SPeter Zijlstra 
40748eb3f4fSGregory Haskins static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
40848eb3f4fSGregory Haskins 				  struct rt_mutex_waiter *top_waiter)
40948eb3f4fSGregory Haskins {
410*f7853c34SPeter Zijlstra 	if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
41148eb3f4fSGregory Haskins 		return true;
41248eb3f4fSGregory Haskins 
41348eb3f4fSGregory Haskins #ifdef RT_MUTEX_BUILD_SPINLOCKS
41448eb3f4fSGregory Haskins 	/*
41548eb3f4fSGregory Haskins 	 * Note that RT tasks are excluded from same priority (lateral)
41648eb3f4fSGregory Haskins 	 * steals to prevent the introduction of an unbounded latency.
41748eb3f4fSGregory Haskins 	 */
418*f7853c34SPeter Zijlstra 	if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
41948eb3f4fSGregory Haskins 		return false;
42048eb3f4fSGregory Haskins 
421*f7853c34SPeter Zijlstra 	return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
42248eb3f4fSGregory Haskins #else
42348eb3f4fSGregory Haskins 	return false;
42448eb3f4fSGregory Haskins #endif
42548eb3f4fSGregory Haskins }
42648eb3f4fSGregory Haskins 
4275a798725SPeter Zijlstra #define __node_2_waiter(node) \
428*f7853c34SPeter Zijlstra 	rb_entry((node), struct rt_mutex_waiter, tree.entry)
4295a798725SPeter Zijlstra 
430d7a2edb8SThomas Gleixner static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
4315a798725SPeter Zijlstra {
432add46132SPeter Zijlstra 	struct rt_mutex_waiter *aw = __node_2_waiter(a);
433add46132SPeter Zijlstra 	struct rt_mutex_waiter *bw = __node_2_waiter(b);
434add46132SPeter Zijlstra 
435*f7853c34SPeter Zijlstra 	if (rt_waiter_node_less(&aw->tree, &bw->tree))
436add46132SPeter Zijlstra 		return 1;
437add46132SPeter Zijlstra 
438add46132SPeter Zijlstra 	if (!build_ww_mutex())
439add46132SPeter Zijlstra 		return 0;
440add46132SPeter Zijlstra 
441*f7853c34SPeter Zijlstra 	if (rt_waiter_node_less(&bw->tree, &aw->tree))
442add46132SPeter Zijlstra 		return 0;
443add46132SPeter Zijlstra 
444add46132SPeter Zijlstra 	/* NOTE: relies on waiter->ww_ctx being set before insertion */
445add46132SPeter Zijlstra 	if (aw->ww_ctx) {
446add46132SPeter Zijlstra 		if (!bw->ww_ctx)
447add46132SPeter Zijlstra 			return 1;
448add46132SPeter Zijlstra 
449add46132SPeter Zijlstra 		return (signed long)(aw->ww_ctx->stamp -
450add46132SPeter Zijlstra 				     bw->ww_ctx->stamp) < 0;
451add46132SPeter Zijlstra 	}
452add46132SPeter Zijlstra 
453add46132SPeter Zijlstra 	return 0;
4545a798725SPeter Zijlstra }
4555a798725SPeter Zijlstra 
456d7a2edb8SThomas Gleixner static __always_inline void
457830e6accSPeter Zijlstra rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
458fb00aca4SPeter Zijlstra {
459*f7853c34SPeter Zijlstra 	lockdep_assert_held(&lock->wait_lock);
460*f7853c34SPeter Zijlstra 
461*f7853c34SPeter Zijlstra 	rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
462fb00aca4SPeter Zijlstra }
463fb00aca4SPeter Zijlstra 
464d7a2edb8SThomas Gleixner static __always_inline void
465830e6accSPeter Zijlstra rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
466fb00aca4SPeter Zijlstra {
467*f7853c34SPeter Zijlstra 	lockdep_assert_held(&lock->wait_lock);
468*f7853c34SPeter Zijlstra 
469*f7853c34SPeter Zijlstra 	if (RB_EMPTY_NODE(&waiter->tree.entry))
470fb00aca4SPeter Zijlstra 		return;
471fb00aca4SPeter Zijlstra 
472*f7853c34SPeter Zijlstra 	rb_erase_cached(&waiter->tree.entry, &lock->waiters);
473*f7853c34SPeter Zijlstra 	RB_CLEAR_NODE(&waiter->tree.entry);
474fb00aca4SPeter Zijlstra }
475fb00aca4SPeter Zijlstra 
476*f7853c34SPeter Zijlstra #define __node_2_rt_node(node) \
477*f7853c34SPeter Zijlstra 	rb_entry((node), struct rt_waiter_node, entry)
4785a798725SPeter Zijlstra 
479*f7853c34SPeter Zijlstra static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
4805a798725SPeter Zijlstra {
481*f7853c34SPeter Zijlstra 	return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
4825a798725SPeter Zijlstra }
4835a798725SPeter Zijlstra 
484d7a2edb8SThomas Gleixner static __always_inline void
485fb00aca4SPeter Zijlstra rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
486fb00aca4SPeter Zijlstra {
487*f7853c34SPeter Zijlstra 	lockdep_assert_held(&task->pi_lock);
488*f7853c34SPeter Zijlstra 
489*f7853c34SPeter Zijlstra 	rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
490fb00aca4SPeter Zijlstra }
491fb00aca4SPeter Zijlstra 
492d7a2edb8SThomas Gleixner static __always_inline void
493fb00aca4SPeter Zijlstra rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
494fb00aca4SPeter Zijlstra {
495*f7853c34SPeter Zijlstra 	lockdep_assert_held(&task->pi_lock);
496*f7853c34SPeter Zijlstra 
497*f7853c34SPeter Zijlstra 	if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
498fb00aca4SPeter Zijlstra 		return;
499fb00aca4SPeter Zijlstra 
500*f7853c34SPeter Zijlstra 	rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
501*f7853c34SPeter Zijlstra 	RB_CLEAR_NODE(&waiter->pi_tree.entry);
502fb00aca4SPeter Zijlstra }
503fb00aca4SPeter Zijlstra 
504*f7853c34SPeter Zijlstra static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
505*f7853c34SPeter Zijlstra 						 struct task_struct *p)
506e96a7705SXunlei Pang {
507acd58620SPeter Zijlstra 	struct task_struct *pi_task = NULL;
508e96a7705SXunlei Pang 
509*f7853c34SPeter Zijlstra 	lockdep_assert_held(&lock->wait_lock);
510*f7853c34SPeter Zijlstra 	lockdep_assert(rt_mutex_owner(lock) == p);
511acd58620SPeter Zijlstra 	lockdep_assert_held(&p->pi_lock);
512e96a7705SXunlei Pang 
513acd58620SPeter Zijlstra 	if (task_has_pi_waiters(p))
514acd58620SPeter Zijlstra 		pi_task = task_top_pi_waiter(p)->task;
5151696a8beSPeter Zijlstra 
516acd58620SPeter Zijlstra 	rt_mutex_setprio(p, pi_task);
5171696a8beSPeter Zijlstra }
5181696a8beSPeter Zijlstra 
519b576e640SThomas Gleixner /* RT mutex specific wake_q wrappers */
5209321f815SThomas Gleixner static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
5219321f815SThomas Gleixner 						     struct task_struct *task,
5229321f815SThomas Gleixner 						     unsigned int wake_state)
5239321f815SThomas Gleixner {
5249321f815SThomas Gleixner 	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
5259321f815SThomas Gleixner 		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
5269321f815SThomas Gleixner 			WARN_ON_ONCE(wqh->rtlock_task);
5279321f815SThomas Gleixner 		get_task_struct(task);
5289321f815SThomas Gleixner 		wqh->rtlock_task = task;
5299321f815SThomas Gleixner 	} else {
5309321f815SThomas Gleixner 		wake_q_add(&wqh->head, task);
5319321f815SThomas Gleixner 	}
5329321f815SThomas Gleixner }
5339321f815SThomas Gleixner 
534b576e640SThomas Gleixner static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
535b576e640SThomas Gleixner 						struct rt_mutex_waiter *w)
536b576e640SThomas Gleixner {
5379321f815SThomas Gleixner 	rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
538456cfbc6SThomas Gleixner }
539b576e640SThomas Gleixner 
540b576e640SThomas Gleixner static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
541b576e640SThomas Gleixner {
542456cfbc6SThomas Gleixner 	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
543456cfbc6SThomas Gleixner 		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
544456cfbc6SThomas Gleixner 		put_task_struct(wqh->rtlock_task);
545456cfbc6SThomas Gleixner 		wqh->rtlock_task = NULL;
546456cfbc6SThomas Gleixner 	}
547456cfbc6SThomas Gleixner 
548456cfbc6SThomas Gleixner 	if (!wake_q_empty(&wqh->head))
549b576e640SThomas Gleixner 		wake_up_q(&wqh->head);
550b576e640SThomas Gleixner 
551b576e640SThomas Gleixner 	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
552b576e640SThomas Gleixner 	preempt_enable();
553b576e640SThomas Gleixner }
554b576e640SThomas Gleixner 
5551696a8beSPeter Zijlstra /*
5568930ed80SThomas Gleixner  * Deadlock detection is conditional:
5578930ed80SThomas Gleixner  *
5588930ed80SThomas Gleixner  * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
5598930ed80SThomas Gleixner  * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
5608930ed80SThomas Gleixner  *
5618930ed80SThomas Gleixner  * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
5628930ed80SThomas Gleixner  * conducted independent of the detect argument.
5638930ed80SThomas Gleixner  *
5648930ed80SThomas Gleixner  * If the waiter argument is NULL this indicates the deboost path and
5658930ed80SThomas Gleixner  * deadlock detection is disabled independent of the detect argument
5668930ed80SThomas Gleixner  * and the config settings.
5678930ed80SThomas Gleixner  */
568d7a2edb8SThomas Gleixner static __always_inline bool
569d7a2edb8SThomas Gleixner rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
5708930ed80SThomas Gleixner 			      enum rtmutex_chainwalk chwalk)
5718930ed80SThomas Gleixner {
57207d25971SZhen Lei 	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
573f7efc479SThomas Gleixner 		return waiter != NULL;
574f7efc479SThomas Gleixner 	return chwalk == RT_MUTEX_FULL_CHAINWALK;
5758930ed80SThomas Gleixner }
5768930ed80SThomas Gleixner 
577830e6accSPeter Zijlstra static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
57882084984SThomas Gleixner {
57982084984SThomas Gleixner 	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
58082084984SThomas Gleixner }
58182084984SThomas Gleixner 
5821696a8beSPeter Zijlstra /*
5831696a8beSPeter Zijlstra  * Adjust the priority chain. Also used for deadlock detection.
5841696a8beSPeter Zijlstra  * Decreases task's usage by one - may thus free the task.
5851696a8beSPeter Zijlstra  *
58682084984SThomas Gleixner  * @task:	the task owning the mutex (owner) for which a chain walk is
58782084984SThomas Gleixner  *		probably needed
588e6beaa36STom(JeHyeon) Yeon  * @chwalk:	do we have to carry out deadlock detection?
5891696a8beSPeter Zijlstra  * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
5901696a8beSPeter Zijlstra  *		things for a task that has just got its priority adjusted, and
5911696a8beSPeter Zijlstra  *		is waiting on a mutex)
59282084984SThomas Gleixner  * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
59382084984SThomas Gleixner  *		we dropped its pi_lock. Is never dereferenced, only used for
59482084984SThomas Gleixner  *		comparison to detect lock chain changes.
5951696a8beSPeter Zijlstra  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
5961696a8beSPeter Zijlstra  *		its priority to the mutex owner (can be NULL in the case
5971696a8beSPeter Zijlstra  *		depicted above or if the top waiter is gone away and we are
5981696a8beSPeter Zijlstra  *		actually deboosting the owner)
5991696a8beSPeter Zijlstra  * @top_task:	the current top waiter
6001696a8beSPeter Zijlstra  *
6011696a8beSPeter Zijlstra  * Returns 0 or -EDEADLK.
6023eb65aeaSThomas Gleixner  *
6033eb65aeaSThomas Gleixner  * Chain walk basics and protection scope
6043eb65aeaSThomas Gleixner  *
6053eb65aeaSThomas Gleixner  * [R] refcount on task
606*f7853c34SPeter Zijlstra  * [Pn] task->pi_lock held
6073eb65aeaSThomas Gleixner  * [L] rtmutex->wait_lock held
6083eb65aeaSThomas Gleixner  *
609*f7853c34SPeter Zijlstra  * Normal locking order:
610*f7853c34SPeter Zijlstra  *
611*f7853c34SPeter Zijlstra  *   rtmutex->wait_lock
612*f7853c34SPeter Zijlstra  *     task->pi_lock
613*f7853c34SPeter Zijlstra  *
6143eb65aeaSThomas Gleixner  * Step	Description				Protected by
6153eb65aeaSThomas Gleixner  *	function arguments:
6163eb65aeaSThomas Gleixner  *	@task					[R]
6173eb65aeaSThomas Gleixner  *	@orig_lock if != NULL			@top_task is blocked on it
6183eb65aeaSThomas Gleixner  *	@next_lock				Unprotected. Cannot be
6193eb65aeaSThomas Gleixner  *						dereferenced. Only used for
6203eb65aeaSThomas Gleixner  *						comparison.
6213eb65aeaSThomas Gleixner  *	@orig_waiter if != NULL			@top_task is blocked on it
6223eb65aeaSThomas Gleixner  *	@top_task				current, or in case of proxy
6233eb65aeaSThomas Gleixner  *						locking protected by calling
6243eb65aeaSThomas Gleixner  *						code
6253eb65aeaSThomas Gleixner  *	again:
6263eb65aeaSThomas Gleixner  *	  loop_sanity_check();
6273eb65aeaSThomas Gleixner  *	retry:
628*f7853c34SPeter Zijlstra  * [1]	  lock(task->pi_lock);			[R] acquire [P1]
629*f7853c34SPeter Zijlstra  * [2]	  waiter = task->pi_blocked_on;		[P1]
630*f7853c34SPeter Zijlstra  * [3]	  check_exit_conditions_1();		[P1]
631*f7853c34SPeter Zijlstra  * [4]	  lock = waiter->lock;			[P1]
632*f7853c34SPeter Zijlstra  * [5]	  if (!try_lock(lock->wait_lock)) {	[P1] try to acquire [L]
633*f7853c34SPeter Zijlstra  *	    unlock(task->pi_lock);		release [P1]
6343eb65aeaSThomas Gleixner  *	    goto retry;
6353eb65aeaSThomas Gleixner  *	  }
636*f7853c34SPeter Zijlstra  * [6]	  check_exit_conditions_2();		[P1] + [L]
637*f7853c34SPeter Zijlstra  * [7]	  requeue_lock_waiter(lock, waiter);	[P1] + [L]
638*f7853c34SPeter Zijlstra  * [8]	  unlock(task->pi_lock);		release [P1]
6393eb65aeaSThomas Gleixner  *	  put_task_struct(task);		release [R]
6403eb65aeaSThomas Gleixner  * [9]	  check_exit_conditions_3();		[L]
6413eb65aeaSThomas Gleixner  * [10]	  task = owner(lock);			[L]
6423eb65aeaSThomas Gleixner  *	  get_task_struct(task);		[L] acquire [R]
643*f7853c34SPeter Zijlstra  *	  lock(task->pi_lock);			[L] acquire [P2]
644*f7853c34SPeter Zijlstra  * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
645*f7853c34SPeter Zijlstra  * [12]	  check_exit_conditions_4();		[P2] + [L]
646*f7853c34SPeter Zijlstra  * [13]	  unlock(task->pi_lock);		release [P2]
6473eb65aeaSThomas Gleixner  *	  unlock(lock->wait_lock);		release [L]
6483eb65aeaSThomas Gleixner  *	  goto again;
649*f7853c34SPeter Zijlstra  *
650*f7853c34SPeter Zijlstra  * Where P1 is the blocking task and P2 is the lock owner; going up one step
651*f7853c34SPeter Zijlstra  * the owner becomes the next blocked task etc..
652*f7853c34SPeter Zijlstra  *
653*f7853c34SPeter Zijlstra *
6541696a8beSPeter Zijlstra  */
655d7a2edb8SThomas Gleixner static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
6568930ed80SThomas Gleixner 					      enum rtmutex_chainwalk chwalk,
657830e6accSPeter Zijlstra 					      struct rt_mutex_base *orig_lock,
658830e6accSPeter Zijlstra 					      struct rt_mutex_base *next_lock,
6591696a8beSPeter Zijlstra 					      struct rt_mutex_waiter *orig_waiter,
6601696a8beSPeter Zijlstra 					      struct task_struct *top_task)
6611696a8beSPeter Zijlstra {
6621696a8beSPeter Zijlstra 	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
663a57594a1SThomas Gleixner 	struct rt_mutex_waiter *prerequeue_top_waiter;
6648930ed80SThomas Gleixner 	int ret = 0, depth = 0;
665830e6accSPeter Zijlstra 	struct rt_mutex_base *lock;
6668930ed80SThomas Gleixner 	bool detect_deadlock;
66767792e2cSThomas Gleixner 	bool requeue = true;
6681696a8beSPeter Zijlstra 
6698930ed80SThomas Gleixner 	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
6701696a8beSPeter Zijlstra 
6711696a8beSPeter Zijlstra 	/*
6721696a8beSPeter Zijlstra 	 * The (de)boosting is a step by step approach with a lot of
6731696a8beSPeter Zijlstra 	 * pitfalls. We want this to be preemptible and we want hold a
6741696a8beSPeter Zijlstra 	 * maximum of two locks per step. So we have to check
6751696a8beSPeter Zijlstra 	 * carefully whether things change under us.
6761696a8beSPeter Zijlstra 	 */
6771696a8beSPeter Zijlstra  again:
6783eb65aeaSThomas Gleixner 	/*
6793eb65aeaSThomas Gleixner 	 * We limit the lock chain length for each invocation.
6803eb65aeaSThomas Gleixner 	 */
6811696a8beSPeter Zijlstra 	if (++depth > max_lock_depth) {
6821696a8beSPeter Zijlstra 		static int prev_max;
6831696a8beSPeter Zijlstra 
6841696a8beSPeter Zijlstra 		/*
6851696a8beSPeter Zijlstra 		 * Print this only once. If the admin changes the limit,
6861696a8beSPeter Zijlstra 		 * print a new message when reaching the limit again.
6871696a8beSPeter Zijlstra 		 */
6881696a8beSPeter Zijlstra 		if (prev_max != max_lock_depth) {
6891696a8beSPeter Zijlstra 			prev_max = max_lock_depth;
6901696a8beSPeter Zijlstra 			printk(KERN_WARNING "Maximum lock depth %d reached "
6911696a8beSPeter Zijlstra 			       "task: %s (%d)\n", max_lock_depth,
6921696a8beSPeter Zijlstra 			       top_task->comm, task_pid_nr(top_task));
6931696a8beSPeter Zijlstra 		}
6941696a8beSPeter Zijlstra 		put_task_struct(task);
6951696a8beSPeter Zijlstra 
6963d5c9340SThomas Gleixner 		return -EDEADLK;
6971696a8beSPeter Zijlstra 	}
6983eb65aeaSThomas Gleixner 
6993eb65aeaSThomas Gleixner 	/*
7003eb65aeaSThomas Gleixner 	 * We are fully preemptible here and only hold the refcount on
7013eb65aeaSThomas Gleixner 	 * @task. So everything can have changed under us since the
7023eb65aeaSThomas Gleixner 	 * caller or our own code below (goto retry/again) dropped all
7033eb65aeaSThomas Gleixner 	 * locks.
7043eb65aeaSThomas Gleixner 	 */
7051696a8beSPeter Zijlstra  retry:
7061696a8beSPeter Zijlstra 	/*
7073eb65aeaSThomas Gleixner 	 * [1] Task cannot go away as we did a get_task() before !
7081696a8beSPeter Zijlstra 	 */
709b4abf910SThomas Gleixner 	raw_spin_lock_irq(&task->pi_lock);
7101696a8beSPeter Zijlstra 
7113eb65aeaSThomas Gleixner 	/*
7123eb65aeaSThomas Gleixner 	 * [2] Get the waiter on which @task is blocked on.
7133eb65aeaSThomas Gleixner 	 */
7141696a8beSPeter Zijlstra 	waiter = task->pi_blocked_on;
7153eb65aeaSThomas Gleixner 
7163eb65aeaSThomas Gleixner 	/*
7173eb65aeaSThomas Gleixner 	 * [3] check_exit_conditions_1() protected by task->pi_lock.
7183eb65aeaSThomas Gleixner 	 */
7193eb65aeaSThomas Gleixner 
7201696a8beSPeter Zijlstra 	/*
7211696a8beSPeter Zijlstra 	 * Check whether the end of the boosting chain has been
7221696a8beSPeter Zijlstra 	 * reached or the state of the chain has changed while we
7231696a8beSPeter Zijlstra 	 * dropped the locks.
7241696a8beSPeter Zijlstra 	 */
7251696a8beSPeter Zijlstra 	if (!waiter)
7261696a8beSPeter Zijlstra 		goto out_unlock_pi;
7271696a8beSPeter Zijlstra 
7281696a8beSPeter Zijlstra 	/*
7291696a8beSPeter Zijlstra 	 * Check the orig_waiter state. After we dropped the locks,
7301696a8beSPeter Zijlstra 	 * the previous owner of the lock might have released the lock.
7311696a8beSPeter Zijlstra 	 */
7321696a8beSPeter Zijlstra 	if (orig_waiter && !rt_mutex_owner(orig_lock))
7331696a8beSPeter Zijlstra 		goto out_unlock_pi;
7341696a8beSPeter Zijlstra 
7351696a8beSPeter Zijlstra 	/*
73682084984SThomas Gleixner 	 * We dropped all locks after taking a refcount on @task, so
73782084984SThomas Gleixner 	 * the task might have moved on in the lock chain or even left
73882084984SThomas Gleixner 	 * the chain completely and blocks now on an unrelated lock or
73982084984SThomas Gleixner 	 * on @orig_lock.
74082084984SThomas Gleixner 	 *
74182084984SThomas Gleixner 	 * We stored the lock on which @task was blocked in @next_lock,
74282084984SThomas Gleixner 	 * so we can detect the chain change.
74382084984SThomas Gleixner 	 */
74482084984SThomas Gleixner 	if (next_lock != waiter->lock)
74582084984SThomas Gleixner 		goto out_unlock_pi;
74682084984SThomas Gleixner 
74782084984SThomas Gleixner 	/*
7486467822bSPeter Zijlstra 	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
7496467822bSPeter Zijlstra 	 * consider:
7506467822bSPeter Zijlstra 	 *
7516467822bSPeter Zijlstra 	 *   P1: A, ww_A, ww_B
7526467822bSPeter Zijlstra 	 *   P2: ww_B, ww_A
7536467822bSPeter Zijlstra 	 *   P3: A
7546467822bSPeter Zijlstra 	 *
7556467822bSPeter Zijlstra 	 * P3 should not return -EDEADLK because it gets trapped in the cycle
7566467822bSPeter Zijlstra 	 * created by P1 and P2 (which will resolve -- and runs into
7576467822bSPeter Zijlstra 	 * max_lock_depth above). Therefore disable detect_deadlock such that
7586467822bSPeter Zijlstra 	 * the below termination condition can trigger once all relevant tasks
7596467822bSPeter Zijlstra 	 * are boosted.
7606467822bSPeter Zijlstra 	 *
7616467822bSPeter Zijlstra 	 * Even when we start with ww_mutex we can disable deadlock detection,
7626467822bSPeter Zijlstra 	 * since we would supress a ww_mutex induced deadlock at [6] anyway.
7636467822bSPeter Zijlstra 	 * Supressing it here however is not sufficient since we might still
7646467822bSPeter Zijlstra 	 * hit [6] due to adjustment driven iteration.
7656467822bSPeter Zijlstra 	 *
7666467822bSPeter Zijlstra 	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
7676467822bSPeter Zijlstra 	 * utterly fail to report it; lockdep should.
7686467822bSPeter Zijlstra 	 */
7696467822bSPeter Zijlstra 	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
7706467822bSPeter Zijlstra 		detect_deadlock = false;
7716467822bSPeter Zijlstra 
7726467822bSPeter Zijlstra 	/*
7731696a8beSPeter Zijlstra 	 * Drop out, when the task has no waiters. Note,
7741696a8beSPeter Zijlstra 	 * top_waiter can be NULL, when we are in the deboosting
7751696a8beSPeter Zijlstra 	 * mode!
7761696a8beSPeter Zijlstra 	 */
777397335f0SThomas Gleixner 	if (top_waiter) {
778397335f0SThomas Gleixner 		if (!task_has_pi_waiters(task))
7791696a8beSPeter Zijlstra 			goto out_unlock_pi;
780397335f0SThomas Gleixner 		/*
781397335f0SThomas Gleixner 		 * If deadlock detection is off, we stop here if we
78267792e2cSThomas Gleixner 		 * are not the top pi waiter of the task. If deadlock
78367792e2cSThomas Gleixner 		 * detection is enabled we continue, but stop the
78467792e2cSThomas Gleixner 		 * requeueing in the chain walk.
785397335f0SThomas Gleixner 		 */
78667792e2cSThomas Gleixner 		if (top_waiter != task_top_pi_waiter(task)) {
78767792e2cSThomas Gleixner 			if (!detect_deadlock)
788397335f0SThomas Gleixner 				goto out_unlock_pi;
78967792e2cSThomas Gleixner 			else
79067792e2cSThomas Gleixner 				requeue = false;
79167792e2cSThomas Gleixner 		}
792397335f0SThomas Gleixner 	}
7931696a8beSPeter Zijlstra 
7941696a8beSPeter Zijlstra 	/*
79567792e2cSThomas Gleixner 	 * If the waiter priority is the same as the task priority
79667792e2cSThomas Gleixner 	 * then there is no further priority adjustment necessary.  If
79767792e2cSThomas Gleixner 	 * deadlock detection is off, we stop the chain walk. If its
79867792e2cSThomas Gleixner 	 * enabled we continue, but stop the requeueing in the chain
79967792e2cSThomas Gleixner 	 * walk.
8001696a8beSPeter Zijlstra 	 */
801*f7853c34SPeter Zijlstra 	if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
80267792e2cSThomas Gleixner 		if (!detect_deadlock)
8031696a8beSPeter Zijlstra 			goto out_unlock_pi;
80467792e2cSThomas Gleixner 		else
80567792e2cSThomas Gleixner 			requeue = false;
80667792e2cSThomas Gleixner 	}
8071696a8beSPeter Zijlstra 
8083eb65aeaSThomas Gleixner 	/*
809*f7853c34SPeter Zijlstra 	 * [4] Get the next lock; per holding task->pi_lock we can't unblock
810*f7853c34SPeter Zijlstra 	 * and guarantee @lock's existence.
8113eb65aeaSThomas Gleixner 	 */
8121696a8beSPeter Zijlstra 	lock = waiter->lock;
8133eb65aeaSThomas Gleixner 	/*
8143eb65aeaSThomas Gleixner 	 * [5] We need to trylock here as we are holding task->pi_lock,
8153eb65aeaSThomas Gleixner 	 * which is the reverse lock order versus the other rtmutex
8163eb65aeaSThomas Gleixner 	 * operations.
817*f7853c34SPeter Zijlstra 	 *
818*f7853c34SPeter Zijlstra 	 * Per the above, holding task->pi_lock guarantees lock exists, so
819*f7853c34SPeter Zijlstra 	 * inverting this lock order is infeasible from a life-time
820*f7853c34SPeter Zijlstra 	 * perspective.
8213eb65aeaSThomas Gleixner 	 */
8221696a8beSPeter Zijlstra 	if (!raw_spin_trylock(&lock->wait_lock)) {
823b4abf910SThomas Gleixner 		raw_spin_unlock_irq(&task->pi_lock);
8241696a8beSPeter Zijlstra 		cpu_relax();
8251696a8beSPeter Zijlstra 		goto retry;
8261696a8beSPeter Zijlstra 	}
8271696a8beSPeter Zijlstra 
828397335f0SThomas Gleixner 	/*
8293eb65aeaSThomas Gleixner 	 * [6] check_exit_conditions_2() protected by task->pi_lock and
8303eb65aeaSThomas Gleixner 	 * lock->wait_lock.
8313eb65aeaSThomas Gleixner 	 *
832397335f0SThomas Gleixner 	 * Deadlock detection. If the lock is the same as the original
833397335f0SThomas Gleixner 	 * lock which caused us to walk the lock chain or if the
834397335f0SThomas Gleixner 	 * current lock is owned by the task which initiated the chain
835397335f0SThomas Gleixner 	 * walk, we detected a deadlock.
836397335f0SThomas Gleixner 	 */
8371696a8beSPeter Zijlstra 	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
8383d5c9340SThomas Gleixner 		ret = -EDEADLK;
839a055fcc1SPeter Zijlstra 
840a055fcc1SPeter Zijlstra 		/*
841a055fcc1SPeter Zijlstra 		 * When the deadlock is due to ww_mutex; also see above. Don't
842a055fcc1SPeter Zijlstra 		 * report the deadlock and instead let the ww_mutex wound/die
843a055fcc1SPeter Zijlstra 		 * logic pick which of the contending threads gets -EDEADLK.
844a055fcc1SPeter Zijlstra 		 *
845a055fcc1SPeter Zijlstra 		 * NOTE: assumes the cycle only contains a single ww_class; any
846a055fcc1SPeter Zijlstra 		 * other configuration and we fail to report; also, see
847a055fcc1SPeter Zijlstra 		 * lockdep.
848a055fcc1SPeter Zijlstra 		 */
849e5480572SPeter Zijlstra 		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
850a055fcc1SPeter Zijlstra 			ret = 0;
851a055fcc1SPeter Zijlstra 
852a055fcc1SPeter Zijlstra 		raw_spin_unlock(&lock->wait_lock);
8531696a8beSPeter Zijlstra 		goto out_unlock_pi;
8541696a8beSPeter Zijlstra 	}
8551696a8beSPeter Zijlstra 
856a57594a1SThomas Gleixner 	/*
85767792e2cSThomas Gleixner 	 * If we just follow the lock chain for deadlock detection, no
85867792e2cSThomas Gleixner 	 * need to do all the requeue operations. To avoid a truckload
85967792e2cSThomas Gleixner 	 * of conditionals around the various places below, just do the
86067792e2cSThomas Gleixner 	 * minimum chain walk checks.
86167792e2cSThomas Gleixner 	 */
86267792e2cSThomas Gleixner 	if (!requeue) {
86367792e2cSThomas Gleixner 		/*
86467792e2cSThomas Gleixner 		 * No requeue[7] here. Just release @task [8]
86567792e2cSThomas Gleixner 		 */
866b4abf910SThomas Gleixner 		raw_spin_unlock(&task->pi_lock);
86767792e2cSThomas Gleixner 		put_task_struct(task);
86867792e2cSThomas Gleixner 
86967792e2cSThomas Gleixner 		/*
87067792e2cSThomas Gleixner 		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
87167792e2cSThomas Gleixner 		 * If there is no owner of the lock, end of chain.
87267792e2cSThomas Gleixner 		 */
87367792e2cSThomas Gleixner 		if (!rt_mutex_owner(lock)) {
874b4abf910SThomas Gleixner 			raw_spin_unlock_irq(&lock->wait_lock);
87567792e2cSThomas Gleixner 			return 0;
87667792e2cSThomas Gleixner 		}
87767792e2cSThomas Gleixner 
87867792e2cSThomas Gleixner 		/* [10] Grab the next task, i.e. owner of @lock */
8797b3c92b8SMatthew Wilcox (Oracle) 		task = get_task_struct(rt_mutex_owner(lock));
880b4abf910SThomas Gleixner 		raw_spin_lock(&task->pi_lock);
88167792e2cSThomas Gleixner 
88267792e2cSThomas Gleixner 		/*
88367792e2cSThomas Gleixner 		 * No requeue [11] here. We just do deadlock detection.
88467792e2cSThomas Gleixner 		 *
88567792e2cSThomas Gleixner 		 * [12] Store whether owner is blocked
88667792e2cSThomas Gleixner 		 * itself. Decision is made after dropping the locks
88767792e2cSThomas Gleixner 		 */
88867792e2cSThomas Gleixner 		next_lock = task_blocked_on_lock(task);
88967792e2cSThomas Gleixner 		/*
89067792e2cSThomas Gleixner 		 * Get the top waiter for the next iteration
89167792e2cSThomas Gleixner 		 */
89267792e2cSThomas Gleixner 		top_waiter = rt_mutex_top_waiter(lock);
89367792e2cSThomas Gleixner 
89467792e2cSThomas Gleixner 		/* [13] Drop locks */
895b4abf910SThomas Gleixner 		raw_spin_unlock(&task->pi_lock);
896b4abf910SThomas Gleixner 		raw_spin_unlock_irq(&lock->wait_lock);
89767792e2cSThomas Gleixner 
89867792e2cSThomas Gleixner 		/* If owner is not blocked, end of chain. */
89967792e2cSThomas Gleixner 		if (!next_lock)
90067792e2cSThomas Gleixner 			goto out_put_task;
90167792e2cSThomas Gleixner 		goto again;
90267792e2cSThomas Gleixner 	}
90367792e2cSThomas Gleixner 
90467792e2cSThomas Gleixner 	/*
905a57594a1SThomas Gleixner 	 * Store the current top waiter before doing the requeue
906a57594a1SThomas Gleixner 	 * operation on @lock. We need it for the boost/deboost
907a57594a1SThomas Gleixner 	 * decision below.
908a57594a1SThomas Gleixner 	 */
909a57594a1SThomas Gleixner 	prerequeue_top_waiter = rt_mutex_top_waiter(lock);
9101696a8beSPeter Zijlstra 
9119f40a51aSDavidlohr Bueso 	/* [7] Requeue the waiter in the lock waiter tree. */
912fb00aca4SPeter Zijlstra 	rt_mutex_dequeue(lock, waiter);
913e0aad5b4SPeter Zijlstra 
914e0aad5b4SPeter Zijlstra 	/*
915e0aad5b4SPeter Zijlstra 	 * Update the waiter prio fields now that we're dequeued.
916e0aad5b4SPeter Zijlstra 	 *
917e0aad5b4SPeter Zijlstra 	 * These values can have changed through either:
918e0aad5b4SPeter Zijlstra 	 *
919e0aad5b4SPeter Zijlstra 	 *   sys_sched_set_scheduler() / sys_sched_setattr()
920e0aad5b4SPeter Zijlstra 	 *
921e0aad5b4SPeter Zijlstra 	 * or
922e0aad5b4SPeter Zijlstra 	 *
923e0aad5b4SPeter Zijlstra 	 *   DL CBS enforcement advancing the effective deadline.
924e0aad5b4SPeter Zijlstra 	 */
925715f7f9eSPeter Zijlstra 	waiter_update_prio(waiter, task);
926e0aad5b4SPeter Zijlstra 
927fb00aca4SPeter Zijlstra 	rt_mutex_enqueue(lock, waiter);
9281696a8beSPeter Zijlstra 
929*f7853c34SPeter Zijlstra 	/*
930*f7853c34SPeter Zijlstra 	 * [8] Release the (blocking) task in preparation for
931*f7853c34SPeter Zijlstra 	 * taking the owner task in [10].
932*f7853c34SPeter Zijlstra 	 *
933*f7853c34SPeter Zijlstra 	 * Since we hold lock->waiter_lock, task cannot unblock, even if we
934*f7853c34SPeter Zijlstra 	 * release task->pi_lock.
935*f7853c34SPeter Zijlstra 	 */
936b4abf910SThomas Gleixner 	raw_spin_unlock(&task->pi_lock);
9372ffa5a5cSThomas Gleixner 	put_task_struct(task);
9382ffa5a5cSThomas Gleixner 
939a57594a1SThomas Gleixner 	/*
9403eb65aeaSThomas Gleixner 	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
9413eb65aeaSThomas Gleixner 	 *
942a57594a1SThomas Gleixner 	 * We must abort the chain walk if there is no lock owner even
943a57594a1SThomas Gleixner 	 * in the dead lock detection case, as we have nothing to
944a57594a1SThomas Gleixner 	 * follow here. This is the end of the chain we are walking.
945a57594a1SThomas Gleixner 	 */
9461696a8beSPeter Zijlstra 	if (!rt_mutex_owner(lock)) {
9471696a8beSPeter Zijlstra 		/*
9483eb65aeaSThomas Gleixner 		 * If the requeue [7] above changed the top waiter,
9493eb65aeaSThomas Gleixner 		 * then we need to wake the new top waiter up to try
9503eb65aeaSThomas Gleixner 		 * to get the lock.
9511696a8beSPeter Zijlstra 		 */
952db370a8bSWander Lairson Costa 		top_waiter = rt_mutex_top_waiter(lock);
953db370a8bSWander Lairson Costa 		if (prerequeue_top_waiter != top_waiter)
954db370a8bSWander Lairson Costa 			wake_up_state(top_waiter->task, top_waiter->wake_state);
955b4abf910SThomas Gleixner 		raw_spin_unlock_irq(&lock->wait_lock);
9562ffa5a5cSThomas Gleixner 		return 0;
9571696a8beSPeter Zijlstra 	}
9581696a8beSPeter Zijlstra 
959*f7853c34SPeter Zijlstra 	/*
960*f7853c34SPeter Zijlstra 	 * [10] Grab the next task, i.e. the owner of @lock
961*f7853c34SPeter Zijlstra 	 *
962*f7853c34SPeter Zijlstra 	 * Per holding lock->wait_lock and checking for !owner above, there
963*f7853c34SPeter Zijlstra 	 * must be an owner and it cannot go away.
964*f7853c34SPeter Zijlstra 	 */
9657b3c92b8SMatthew Wilcox (Oracle) 	task = get_task_struct(rt_mutex_owner(lock));
966b4abf910SThomas Gleixner 	raw_spin_lock(&task->pi_lock);
9671696a8beSPeter Zijlstra 
9683eb65aeaSThomas Gleixner 	/* [11] requeue the pi waiters if necessary */
9691696a8beSPeter Zijlstra 	if (waiter == rt_mutex_top_waiter(lock)) {
970a57594a1SThomas Gleixner 		/*
971a57594a1SThomas Gleixner 		 * The waiter became the new top (highest priority)
972a57594a1SThomas Gleixner 		 * waiter on the lock. Replace the previous top waiter
9739f40a51aSDavidlohr Bueso 		 * in the owner tasks pi waiters tree with this waiter
974a57594a1SThomas Gleixner 		 * and adjust the priority of the owner.
975a57594a1SThomas Gleixner 		 */
976a57594a1SThomas Gleixner 		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
977*f7853c34SPeter Zijlstra 		waiter_clone_prio(waiter, task);
978fb00aca4SPeter Zijlstra 		rt_mutex_enqueue_pi(task, waiter);
979*f7853c34SPeter Zijlstra 		rt_mutex_adjust_prio(lock, task);
9801696a8beSPeter Zijlstra 
981a57594a1SThomas Gleixner 	} else if (prerequeue_top_waiter == waiter) {
982a57594a1SThomas Gleixner 		/*
983a57594a1SThomas Gleixner 		 * The waiter was the top waiter on the lock, but is
984e2db7592SIngo Molnar 		 * no longer the top priority waiter. Replace waiter in
9859f40a51aSDavidlohr Bueso 		 * the owner tasks pi waiters tree with the new top
986a57594a1SThomas Gleixner 		 * (highest priority) waiter and adjust the priority
987a57594a1SThomas Gleixner 		 * of the owner.
988a57594a1SThomas Gleixner 		 * The new top waiter is stored in @waiter so that
989a57594a1SThomas Gleixner 		 * @waiter == @top_waiter evaluates to true below and
990a57594a1SThomas Gleixner 		 * we continue to deboost the rest of the chain.
991a57594a1SThomas Gleixner 		 */
992fb00aca4SPeter Zijlstra 		rt_mutex_dequeue_pi(task, waiter);
9931696a8beSPeter Zijlstra 		waiter = rt_mutex_top_waiter(lock);
994*f7853c34SPeter Zijlstra 		waiter_clone_prio(waiter, task);
995fb00aca4SPeter Zijlstra 		rt_mutex_enqueue_pi(task, waiter);
996*f7853c34SPeter Zijlstra 		rt_mutex_adjust_prio(lock, task);
997a57594a1SThomas Gleixner 	} else {
998a57594a1SThomas Gleixner 		/*
999a57594a1SThomas Gleixner 		 * Nothing changed. No need to do any priority
1000a57594a1SThomas Gleixner 		 * adjustment.
1001a57594a1SThomas Gleixner 		 */
10021696a8beSPeter Zijlstra 	}
10031696a8beSPeter Zijlstra 
100482084984SThomas Gleixner 	/*
10053eb65aeaSThomas Gleixner 	 * [12] check_exit_conditions_4() protected by task->pi_lock
10063eb65aeaSThomas Gleixner 	 * and lock->wait_lock. The actual decisions are made after we
10073eb65aeaSThomas Gleixner 	 * dropped the locks.
10083eb65aeaSThomas Gleixner 	 *
100982084984SThomas Gleixner 	 * Check whether the task which owns the current lock is pi
101082084984SThomas Gleixner 	 * blocked itself. If yes we store a pointer to the lock for
101182084984SThomas Gleixner 	 * the lock chain change detection above. After we dropped
101282084984SThomas Gleixner 	 * task->pi_lock next_lock cannot be dereferenced anymore.
101382084984SThomas Gleixner 	 */
101482084984SThomas Gleixner 	next_lock = task_blocked_on_lock(task);
1015a57594a1SThomas Gleixner 	/*
1016a57594a1SThomas Gleixner 	 * Store the top waiter of @lock for the end of chain walk
1017a57594a1SThomas Gleixner 	 * decision below.
1018a57594a1SThomas Gleixner 	 */
10191696a8beSPeter Zijlstra 	top_waiter = rt_mutex_top_waiter(lock);
10203eb65aeaSThomas Gleixner 
10213eb65aeaSThomas Gleixner 	/* [13] Drop the locks */
1022b4abf910SThomas Gleixner 	raw_spin_unlock(&task->pi_lock);
1023b4abf910SThomas Gleixner 	raw_spin_unlock_irq(&lock->wait_lock);
10241696a8beSPeter Zijlstra 
102582084984SThomas Gleixner 	/*
10263eb65aeaSThomas Gleixner 	 * Make the actual exit decisions [12], based on the stored
10273eb65aeaSThomas Gleixner 	 * values.
10283eb65aeaSThomas Gleixner 	 *
102982084984SThomas Gleixner 	 * We reached the end of the lock chain. Stop right here. No
103082084984SThomas Gleixner 	 * point to go back just to figure that out.
103182084984SThomas Gleixner 	 */
103282084984SThomas Gleixner 	if (!next_lock)
103382084984SThomas Gleixner 		goto out_put_task;
103482084984SThomas Gleixner 
1035a57594a1SThomas Gleixner 	/*
1036a57594a1SThomas Gleixner 	 * If the current waiter is not the top waiter on the lock,
1037a57594a1SThomas Gleixner 	 * then we can stop the chain walk here if we are not in full
1038a57594a1SThomas Gleixner 	 * deadlock detection mode.
1039a57594a1SThomas Gleixner 	 */
10401696a8beSPeter Zijlstra 	if (!detect_deadlock && waiter != top_waiter)
10411696a8beSPeter Zijlstra 		goto out_put_task;
10421696a8beSPeter Zijlstra 
10431696a8beSPeter Zijlstra 	goto again;
10441696a8beSPeter Zijlstra 
10451696a8beSPeter Zijlstra  out_unlock_pi:
1046b4abf910SThomas Gleixner 	raw_spin_unlock_irq(&task->pi_lock);
10471696a8beSPeter Zijlstra  out_put_task:
10481696a8beSPeter Zijlstra 	put_task_struct(task);
10491696a8beSPeter Zijlstra 
10501696a8beSPeter Zijlstra 	return ret;
10511696a8beSPeter Zijlstra }
10521696a8beSPeter Zijlstra 
10531696a8beSPeter Zijlstra /*
10541696a8beSPeter Zijlstra  * Try to take an rt-mutex
10551696a8beSPeter Zijlstra  *
1056b4abf910SThomas Gleixner  * Must be called with lock->wait_lock held and interrupts disabled
10571696a8beSPeter Zijlstra  *
1058358c331fSThomas Gleixner  * @lock:   The lock to be acquired.
1059358c331fSThomas Gleixner  * @task:   The task which wants to acquire the lock
10609f40a51aSDavidlohr Bueso  * @waiter: The waiter that is queued to the lock's wait tree if the
1061358c331fSThomas Gleixner  *	    callsite called task_blocked_on_lock(), otherwise NULL
10621696a8beSPeter Zijlstra  */
1063d7a2edb8SThomas Gleixner static int __sched
1064830e6accSPeter Zijlstra try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
10651696a8beSPeter Zijlstra 		     struct rt_mutex_waiter *waiter)
10661696a8beSPeter Zijlstra {
1067e0aad5b4SPeter Zijlstra 	lockdep_assert_held(&lock->wait_lock);
1068e0aad5b4SPeter Zijlstra 
10691696a8beSPeter Zijlstra 	/*
1070358c331fSThomas Gleixner 	 * Before testing whether we can acquire @lock, we set the
1071358c331fSThomas Gleixner 	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
1072358c331fSThomas Gleixner 	 * other tasks which try to modify @lock into the slow path
1073358c331fSThomas Gleixner 	 * and they serialize on @lock->wait_lock.
10741696a8beSPeter Zijlstra 	 *
1075358c331fSThomas Gleixner 	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
1076358c331fSThomas Gleixner 	 * as explained at the top of this file if and only if:
10771696a8beSPeter Zijlstra 	 *
1078358c331fSThomas Gleixner 	 * - There is a lock owner. The caller must fixup the
1079358c331fSThomas Gleixner 	 *   transient state if it does a trylock or leaves the lock
1080358c331fSThomas Gleixner 	 *   function due to a signal or timeout.
1081358c331fSThomas Gleixner 	 *
1082358c331fSThomas Gleixner 	 * - @task acquires the lock and there are no other
1083358c331fSThomas Gleixner 	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
1084358c331fSThomas Gleixner 	 *   the end of this function.
10851696a8beSPeter Zijlstra 	 */
10861696a8beSPeter Zijlstra 	mark_rt_mutex_waiters(lock);
10871696a8beSPeter Zijlstra 
1088358c331fSThomas Gleixner 	/*
1089358c331fSThomas Gleixner 	 * If @lock has an owner, give up.
1090358c331fSThomas Gleixner 	 */
10911696a8beSPeter Zijlstra 	if (rt_mutex_owner(lock))
10921696a8beSPeter Zijlstra 		return 0;
10931696a8beSPeter Zijlstra 
10941696a8beSPeter Zijlstra 	/*
1095358c331fSThomas Gleixner 	 * If @waiter != NULL, @task has already enqueued the waiter
10969f40a51aSDavidlohr Bueso 	 * into @lock waiter tree. If @waiter == NULL then this is a
1097358c331fSThomas Gleixner 	 * trylock attempt.
1098358c331fSThomas Gleixner 	 */
1099358c331fSThomas Gleixner 	if (waiter) {
110048eb3f4fSGregory Haskins 		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
1101358c331fSThomas Gleixner 
1102358c331fSThomas Gleixner 		/*
110348eb3f4fSGregory Haskins 		 * If waiter is the highest priority waiter of @lock,
110448eb3f4fSGregory Haskins 		 * or allowed to steal it, take it over.
110548eb3f4fSGregory Haskins 		 */
110648eb3f4fSGregory Haskins 		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
110748eb3f4fSGregory Haskins 			/*
1108358c331fSThomas Gleixner 			 * We can acquire the lock. Remove the waiter from the
11099f40a51aSDavidlohr Bueso 			 * lock waiters tree.
1110358c331fSThomas Gleixner 			 */
1111358c331fSThomas Gleixner 			rt_mutex_dequeue(lock, waiter);
111248eb3f4fSGregory Haskins 		} else {
111348eb3f4fSGregory Haskins 			return 0;
111448eb3f4fSGregory Haskins 		}
1115358c331fSThomas Gleixner 	} else {
1116358c331fSThomas Gleixner 		/*
1117358c331fSThomas Gleixner 		 * If the lock has waiters already we check whether @task is
1118358c331fSThomas Gleixner 		 * eligible to take over the lock.
1119358c331fSThomas Gleixner 		 *
1120358c331fSThomas Gleixner 		 * If there are no other waiters, @task can acquire
1121358c331fSThomas Gleixner 		 * the lock.  @task->pi_blocked_on is NULL, so it does
1122358c331fSThomas Gleixner 		 * not need to be dequeued.
11231696a8beSPeter Zijlstra 		 */
11241696a8beSPeter Zijlstra 		if (rt_mutex_has_waiters(lock)) {
112548eb3f4fSGregory Haskins 			/* Check whether the trylock can steal it. */
112648eb3f4fSGregory Haskins 			if (!rt_mutex_steal(task_to_waiter(task),
112719830e55SPeter Zijlstra 					    rt_mutex_top_waiter(lock)))
11281696a8beSPeter Zijlstra 				return 0;
1129358c331fSThomas Gleixner 
1130358c331fSThomas Gleixner 			/*
1131358c331fSThomas Gleixner 			 * The current top waiter stays enqueued. We
1132358c331fSThomas Gleixner 			 * don't have to change anything in the lock
1133358c331fSThomas Gleixner 			 * waiters order.
1134358c331fSThomas Gleixner 			 */
1135358c331fSThomas Gleixner 		} else {
1136358c331fSThomas Gleixner 			/*
1137358c331fSThomas Gleixner 			 * No waiters. Take the lock without the
1138358c331fSThomas Gleixner 			 * pi_lock dance.@task->pi_blocked_on is NULL
1139358c331fSThomas Gleixner 			 * and we have no waiters to enqueue in @task
11409f40a51aSDavidlohr Bueso 			 * pi waiters tree.
1141358c331fSThomas Gleixner 			 */
1142358c331fSThomas Gleixner 			goto takeit;
11431696a8beSPeter Zijlstra 		}
11441696a8beSPeter Zijlstra 	}
11451696a8beSPeter Zijlstra 
11461696a8beSPeter Zijlstra 	/*
1147358c331fSThomas Gleixner 	 * Clear @task->pi_blocked_on. Requires protection by
1148358c331fSThomas Gleixner 	 * @task->pi_lock. Redundant operation for the @waiter == NULL
1149358c331fSThomas Gleixner 	 * case, but conditionals are more expensive than a redundant
1150358c331fSThomas Gleixner 	 * store.
11511696a8beSPeter Zijlstra 	 */
1152b4abf910SThomas Gleixner 	raw_spin_lock(&task->pi_lock);
1153358c331fSThomas Gleixner 	task->pi_blocked_on = NULL;
1154358c331fSThomas Gleixner 	/*
1155358c331fSThomas Gleixner 	 * Finish the lock acquisition. @task is the new owner. If
1156358c331fSThomas Gleixner 	 * other waiters exist we have to insert the highest priority
11579f40a51aSDavidlohr Bueso 	 * waiter into @task->pi_waiters tree.
1158358c331fSThomas Gleixner 	 */
1159358c331fSThomas Gleixner 	if (rt_mutex_has_waiters(lock))
1160358c331fSThomas Gleixner 		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
1161b4abf910SThomas Gleixner 	raw_spin_unlock(&task->pi_lock);
11621696a8beSPeter Zijlstra 
1163358c331fSThomas Gleixner takeit:
1164358c331fSThomas Gleixner 	/*
1165358c331fSThomas Gleixner 	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
1166358c331fSThomas Gleixner 	 * are still waiters or clears it.
1167358c331fSThomas Gleixner 	 */
11681696a8beSPeter Zijlstra 	rt_mutex_set_owner(lock, task);
11691696a8beSPeter Zijlstra 
11701696a8beSPeter Zijlstra 	return 1;
11711696a8beSPeter Zijlstra }
11721696a8beSPeter Zijlstra 
11731696a8beSPeter Zijlstra /*
11741696a8beSPeter Zijlstra  * Task blocks on lock.
11751696a8beSPeter Zijlstra  *
11761696a8beSPeter Zijlstra  * Prepare waiter and propagate pi chain
11771696a8beSPeter Zijlstra  *
1178b4abf910SThomas Gleixner  * This must be called with lock->wait_lock held and interrupts disabled
11791696a8beSPeter Zijlstra  */
1180830e6accSPeter Zijlstra static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
11811696a8beSPeter Zijlstra 					   struct rt_mutex_waiter *waiter,
11821696a8beSPeter Zijlstra 					   struct task_struct *task,
1183add46132SPeter Zijlstra 					   struct ww_acquire_ctx *ww_ctx,
11848930ed80SThomas Gleixner 					   enum rtmutex_chainwalk chwalk)
11851696a8beSPeter Zijlstra {
11861696a8beSPeter Zijlstra 	struct task_struct *owner = rt_mutex_owner(lock);
11871696a8beSPeter Zijlstra 	struct rt_mutex_waiter *top_waiter = waiter;
1188830e6accSPeter Zijlstra 	struct rt_mutex_base *next_lock;
11891696a8beSPeter Zijlstra 	int chain_walk = 0, res;
11901696a8beSPeter Zijlstra 
1191e0aad5b4SPeter Zijlstra 	lockdep_assert_held(&lock->wait_lock);
1192e0aad5b4SPeter Zijlstra 
1193397335f0SThomas Gleixner 	/*
1194397335f0SThomas Gleixner 	 * Early deadlock detection. We really don't want the task to
1195397335f0SThomas Gleixner 	 * enqueue on itself just to untangle the mess later. It's not
1196397335f0SThomas Gleixner 	 * only an optimization. We drop the locks, so another waiter
1197397335f0SThomas Gleixner 	 * can come in before the chain walk detects the deadlock. So
1198397335f0SThomas Gleixner 	 * the other will detect the deadlock and return -EDEADLOCK,
1199397335f0SThomas Gleixner 	 * which is wrong, as the other waiter is not in a deadlock
1200397335f0SThomas Gleixner 	 * situation.
120102ea9fc9SPeter Zijlstra 	 *
120202ea9fc9SPeter Zijlstra 	 * Except for ww_mutex, in that case the chain walk must already deal
120302ea9fc9SPeter Zijlstra 	 * with spurious cycles, see the comments at [3] and [6].
1204397335f0SThomas Gleixner 	 */
120502ea9fc9SPeter Zijlstra 	if (owner == task && !(build_ww_mutex() && ww_ctx))
1206397335f0SThomas Gleixner 		return -EDEADLK;
1207397335f0SThomas Gleixner 
1208b4abf910SThomas Gleixner 	raw_spin_lock(&task->pi_lock);
12091696a8beSPeter Zijlstra 	waiter->task = task;
12101696a8beSPeter Zijlstra 	waiter->lock = lock;
1211715f7f9eSPeter Zijlstra 	waiter_update_prio(waiter, task);
1212*f7853c34SPeter Zijlstra 	waiter_clone_prio(waiter, task);
12131696a8beSPeter Zijlstra 
12141696a8beSPeter Zijlstra 	/* Get the top priority waiter on the lock */
12151696a8beSPeter Zijlstra 	if (rt_mutex_has_waiters(lock))
12161696a8beSPeter Zijlstra 		top_waiter = rt_mutex_top_waiter(lock);
1217fb00aca4SPeter Zijlstra 	rt_mutex_enqueue(lock, waiter);
12181696a8beSPeter Zijlstra 
12191696a8beSPeter Zijlstra 	task->pi_blocked_on = waiter;
12201696a8beSPeter Zijlstra 
1221b4abf910SThomas Gleixner 	raw_spin_unlock(&task->pi_lock);
12221696a8beSPeter Zijlstra 
1223add46132SPeter Zijlstra 	if (build_ww_mutex() && ww_ctx) {
1224add46132SPeter Zijlstra 		struct rt_mutex *rtm;
1225add46132SPeter Zijlstra 
1226add46132SPeter Zijlstra 		/* Check whether the waiter should back out immediately */
1227add46132SPeter Zijlstra 		rtm = container_of(lock, struct rt_mutex, rtmutex);
1228add46132SPeter Zijlstra 		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
122937e8abffSThomas Gleixner 		if (res) {
123037e8abffSThomas Gleixner 			raw_spin_lock(&task->pi_lock);
123137e8abffSThomas Gleixner 			rt_mutex_dequeue(lock, waiter);
123237e8abffSThomas Gleixner 			task->pi_blocked_on = NULL;
123337e8abffSThomas Gleixner 			raw_spin_unlock(&task->pi_lock);
1234add46132SPeter Zijlstra 			return res;
1235add46132SPeter Zijlstra 		}
123637e8abffSThomas Gleixner 	}
1237add46132SPeter Zijlstra 
12381696a8beSPeter Zijlstra 	if (!owner)
12391696a8beSPeter Zijlstra 		return 0;
12401696a8beSPeter Zijlstra 
1241b4abf910SThomas Gleixner 	raw_spin_lock(&owner->pi_lock);
124282084984SThomas Gleixner 	if (waiter == rt_mutex_top_waiter(lock)) {
1243fb00aca4SPeter Zijlstra 		rt_mutex_dequeue_pi(owner, top_waiter);
1244fb00aca4SPeter Zijlstra 		rt_mutex_enqueue_pi(owner, waiter);
12451696a8beSPeter Zijlstra 
1246*f7853c34SPeter Zijlstra 		rt_mutex_adjust_prio(lock, owner);
12471696a8beSPeter Zijlstra 		if (owner->pi_blocked_on)
12481696a8beSPeter Zijlstra 			chain_walk = 1;
12498930ed80SThomas Gleixner 	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
12501696a8beSPeter Zijlstra 		chain_walk = 1;
125182084984SThomas Gleixner 	}
12521696a8beSPeter Zijlstra 
125382084984SThomas Gleixner 	/* Store the lock on which owner is blocked or NULL */
125482084984SThomas Gleixner 	next_lock = task_blocked_on_lock(owner);
125582084984SThomas Gleixner 
1256b4abf910SThomas Gleixner 	raw_spin_unlock(&owner->pi_lock);
125782084984SThomas Gleixner 	/*
125882084984SThomas Gleixner 	 * Even if full deadlock detection is on, if the owner is not
125982084984SThomas Gleixner 	 * blocked itself, we can avoid finding this out in the chain
126082084984SThomas Gleixner 	 * walk.
126182084984SThomas Gleixner 	 */
126282084984SThomas Gleixner 	if (!chain_walk || !next_lock)
12631696a8beSPeter Zijlstra 		return 0;
12641696a8beSPeter Zijlstra 
12651696a8beSPeter Zijlstra 	/*
12661696a8beSPeter Zijlstra 	 * The owner can't disappear while holding a lock,
12671696a8beSPeter Zijlstra 	 * so the owner struct is protected by wait_lock.
12681696a8beSPeter Zijlstra 	 * Gets dropped in rt_mutex_adjust_prio_chain()!
12691696a8beSPeter Zijlstra 	 */
12701696a8beSPeter Zijlstra 	get_task_struct(owner);
12711696a8beSPeter Zijlstra 
1272b4abf910SThomas Gleixner 	raw_spin_unlock_irq(&lock->wait_lock);
12731696a8beSPeter Zijlstra 
12748930ed80SThomas Gleixner 	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
127582084984SThomas Gleixner 					 next_lock, waiter, task);
12761696a8beSPeter Zijlstra 
1277b4abf910SThomas Gleixner 	raw_spin_lock_irq(&lock->wait_lock);
12781696a8beSPeter Zijlstra 
12791696a8beSPeter Zijlstra 	return res;
12801696a8beSPeter Zijlstra }
12811696a8beSPeter Zijlstra 
12821696a8beSPeter Zijlstra /*
12839f40a51aSDavidlohr Bueso  * Remove the top waiter from the current tasks pi waiter tree and
128445ab4effSDavidlohr Bueso  * queue it up.
12851696a8beSPeter Zijlstra  *
1286b4abf910SThomas Gleixner  * Called with lock->wait_lock held and interrupts disabled.
12871696a8beSPeter Zijlstra  */
12887980aa39SThomas Gleixner static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
1289830e6accSPeter Zijlstra 					    struct rt_mutex_base *lock)
12901696a8beSPeter Zijlstra {
12911696a8beSPeter Zijlstra 	struct rt_mutex_waiter *waiter;
12921696a8beSPeter Zijlstra 
1293*f7853c34SPeter Zijlstra 	lockdep_assert_held(&lock->wait_lock);
1294*f7853c34SPeter Zijlstra 
1295b4abf910SThomas Gleixner 	raw_spin_lock(&current->pi_lock);
12961696a8beSPeter Zijlstra 
12971696a8beSPeter Zijlstra 	waiter = rt_mutex_top_waiter(lock);
12981696a8beSPeter Zijlstra 
12991696a8beSPeter Zijlstra 	/*
1300acd58620SPeter Zijlstra 	 * Remove it from current->pi_waiters and deboost.
1301acd58620SPeter Zijlstra 	 *
1302acd58620SPeter Zijlstra 	 * We must in fact deboost here in order to ensure we call
1303acd58620SPeter Zijlstra 	 * rt_mutex_setprio() to update p->pi_top_task before the
1304acd58620SPeter Zijlstra 	 * task unblocks.
13051696a8beSPeter Zijlstra 	 */
1306fb00aca4SPeter Zijlstra 	rt_mutex_dequeue_pi(current, waiter);
1307*f7853c34SPeter Zijlstra 	rt_mutex_adjust_prio(lock, current);
13081696a8beSPeter Zijlstra 
130927e35715SThomas Gleixner 	/*
131027e35715SThomas Gleixner 	 * As we are waking up the top waiter, and the waiter stays
131127e35715SThomas Gleixner 	 * queued on the lock until it gets the lock, this lock
131227e35715SThomas Gleixner 	 * obviously has waiters. Just set the bit here and this has
131327e35715SThomas Gleixner 	 * the added benefit of forcing all new tasks into the
131427e35715SThomas Gleixner 	 * slow path making sure no task of lower priority than
131527e35715SThomas Gleixner 	 * the top waiter can steal this lock.
131627e35715SThomas Gleixner 	 */
131727e35715SThomas Gleixner 	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
13181696a8beSPeter Zijlstra 
1319acd58620SPeter Zijlstra 	/*
1320acd58620SPeter Zijlstra 	 * We deboosted before waking the top waiter task such that we don't
1321acd58620SPeter Zijlstra 	 * run two tasks with the 'same' priority (and ensure the
1322acd58620SPeter Zijlstra 	 * p->pi_top_task pointer points to a blocked task). This however can
1323acd58620SPeter Zijlstra 	 * lead to priority inversion if we would get preempted after the
1324acd58620SPeter Zijlstra 	 * deboost but before waking our donor task, hence the preempt_disable()
1325acd58620SPeter Zijlstra 	 * before unlock.
1326acd58620SPeter Zijlstra 	 *
13277980aa39SThomas Gleixner 	 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
1328acd58620SPeter Zijlstra 	 */
1329acd58620SPeter Zijlstra 	preempt_disable();
13307980aa39SThomas Gleixner 	rt_mutex_wake_q_add(wqh, waiter);
1331acd58620SPeter Zijlstra 	raw_spin_unlock(&current->pi_lock);
13321696a8beSPeter Zijlstra }
13331696a8beSPeter Zijlstra 
1334e17ba59bSThomas Gleixner static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1335e17ba59bSThomas Gleixner {
1336e17ba59bSThomas Gleixner 	int ret = try_to_take_rt_mutex(lock, current, NULL);
1337e17ba59bSThomas Gleixner 
1338e17ba59bSThomas Gleixner 	/*
1339e17ba59bSThomas Gleixner 	 * try_to_take_rt_mutex() sets the lock waiters bit
1340e17ba59bSThomas Gleixner 	 * unconditionally. Clean this up.
1341e17ba59bSThomas Gleixner 	 */
13421c0908d8SMel Gorman 	fixup_rt_mutex_waiters(lock, true);
1343e17ba59bSThomas Gleixner 
1344e17ba59bSThomas Gleixner 	return ret;
1345e17ba59bSThomas Gleixner }
1346e17ba59bSThomas Gleixner 
1347e17ba59bSThomas Gleixner /*
1348e17ba59bSThomas Gleixner  * Slow path try-lock function:
1349e17ba59bSThomas Gleixner  */
1350e17ba59bSThomas Gleixner static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1351e17ba59bSThomas Gleixner {
1352e17ba59bSThomas Gleixner 	unsigned long flags;
1353e17ba59bSThomas Gleixner 	int ret;
1354e17ba59bSThomas Gleixner 
1355e17ba59bSThomas Gleixner 	/*
1356e17ba59bSThomas Gleixner 	 * If the lock already has an owner we fail to get the lock.
1357e17ba59bSThomas Gleixner 	 * This can be done without taking the @lock->wait_lock as
1358e17ba59bSThomas Gleixner 	 * it is only being read, and this is a trylock anyway.
1359e17ba59bSThomas Gleixner 	 */
1360e17ba59bSThomas Gleixner 	if (rt_mutex_owner(lock))
1361e17ba59bSThomas Gleixner 		return 0;
1362e17ba59bSThomas Gleixner 
1363e17ba59bSThomas Gleixner 	/*
1364e17ba59bSThomas Gleixner 	 * The mutex has currently no owner. Lock the wait lock and try to
1365e17ba59bSThomas Gleixner 	 * acquire the lock. We use irqsave here to support early boot calls.
1366e17ba59bSThomas Gleixner 	 */
1367e17ba59bSThomas Gleixner 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
1368e17ba59bSThomas Gleixner 
1369e17ba59bSThomas Gleixner 	ret = __rt_mutex_slowtrylock(lock);
1370e17ba59bSThomas Gleixner 
1371e17ba59bSThomas Gleixner 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1372e17ba59bSThomas Gleixner 
1373e17ba59bSThomas Gleixner 	return ret;
1374e17ba59bSThomas Gleixner }
1375e17ba59bSThomas Gleixner 
1376e17ba59bSThomas Gleixner static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
1377e17ba59bSThomas Gleixner {
1378e17ba59bSThomas Gleixner 	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1379e17ba59bSThomas Gleixner 		return 1;
1380e17ba59bSThomas Gleixner 
1381e17ba59bSThomas Gleixner 	return rt_mutex_slowtrylock(lock);
1382e17ba59bSThomas Gleixner }
1383e17ba59bSThomas Gleixner 
1384e17ba59bSThomas Gleixner /*
1385e17ba59bSThomas Gleixner  * Slow path to release a rt-mutex.
1386e17ba59bSThomas Gleixner  */
1387e17ba59bSThomas Gleixner static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
1388e17ba59bSThomas Gleixner {
1389e17ba59bSThomas Gleixner 	DEFINE_RT_WAKE_Q(wqh);
1390e17ba59bSThomas Gleixner 	unsigned long flags;
1391e17ba59bSThomas Gleixner 
1392e17ba59bSThomas Gleixner 	/* irqsave required to support early boot calls */
1393e17ba59bSThomas Gleixner 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
1394e17ba59bSThomas Gleixner 
1395e17ba59bSThomas Gleixner 	debug_rt_mutex_unlock(lock);
1396e17ba59bSThomas Gleixner 
1397e17ba59bSThomas Gleixner 	/*
1398e17ba59bSThomas Gleixner 	 * We must be careful here if the fast path is enabled. If we
1399e17ba59bSThomas Gleixner 	 * have no waiters queued we cannot set owner to NULL here
1400e17ba59bSThomas Gleixner 	 * because of:
1401e17ba59bSThomas Gleixner 	 *
1402e17ba59bSThomas Gleixner 	 * foo->lock->owner = NULL;
1403e17ba59bSThomas Gleixner 	 *			rtmutex_lock(foo->lock);   <- fast path
1404e17ba59bSThomas Gleixner 	 *			free = atomic_dec_and_test(foo->refcnt);
1405e17ba59bSThomas Gleixner 	 *			rtmutex_unlock(foo->lock); <- fast path
1406e17ba59bSThomas Gleixner 	 *			if (free)
1407e17ba59bSThomas Gleixner 	 *				kfree(foo);
1408e17ba59bSThomas Gleixner 	 * raw_spin_unlock(foo->lock->wait_lock);
1409e17ba59bSThomas Gleixner 	 *
1410e17ba59bSThomas Gleixner 	 * So for the fastpath enabled kernel:
1411e17ba59bSThomas Gleixner 	 *
1412e17ba59bSThomas Gleixner 	 * Nothing can set the waiters bit as long as we hold
1413e17ba59bSThomas Gleixner 	 * lock->wait_lock. So we do the following sequence:
1414e17ba59bSThomas Gleixner 	 *
1415e17ba59bSThomas Gleixner 	 *	owner = rt_mutex_owner(lock);
1416e17ba59bSThomas Gleixner 	 *	clear_rt_mutex_waiters(lock);
1417e17ba59bSThomas Gleixner 	 *	raw_spin_unlock(&lock->wait_lock);
1418e17ba59bSThomas Gleixner 	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
1419e17ba59bSThomas Gleixner 	 *		return;
1420e17ba59bSThomas Gleixner 	 *	goto retry;
1421e17ba59bSThomas Gleixner 	 *
1422e17ba59bSThomas Gleixner 	 * The fastpath disabled variant is simple as all access to
1423e17ba59bSThomas Gleixner 	 * lock->owner is serialized by lock->wait_lock:
1424e17ba59bSThomas Gleixner 	 *
1425e17ba59bSThomas Gleixner 	 *	lock->owner = NULL;
1426e17ba59bSThomas Gleixner 	 *	raw_spin_unlock(&lock->wait_lock);
1427e17ba59bSThomas Gleixner 	 */
1428e17ba59bSThomas Gleixner 	while (!rt_mutex_has_waiters(lock)) {
1429e17ba59bSThomas Gleixner 		/* Drops lock->wait_lock ! */
1430e17ba59bSThomas Gleixner 		if (unlock_rt_mutex_safe(lock, flags) == true)
1431e17ba59bSThomas Gleixner 			return;
1432e17ba59bSThomas Gleixner 		/* Relock the rtmutex and try again */
1433e17ba59bSThomas Gleixner 		raw_spin_lock_irqsave(&lock->wait_lock, flags);
1434e17ba59bSThomas Gleixner 	}
1435e17ba59bSThomas Gleixner 
1436e17ba59bSThomas Gleixner 	/*
1437e17ba59bSThomas Gleixner 	 * The wakeup next waiter path does not suffer from the above
1438e17ba59bSThomas Gleixner 	 * race. See the comments there.
1439e17ba59bSThomas Gleixner 	 *
1440e17ba59bSThomas Gleixner 	 * Queue the next waiter for wakeup once we release the wait_lock.
1441e17ba59bSThomas Gleixner 	 */
1442e17ba59bSThomas Gleixner 	mark_wakeup_next_waiter(&wqh, lock);
1443e17ba59bSThomas Gleixner 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1444e17ba59bSThomas Gleixner 
1445e17ba59bSThomas Gleixner 	rt_mutex_wake_up_q(&wqh);
1446e17ba59bSThomas Gleixner }
1447e17ba59bSThomas Gleixner 
1448e17ba59bSThomas Gleixner static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
1449e17ba59bSThomas Gleixner {
1450e17ba59bSThomas Gleixner 	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1451e17ba59bSThomas Gleixner 		return;
1452e17ba59bSThomas Gleixner 
1453e17ba59bSThomas Gleixner 	rt_mutex_slowunlock(lock);
1454e17ba59bSThomas Gleixner }
1455e17ba59bSThomas Gleixner 
1456992caf7fSSteven Rostedt #ifdef CONFIG_SMP
1457992caf7fSSteven Rostedt static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1458992caf7fSSteven Rostedt 				  struct rt_mutex_waiter *waiter,
1459992caf7fSSteven Rostedt 				  struct task_struct *owner)
1460992caf7fSSteven Rostedt {
1461992caf7fSSteven Rostedt 	bool res = true;
1462992caf7fSSteven Rostedt 
1463992caf7fSSteven Rostedt 	rcu_read_lock();
1464992caf7fSSteven Rostedt 	for (;;) {
1465992caf7fSSteven Rostedt 		/* If owner changed, trylock again. */
1466992caf7fSSteven Rostedt 		if (owner != rt_mutex_owner(lock))
1467992caf7fSSteven Rostedt 			break;
1468992caf7fSSteven Rostedt 		/*
1469992caf7fSSteven Rostedt 		 * Ensure that @owner is dereferenced after checking that
1470992caf7fSSteven Rostedt 		 * the lock owner still matches @owner. If that fails,
1471992caf7fSSteven Rostedt 		 * @owner might point to freed memory. If it still matches,
1472992caf7fSSteven Rostedt 		 * the rcu_read_lock() ensures the memory stays valid.
1473992caf7fSSteven Rostedt 		 */
1474992caf7fSSteven Rostedt 		barrier();
1475992caf7fSSteven Rostedt 		/*
1476992caf7fSSteven Rostedt 		 * Stop spinning when:
1477992caf7fSSteven Rostedt 		 *  - the lock owner has been scheduled out
1478992caf7fSSteven Rostedt 		 *  - current is not longer the top waiter
1479992caf7fSSteven Rostedt 		 *  - current is requested to reschedule (redundant
1480992caf7fSSteven Rostedt 		 *    for CONFIG_PREEMPT_RCU=y)
1481992caf7fSSteven Rostedt 		 *  - the VCPU on which owner runs is preempted
1482992caf7fSSteven Rostedt 		 */
1483c0bed69dSKefeng Wang 		if (!owner_on_cpu(owner) || need_resched() ||
1484f16cc980SThomas Gleixner 		    !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
1485992caf7fSSteven Rostedt 			res = false;
1486992caf7fSSteven Rostedt 			break;
1487992caf7fSSteven Rostedt 		}
1488992caf7fSSteven Rostedt 		cpu_relax();
1489992caf7fSSteven Rostedt 	}
1490992caf7fSSteven Rostedt 	rcu_read_unlock();
1491992caf7fSSteven Rostedt 	return res;
1492992caf7fSSteven Rostedt }
1493992caf7fSSteven Rostedt #else
1494992caf7fSSteven Rostedt static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1495992caf7fSSteven Rostedt 				  struct rt_mutex_waiter *waiter,
1496992caf7fSSteven Rostedt 				  struct task_struct *owner)
1497992caf7fSSteven Rostedt {
1498992caf7fSSteven Rostedt 	return false;
1499992caf7fSSteven Rostedt }
1500992caf7fSSteven Rostedt #endif
1501992caf7fSSteven Rostedt 
1502e17ba59bSThomas Gleixner #ifdef RT_MUTEX_BUILD_MUTEX
1503e17ba59bSThomas Gleixner /*
1504e17ba59bSThomas Gleixner  * Functions required for:
1505e17ba59bSThomas Gleixner  *	- rtmutex, futex on all kernels
1506e17ba59bSThomas Gleixner  *	- mutex and rwsem substitutions on RT kernels
1507e17ba59bSThomas Gleixner  */
1508e17ba59bSThomas Gleixner 
15091696a8beSPeter Zijlstra /*
15101696a8beSPeter Zijlstra  * Remove a waiter from a lock and give up
15111696a8beSPeter Zijlstra  *
1512e17ba59bSThomas Gleixner  * Must be called with lock->wait_lock held and interrupts disabled. It must
15131696a8beSPeter Zijlstra  * have just failed to try_to_take_rt_mutex().
15141696a8beSPeter Zijlstra  */
1515830e6accSPeter Zijlstra static void __sched remove_waiter(struct rt_mutex_base *lock,
15161696a8beSPeter Zijlstra 				  struct rt_mutex_waiter *waiter)
15171696a8beSPeter Zijlstra {
15181ca7b860SThomas Gleixner 	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
15191696a8beSPeter Zijlstra 	struct task_struct *owner = rt_mutex_owner(lock);
1520830e6accSPeter Zijlstra 	struct rt_mutex_base *next_lock;
15211696a8beSPeter Zijlstra 
1522e0aad5b4SPeter Zijlstra 	lockdep_assert_held(&lock->wait_lock);
1523e0aad5b4SPeter Zijlstra 
1524b4abf910SThomas Gleixner 	raw_spin_lock(&current->pi_lock);
1525fb00aca4SPeter Zijlstra 	rt_mutex_dequeue(lock, waiter);
15261696a8beSPeter Zijlstra 	current->pi_blocked_on = NULL;
1527b4abf910SThomas Gleixner 	raw_spin_unlock(&current->pi_lock);
15281696a8beSPeter Zijlstra 
15291ca7b860SThomas Gleixner 	/*
15301ca7b860SThomas Gleixner 	 * Only update priority if the waiter was the highest priority
15311ca7b860SThomas Gleixner 	 * waiter of the lock and there is an owner to update.
15321ca7b860SThomas Gleixner 	 */
15331ca7b860SThomas Gleixner 	if (!owner || !is_top_waiter)
15341696a8beSPeter Zijlstra 		return;
15351696a8beSPeter Zijlstra 
1536b4abf910SThomas Gleixner 	raw_spin_lock(&owner->pi_lock);
15371696a8beSPeter Zijlstra 
1538fb00aca4SPeter Zijlstra 	rt_mutex_dequeue_pi(owner, waiter);
15391696a8beSPeter Zijlstra 
15401ca7b860SThomas Gleixner 	if (rt_mutex_has_waiters(lock))
15411ca7b860SThomas Gleixner 		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
15421696a8beSPeter Zijlstra 
1543*f7853c34SPeter Zijlstra 	rt_mutex_adjust_prio(lock, owner);
15441696a8beSPeter Zijlstra 
154582084984SThomas Gleixner 	/* Store the lock on which owner is blocked or NULL */
154682084984SThomas Gleixner 	next_lock = task_blocked_on_lock(owner);
15471696a8beSPeter Zijlstra 
1548b4abf910SThomas Gleixner 	raw_spin_unlock(&owner->pi_lock);
15491696a8beSPeter Zijlstra 
15501ca7b860SThomas Gleixner 	/*
15511ca7b860SThomas Gleixner 	 * Don't walk the chain, if the owner task is not blocked
15521ca7b860SThomas Gleixner 	 * itself.
15531ca7b860SThomas Gleixner 	 */
155482084984SThomas Gleixner 	if (!next_lock)
15551696a8beSPeter Zijlstra 		return;
15561696a8beSPeter Zijlstra 
15571696a8beSPeter Zijlstra 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
15581696a8beSPeter Zijlstra 	get_task_struct(owner);
15591696a8beSPeter Zijlstra 
1560b4abf910SThomas Gleixner 	raw_spin_unlock_irq(&lock->wait_lock);
15611696a8beSPeter Zijlstra 
15628930ed80SThomas Gleixner 	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
15638930ed80SThomas Gleixner 				   next_lock, NULL, current);
15641696a8beSPeter Zijlstra 
1565b4abf910SThomas Gleixner 	raw_spin_lock_irq(&lock->wait_lock);
15661696a8beSPeter Zijlstra }
15671696a8beSPeter Zijlstra 
15681696a8beSPeter Zijlstra /**
1569ebbdc41eSThomas Gleixner  * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
15701696a8beSPeter Zijlstra  * @lock:		 the rt_mutex to take
1571add46132SPeter Zijlstra  * @ww_ctx:		 WW mutex context pointer
15721696a8beSPeter Zijlstra  * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
15731696a8beSPeter Zijlstra  *			 or TASK_UNINTERRUPTIBLE)
15741696a8beSPeter Zijlstra  * @timeout:		 the pre-initialized and started timer, or NULL for none
15751696a8beSPeter Zijlstra  * @waiter:		 the pre-initialized rt_mutex_waiter
15761696a8beSPeter Zijlstra  *
1577b4abf910SThomas Gleixner  * Must be called with lock->wait_lock held and interrupts disabled
15781696a8beSPeter Zijlstra  */
1579ebbdc41eSThomas Gleixner static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
1580add46132SPeter Zijlstra 					   struct ww_acquire_ctx *ww_ctx,
1581830e6accSPeter Zijlstra 					   unsigned int state,
15821696a8beSPeter Zijlstra 					   struct hrtimer_sleeper *timeout,
15831696a8beSPeter Zijlstra 					   struct rt_mutex_waiter *waiter)
15841696a8beSPeter Zijlstra {
1585add46132SPeter Zijlstra 	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1586992caf7fSSteven Rostedt 	struct task_struct *owner;
15871696a8beSPeter Zijlstra 	int ret = 0;
15881696a8beSPeter Zijlstra 
15891696a8beSPeter Zijlstra 	for (;;) {
15901696a8beSPeter Zijlstra 		/* Try to acquire the lock: */
15911696a8beSPeter Zijlstra 		if (try_to_take_rt_mutex(lock, current, waiter))
15921696a8beSPeter Zijlstra 			break;
15931696a8beSPeter Zijlstra 
1594a51a327fSThomas Gleixner 		if (timeout && !timeout->task) {
15951696a8beSPeter Zijlstra 			ret = -ETIMEDOUT;
1596a51a327fSThomas Gleixner 			break;
1597a51a327fSThomas Gleixner 		}
1598a51a327fSThomas Gleixner 		if (signal_pending_state(state, current)) {
1599a51a327fSThomas Gleixner 			ret = -EINTR;
16001696a8beSPeter Zijlstra 			break;
16011696a8beSPeter Zijlstra 		}
16021696a8beSPeter Zijlstra 
1603add46132SPeter Zijlstra 		if (build_ww_mutex() && ww_ctx) {
1604add46132SPeter Zijlstra 			ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
1605add46132SPeter Zijlstra 			if (ret)
1606add46132SPeter Zijlstra 				break;
1607add46132SPeter Zijlstra 		}
1608add46132SPeter Zijlstra 
1609992caf7fSSteven Rostedt 		if (waiter == rt_mutex_top_waiter(lock))
1610992caf7fSSteven Rostedt 			owner = rt_mutex_owner(lock);
1611992caf7fSSteven Rostedt 		else
1612992caf7fSSteven Rostedt 			owner = NULL;
1613b4abf910SThomas Gleixner 		raw_spin_unlock_irq(&lock->wait_lock);
16141696a8beSPeter Zijlstra 
1615992caf7fSSteven Rostedt 		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
16161b0b7c17SDavidlohr Bueso 			schedule();
16171696a8beSPeter Zijlstra 
1618b4abf910SThomas Gleixner 		raw_spin_lock_irq(&lock->wait_lock);
16191696a8beSPeter Zijlstra 		set_current_state(state);
16201696a8beSPeter Zijlstra 	}
16211696a8beSPeter Zijlstra 
1622afffc6c1SDavidlohr Bueso 	__set_current_state(TASK_RUNNING);
16231696a8beSPeter Zijlstra 	return ret;
16241696a8beSPeter Zijlstra }
16251696a8beSPeter Zijlstra 
1626d7a2edb8SThomas Gleixner static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
16273d5c9340SThomas Gleixner 					     struct rt_mutex_waiter *w)
16283d5c9340SThomas Gleixner {
16293d5c9340SThomas Gleixner 	/*
16303d5c9340SThomas Gleixner 	 * If the result is not -EDEADLOCK or the caller requested
16313d5c9340SThomas Gleixner 	 * deadlock detection, nothing to do here.
16323d5c9340SThomas Gleixner 	 */
16333d5c9340SThomas Gleixner 	if (res != -EDEADLOCK || detect_deadlock)
16343d5c9340SThomas Gleixner 		return;
16353d5c9340SThomas Gleixner 
1636add46132SPeter Zijlstra 	if (build_ww_mutex() && w->ww_ctx)
1637add46132SPeter Zijlstra 		return;
1638add46132SPeter Zijlstra 
16393d5c9340SThomas Gleixner 	/*
1640e2db7592SIngo Molnar 	 * Yell loudly and stop the task right here.
16413d5c9340SThomas Gleixner 	 */
16426d41c675SSebastian Andrzej Siewior 	WARN(1, "rtmutex deadlock detected\n");
16433d5c9340SThomas Gleixner 	while (1) {
16443d5c9340SThomas Gleixner 		set_current_state(TASK_INTERRUPTIBLE);
16453d5c9340SThomas Gleixner 		schedule();
16463d5c9340SThomas Gleixner 	}
16473d5c9340SThomas Gleixner }
16483d5c9340SThomas Gleixner 
1649ebbdc41eSThomas Gleixner /**
1650ebbdc41eSThomas Gleixner  * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
1651ebbdc41eSThomas Gleixner  * @lock:	The rtmutex to block lock
1652add46132SPeter Zijlstra  * @ww_ctx:	WW mutex context pointer
1653ebbdc41eSThomas Gleixner  * @state:	The task state for sleeping
1654ebbdc41eSThomas Gleixner  * @chwalk:	Indicator whether full or partial chainwalk is requested
1655ebbdc41eSThomas Gleixner  * @waiter:	Initializer waiter for blocking
16561696a8beSPeter Zijlstra  */
1657ebbdc41eSThomas Gleixner static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
1658add46132SPeter Zijlstra 				       struct ww_acquire_ctx *ww_ctx,
1659830e6accSPeter Zijlstra 				       unsigned int state,
1660ebbdc41eSThomas Gleixner 				       enum rtmutex_chainwalk chwalk,
1661ebbdc41eSThomas Gleixner 				       struct rt_mutex_waiter *waiter)
1662ebbdc41eSThomas Gleixner {
1663add46132SPeter Zijlstra 	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1664add46132SPeter Zijlstra 	struct ww_mutex *ww = ww_container_of(rtm);
1665ebbdc41eSThomas Gleixner 	int ret;
1666ebbdc41eSThomas Gleixner 
1667ebbdc41eSThomas Gleixner 	lockdep_assert_held(&lock->wait_lock);
1668ebbdc41eSThomas Gleixner 
1669ebbdc41eSThomas Gleixner 	/* Try to acquire the lock again: */
1670add46132SPeter Zijlstra 	if (try_to_take_rt_mutex(lock, current, NULL)) {
1671add46132SPeter Zijlstra 		if (build_ww_mutex() && ww_ctx) {
1672add46132SPeter Zijlstra 			__ww_mutex_check_waiters(rtm, ww_ctx);
1673add46132SPeter Zijlstra 			ww_mutex_lock_acquired(ww, ww_ctx);
1674add46132SPeter Zijlstra 		}
1675ebbdc41eSThomas Gleixner 		return 0;
1676add46132SPeter Zijlstra 	}
1677ebbdc41eSThomas Gleixner 
1678ebbdc41eSThomas Gleixner 	set_current_state(state);
1679ebbdc41eSThomas Gleixner 
1680ee042be1SNamhyung Kim 	trace_contention_begin(lock, LCB_F_RT);
1681ee042be1SNamhyung Kim 
1682add46132SPeter Zijlstra 	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
1683ebbdc41eSThomas Gleixner 	if (likely(!ret))
1684add46132SPeter Zijlstra 		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
1685ebbdc41eSThomas Gleixner 
1686add46132SPeter Zijlstra 	if (likely(!ret)) {
1687add46132SPeter Zijlstra 		/* acquired the lock */
1688add46132SPeter Zijlstra 		if (build_ww_mutex() && ww_ctx) {
1689add46132SPeter Zijlstra 			if (!ww_ctx->is_wait_die)
1690add46132SPeter Zijlstra 				__ww_mutex_check_waiters(rtm, ww_ctx);
1691add46132SPeter Zijlstra 			ww_mutex_lock_acquired(ww, ww_ctx);
1692add46132SPeter Zijlstra 		}
1693add46132SPeter Zijlstra 	} else {
1694ebbdc41eSThomas Gleixner 		__set_current_state(TASK_RUNNING);
1695ebbdc41eSThomas Gleixner 		remove_waiter(lock, waiter);
1696ebbdc41eSThomas Gleixner 		rt_mutex_handle_deadlock(ret, chwalk, waiter);
1697ebbdc41eSThomas Gleixner 	}
1698ebbdc41eSThomas Gleixner 
1699ebbdc41eSThomas Gleixner 	/*
1700ebbdc41eSThomas Gleixner 	 * try_to_take_rt_mutex() sets the waiter bit
1701ebbdc41eSThomas Gleixner 	 * unconditionally. We might have to fix that up.
1702ebbdc41eSThomas Gleixner 	 */
17031c0908d8SMel Gorman 	fixup_rt_mutex_waiters(lock, true);
1704ee042be1SNamhyung Kim 
1705ee042be1SNamhyung Kim 	trace_contention_end(lock, ret);
1706ee042be1SNamhyung Kim 
1707ebbdc41eSThomas Gleixner 	return ret;
1708ebbdc41eSThomas Gleixner }
1709ebbdc41eSThomas Gleixner 
1710ebbdc41eSThomas Gleixner static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
1711add46132SPeter Zijlstra 					     struct ww_acquire_ctx *ww_ctx,
1712ebbdc41eSThomas Gleixner 					     unsigned int state)
17131696a8beSPeter Zijlstra {
17141696a8beSPeter Zijlstra 	struct rt_mutex_waiter waiter;
1715ebbdc41eSThomas Gleixner 	int ret;
17161696a8beSPeter Zijlstra 
171750809358SPeter Zijlstra 	rt_mutex_init_waiter(&waiter);
1718add46132SPeter Zijlstra 	waiter.ww_ctx = ww_ctx;
17191696a8beSPeter Zijlstra 
1720add46132SPeter Zijlstra 	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
1721add46132SPeter Zijlstra 				  &waiter);
1722ebbdc41eSThomas Gleixner 
1723ebbdc41eSThomas Gleixner 	debug_rt_mutex_free_waiter(&waiter);
1724ebbdc41eSThomas Gleixner 	return ret;
1725ebbdc41eSThomas Gleixner }
1726ebbdc41eSThomas Gleixner 
1727ebbdc41eSThomas Gleixner /*
1728ebbdc41eSThomas Gleixner  * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
1729ebbdc41eSThomas Gleixner  * @lock:	The rtmutex to block lock
1730add46132SPeter Zijlstra  * @ww_ctx:	WW mutex context pointer
1731ebbdc41eSThomas Gleixner  * @state:	The task state for sleeping
1732ebbdc41eSThomas Gleixner  */
1733ebbdc41eSThomas Gleixner static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
1734add46132SPeter Zijlstra 				     struct ww_acquire_ctx *ww_ctx,
1735ebbdc41eSThomas Gleixner 				     unsigned int state)
1736ebbdc41eSThomas Gleixner {
1737ebbdc41eSThomas Gleixner 	unsigned long flags;
1738ebbdc41eSThomas Gleixner 	int ret;
1739ebbdc41eSThomas Gleixner 
1740b4abf910SThomas Gleixner 	/*
1741b4abf910SThomas Gleixner 	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
1742b4abf910SThomas Gleixner 	 * be called in early boot if the cmpxchg() fast path is disabled
1743b4abf910SThomas Gleixner 	 * (debug, no architecture support). In this case we will acquire the
1744b4abf910SThomas Gleixner 	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
1745b4abf910SThomas Gleixner 	 * enable interrupts in that early boot case. So we need to use the
1746b4abf910SThomas Gleixner 	 * irqsave/restore variants.
1747b4abf910SThomas Gleixner 	 */
1748b4abf910SThomas Gleixner 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
1749add46132SPeter Zijlstra 	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
1750b4abf910SThomas Gleixner 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
17511696a8beSPeter Zijlstra 
17521696a8beSPeter Zijlstra 	return ret;
17531696a8beSPeter Zijlstra }
17541696a8beSPeter Zijlstra 
1755830e6accSPeter Zijlstra static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
1756531ae4b0SThomas Gleixner 					   unsigned int state)
1757531ae4b0SThomas Gleixner {
1758531ae4b0SThomas Gleixner 	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1759531ae4b0SThomas Gleixner 		return 0;
1760531ae4b0SThomas Gleixner 
1761add46132SPeter Zijlstra 	return rt_mutex_slowlock(lock, NULL, state);
1762531ae4b0SThomas Gleixner }
1763e17ba59bSThomas Gleixner #endif /* RT_MUTEX_BUILD_MUTEX */
17641c143c4bSThomas Gleixner 
17651c143c4bSThomas Gleixner #ifdef RT_MUTEX_BUILD_SPINLOCKS
17661c143c4bSThomas Gleixner /*
17671c143c4bSThomas Gleixner  * Functions required for spin/rw_lock substitution on RT kernels
17681c143c4bSThomas Gleixner  */
17691c143c4bSThomas Gleixner 
17701c143c4bSThomas Gleixner /**
17711c143c4bSThomas Gleixner  * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
17721c143c4bSThomas Gleixner  * @lock:	The underlying RT mutex
17731c143c4bSThomas Gleixner  */
17741c143c4bSThomas Gleixner static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
17751c143c4bSThomas Gleixner {
17761c143c4bSThomas Gleixner 	struct rt_mutex_waiter waiter;
1777992caf7fSSteven Rostedt 	struct task_struct *owner;
17781c143c4bSThomas Gleixner 
17791c143c4bSThomas Gleixner 	lockdep_assert_held(&lock->wait_lock);
17801c143c4bSThomas Gleixner 
17811c143c4bSThomas Gleixner 	if (try_to_take_rt_mutex(lock, current, NULL))
17821c143c4bSThomas Gleixner 		return;
17831c143c4bSThomas Gleixner 
17841c143c4bSThomas Gleixner 	rt_mutex_init_rtlock_waiter(&waiter);
17851c143c4bSThomas Gleixner 
17861c143c4bSThomas Gleixner 	/* Save current state and set state to TASK_RTLOCK_WAIT */
17871c143c4bSThomas Gleixner 	current_save_and_set_rtlock_wait_state();
17881c143c4bSThomas Gleixner 
1789ee042be1SNamhyung Kim 	trace_contention_begin(lock, LCB_F_RT);
1790ee042be1SNamhyung Kim 
1791add46132SPeter Zijlstra 	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
17921c143c4bSThomas Gleixner 
17931c143c4bSThomas Gleixner 	for (;;) {
17941c143c4bSThomas Gleixner 		/* Try to acquire the lock again */
17951c143c4bSThomas Gleixner 		if (try_to_take_rt_mutex(lock, current, &waiter))
17961c143c4bSThomas Gleixner 			break;
17971c143c4bSThomas Gleixner 
1798992caf7fSSteven Rostedt 		if (&waiter == rt_mutex_top_waiter(lock))
1799992caf7fSSteven Rostedt 			owner = rt_mutex_owner(lock);
1800992caf7fSSteven Rostedt 		else
1801992caf7fSSteven Rostedt 			owner = NULL;
18021c143c4bSThomas Gleixner 		raw_spin_unlock_irq(&lock->wait_lock);
18031c143c4bSThomas Gleixner 
1804992caf7fSSteven Rostedt 		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
18051c143c4bSThomas Gleixner 			schedule_rtlock();
18061c143c4bSThomas Gleixner 
18071c143c4bSThomas Gleixner 		raw_spin_lock_irq(&lock->wait_lock);
18081c143c4bSThomas Gleixner 		set_current_state(TASK_RTLOCK_WAIT);
18091c143c4bSThomas Gleixner 	}
18101c143c4bSThomas Gleixner 
18111c143c4bSThomas Gleixner 	/* Restore the task state */
18121c143c4bSThomas Gleixner 	current_restore_rtlock_saved_state();
18131c143c4bSThomas Gleixner 
18141c143c4bSThomas Gleixner 	/*
18151c143c4bSThomas Gleixner 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
18161c143c4bSThomas Gleixner 	 * We might have to fix that up:
18171c143c4bSThomas Gleixner 	 */
18181c0908d8SMel Gorman 	fixup_rt_mutex_waiters(lock, true);
18191c143c4bSThomas Gleixner 	debug_rt_mutex_free_waiter(&waiter);
1820ee042be1SNamhyung Kim 
1821ee042be1SNamhyung Kim 	trace_contention_end(lock, 0);
18221c143c4bSThomas Gleixner }
18231c143c4bSThomas Gleixner 
18241c143c4bSThomas Gleixner static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
18251c143c4bSThomas Gleixner {
18261c143c4bSThomas Gleixner 	unsigned long flags;
18271c143c4bSThomas Gleixner 
18281c143c4bSThomas Gleixner 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
18291c143c4bSThomas Gleixner 	rtlock_slowlock_locked(lock);
18301c143c4bSThomas Gleixner 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
18311c143c4bSThomas Gleixner }
18321c143c4bSThomas Gleixner 
18331c143c4bSThomas Gleixner #endif /* RT_MUTEX_BUILD_SPINLOCKS */
1834