xref: /linux/kernel/locking/rtmutex_api.c (revision 4c16e1cadcbcaf3c82d5fc310fbd34d0f5d0db7c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * rtmutex API
4  */
5 #include <linux/spinlock.h>
6 #include <linux/export.h>
7 
8 #define RT_MUTEX_BUILD_MUTEX
9 #include "rtmutex.c"
10 
/*
 * Upper limit on the number of times the boosting chain is walked:
 */
int max_lock_depth = 1024;
15 
16 /*
17  * Debug aware fast / slowpath lock,trylock,unlock
18  *
19  * The atomic acquire/release ops are compiled away, when either the
20  * architecture does not support cmpxchg or when debugging is enabled.
21  */
22 static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
23 						  unsigned int state,
24 						  struct lockdep_map *nest_lock,
25 						  unsigned int subclass)
26 {
27 	int ret;
28 
29 	might_sleep();
30 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_);
31 	ret = __rt_mutex_lock(&lock->rtmutex, state);
32 	if (ret)
33 		mutex_release(&lock->dep_map, _RET_IP_);
34 	return ret;
35 }
36 
/*
 * Exported, out-of-line wrapper which simply forwards to
 * __rt_mutex_base_init() for @rtb.
 */
void rt_mutex_base_init(struct rt_mutex_base *rtb)
{
	__rt_mutex_base_init(rtb);
}
EXPORT_SYMBOL(rt_mutex_base_init);
42 
43 #ifdef CONFIG_DEBUG_LOCK_ALLOC
44 /**
45  * rt_mutex_lock_nested - lock a rt_mutex
46  *
47  * @lock: the rt_mutex to be locked
48  * @subclass: the lockdep subclass
49  */
50 void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
51 {
52 	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass);
53 }
54 EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
55 
56 void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock)
57 {
58 	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0);
59 }
60 EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);
61 
62 #else /* !CONFIG_DEBUG_LOCK_ALLOC */
63 
64 /**
65  * rt_mutex_lock - lock a rt_mutex
66  *
67  * @lock: the rt_mutex to be locked
68  */
69 void __sched rt_mutex_lock(struct rt_mutex *lock)
70 {
71 	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0);
72 }
73 EXPORT_SYMBOL_GPL(rt_mutex_lock);
74 #endif
75 
76 /**
77  * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
78  *
79  * @lock:		the rt_mutex to be locked
80  *
81  * Returns:
82  *  0		on success
83  * -EINTR	when interrupted by a signal
84  */
85 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
86 {
87 	return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0);
88 }
89 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
90 
91 /**
92  * rt_mutex_lock_killable - lock a rt_mutex killable
93  *
94  * @lock:		the rt_mutex to be locked
95  *
96  * Returns:
97  *  0		on success
98  * -EINTR	when interrupted by a signal
99  */
100 int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
101 {
102 	return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0);
103 }
104 EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
105 
106 /**
107  * rt_mutex_trylock - try to lock a rt_mutex
108  *
109  * @lock:	the rt_mutex to be locked
110  *
111  * This function can only be called in thread context. It's safe to call it
112  * from atomic regions, but not from hard or soft interrupt context.
113  *
114  * Returns:
115  *  1 on success
116  *  0 on contention
117  */
118 int __sched rt_mutex_trylock(struct rt_mutex *lock)
119 {
120 	int ret;
121 
122 	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
123 		return 0;
124 
125 	ret = __rt_mutex_trylock(&lock->rtmutex);
126 	if (ret)
127 		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
128 
129 	return ret;
130 }
131 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
132 
133 /**
134  * rt_mutex_unlock - unlock a rt_mutex
135  *
136  * @lock: the rt_mutex to be unlocked
137  */
138 void __sched rt_mutex_unlock(struct rt_mutex *lock)
139 {
140 	mutex_release(&lock->dep_map, _RET_IP_);
141 	__rt_mutex_unlock(&lock->rtmutex);
142 }
143 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
144 
145 /*
146  * Futex variants, must not use fastpath.
147  */
148 int __sched rt_mutex_futex_trylock(struct rt_mutex_base *lock)
149 {
150 	return rt_mutex_slowtrylock(lock);
151 }
152 
153 int __sched __rt_mutex_futex_trylock(struct rt_mutex_base *lock)
154 {
155 	return __rt_mutex_slowtrylock(lock);
156 }
157 
158 /**
159  * __rt_mutex_futex_unlock - Futex variant, that since futex variants
160  * do not use the fast-path, can be simple and will not need to retry.
161  *
162  * @lock:	The rt_mutex to be unlocked
163  * @wqh:	The wake queue head from which to get the next lock waiter
164  */
165 bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock,
166 				     struct rt_wake_q_head *wqh)
167 {
168 	lockdep_assert_held(&lock->wait_lock);
169 
170 	debug_rt_mutex_unlock(lock);
171 
172 	if (!rt_mutex_has_waiters(lock)) {
173 		lock->owner = NULL;
174 		return false; /* done */
175 	}
176 
177 	/*
178 	 * mark_wakeup_next_waiter() deboosts and retains preemption
179 	 * disabled when dropping the wait_lock, to avoid inversion prior
180 	 * to the wakeup.  preempt_disable() therein pairs with the
181 	 * preempt_enable() in rt_mutex_postunlock().
182 	 */
183 	mark_wakeup_next_waiter(wqh, lock);
184 
185 	return true; /* call postunlock() */
186 }
187 
188 void __sched rt_mutex_futex_unlock(struct rt_mutex_base *lock)
189 {
190 	DEFINE_RT_WAKE_Q(wqh);
191 	unsigned long flags;
192 	bool postunlock;
193 
194 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
195 	postunlock = __rt_mutex_futex_unlock(lock, &wqh);
196 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
197 
198 	if (postunlock)
199 		rt_mutex_postunlock(&wqh);
200 }
201 
202 /**
203  * __rt_mutex_init - initialize the rt_mutex
204  *
205  * @lock:	The rt_mutex to be initialized
206  * @name:	The lock name used for debugging
207  * @key:	The lock class key used for debugging
208  *
209  * Initialize the rt_mutex to unlocked state.
210  *
211  * Initializing of a locked rt_mutex is not allowed
212  */
213 void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name,
214 			     struct lock_class_key *key)
215 {
216 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
217 	__rt_mutex_base_init(&lock->rtmutex);
218 	lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
219 }
220 EXPORT_SYMBOL_GPL(__rt_mutex_init);
221 
222 /**
223  * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
224  *				proxy owner
225  *
226  * @lock:	the rt_mutex to be locked
227  * @proxy_owner:the task to set as owner
228  *
229  * No locking. Caller has to do serializing itself
230  *
231  * Special API call for PI-futex support. This initializes the rtmutex and
232  * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
233  * possible at this point because the pi_state which contains the rtmutex
234  * is not yet visible to other tasks.
235  */
236 void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
237 					struct task_struct *proxy_owner)
238 {
239 	static struct lock_class_key pi_futex_key;
240 
241 	__rt_mutex_base_init(lock);
242 	/*
243 	 * On PREEMPT_RT the futex hashbucket spinlock becomes 'sleeping'
244 	 * and rtmutex based. That causes a lockdep false positive, because
245 	 * some of the futex functions invoke spin_unlock(&hb->lock) with
246 	 * the wait_lock of the rtmutex associated to the pi_futex held.
247 	 * spin_unlock() in turn takes wait_lock of the rtmutex on which
248 	 * the spinlock is based, which makes lockdep notice a lock
249 	 * recursion. Give the futex/rtmutex wait_lock a separate key.
250 	 */
251 	lockdep_set_class(&lock->wait_lock, &pi_futex_key);
252 	rt_mutex_set_owner(lock, proxy_owner);
253 }
254 
255 /**
256  * rt_mutex_proxy_unlock - release a lock on behalf of owner
257  *
258  * @lock:	the rt_mutex to be locked
259  *
260  * No locking. Caller has to do serializing itself
261  *
262  * Special API call for PI-futex support. This just cleans up the rtmutex
263  * (debugging) state. Concurrent operations on this rt_mutex are not
264  * possible because it belongs to the pi_state which is about to be freed
265  * and it is not longer visible to other tasks.
266  */
267 void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
268 {
269 	debug_rt_mutex_proxy_unlock(lock);
270 	rt_mutex_clear_owner(lock);
271 }
272 
273 /**
274  * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
275  * @lock:		the rt_mutex to take
276  * @waiter:		the pre-initialized rt_mutex_waiter
277  * @task:		the task to prepare
278  * @wake_q:		the wake_q to wake tasks after we release the wait_lock
279  *
280  * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
281  * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
282  *
283  * NOTE: does _NOT_ remove the @waiter on failure; must either call
284  * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
285  *
286  * Returns:
287  *  0 - task blocked on lock
288  *  1 - acquired the lock for task, caller should wake it up
289  * <0 - error
290  *
291  * Special API call for PI-futex support.
292  */
293 int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
294 					struct rt_mutex_waiter *waiter,
295 					struct task_struct *task,
296 					struct wake_q_head *wake_q)
297 {
298 	int ret;
299 
300 	lockdep_assert_held(&lock->wait_lock);
301 
302 	if (try_to_take_rt_mutex(lock, task, NULL))
303 		return 1;
304 
305 	/* We enforce deadlock detection for futexes */
306 	ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL,
307 				      RT_MUTEX_FULL_CHAINWALK, wake_q);
308 
309 	if (ret && !rt_mutex_owner(lock)) {
310 		/*
311 		 * Reset the return value. We might have
312 		 * returned with -EDEADLK and the owner
313 		 * released the lock while we were walking the
314 		 * pi chain.  Let the waiter sort it out.
315 		 */
316 		ret = 0;
317 	}
318 
319 	return ret;
320 }
321 
322 /**
323  * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
324  * @lock:		the rt_mutex to take
325  * @waiter:		the pre-initialized rt_mutex_waiter
326  * @task:		the task to prepare
327  *
328  * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
329  * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
330  *
331  * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
332  * on failure.
333  *
334  * Returns:
335  *  0 - task blocked on lock
336  *  1 - acquired the lock for task, caller should wake it up
337  * <0 - error
338  *
339  * Special API call for PI-futex support.
340  */
341 int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
342 				      struct rt_mutex_waiter *waiter,
343 				      struct task_struct *task)
344 {
345 	int ret;
346 	DEFINE_WAKE_Q(wake_q);
347 
348 	raw_spin_lock_irq(&lock->wait_lock);
349 	ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
350 	if (unlikely(ret))
351 		remove_waiter(lock, waiter);
352 	preempt_disable();
353 	raw_spin_unlock_irq(&lock->wait_lock);
354 	wake_up_q(&wake_q);
355 	preempt_enable();
356 
357 	return ret;
358 }
359 
360 /**
361  * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
362  * @lock:		the rt_mutex we were woken on
363  * @to:			the timeout, null if none. hrtimer should already have
364  *			been started.
365  * @waiter:		the pre-initialized rt_mutex_waiter
366  *
367  * Wait for the lock acquisition started on our behalf by
368  * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
369  * rt_mutex_cleanup_proxy_lock().
370  *
371  * Returns:
372  *  0 - success
373  * <0 - error, one of -EINTR, -ETIMEDOUT
374  *
375  * Special API call for PI-futex support
376  */
377 int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
378 				     struct hrtimer_sleeper *to,
379 				     struct rt_mutex_waiter *waiter)
380 {
381 	int ret;
382 
383 	raw_spin_lock_irq(&lock->wait_lock);
384 	/* sleep on the mutex */
385 	set_current_state(TASK_INTERRUPTIBLE);
386 	ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter, NULL);
387 	/*
388 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
389 	 * have to fix that up.
390 	 */
391 	fixup_rt_mutex_waiters(lock, true);
392 	raw_spin_unlock_irq(&lock->wait_lock);
393 
394 	return ret;
395 }
396 
397 /**
398  * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
399  * @lock:		the rt_mutex we were woken on
400  * @waiter:		the pre-initialized rt_mutex_waiter
401  *
402  * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
403  * rt_mutex_wait_proxy_lock().
404  *
405  * Unless we acquired the lock; we're still enqueued on the wait-list and can
406  * in fact still be granted ownership until we're removed. Therefore we can
407  * find we are in fact the owner and must disregard the
408  * rt_mutex_wait_proxy_lock() failure.
409  *
410  * Returns:
411  *  true  - did the cleanup, we done.
412  *  false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
413  *          caller should disregards its return value.
414  *
415  * Special API call for PI-futex support
416  */
417 bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
418 					 struct rt_mutex_waiter *waiter)
419 {
420 	bool cleanup = false;
421 
422 	raw_spin_lock_irq(&lock->wait_lock);
423 	/*
424 	 * Do an unconditional try-lock, this deals with the lock stealing
425 	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
426 	 * sets a NULL owner.
427 	 *
428 	 * We're not interested in the return value, because the subsequent
429 	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
430 	 * we will own the lock and it will have removed the waiter. If we
431 	 * failed the trylock, we're still not owner and we need to remove
432 	 * ourselves.
433 	 */
434 	try_to_take_rt_mutex(lock, current, waiter);
435 	/*
436 	 * Unless we're the owner; we're still enqueued on the wait_list.
437 	 * So check if we became owner, if not, take us off the wait_list.
438 	 */
439 	if (rt_mutex_owner(lock) != current) {
440 		remove_waiter(lock, waiter);
441 		cleanup = true;
442 	}
443 	/*
444 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
445 	 * have to fix that up.
446 	 */
447 	fixup_rt_mutex_waiters(lock, false);
448 
449 	raw_spin_unlock_irq(&lock->wait_lock);
450 
451 	return cleanup;
452 }
453 
454 /*
455  * Recheck the pi chain, in case we got a priority setting
456  *
457  * Called from sched_setscheduler
458  */
459 void __sched rt_mutex_adjust_pi(struct task_struct *task)
460 {
461 	struct rt_mutex_waiter *waiter;
462 	struct rt_mutex_base *next_lock;
463 	unsigned long flags;
464 
465 	raw_spin_lock_irqsave(&task->pi_lock, flags);
466 
467 	waiter = task->pi_blocked_on;
468 	if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
469 		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
470 		return;
471 	}
472 	next_lock = waiter->lock;
473 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
474 
475 	/* gets dropped in rt_mutex_adjust_prio_chain()! */
476 	get_task_struct(task);
477 
478 	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
479 				   next_lock, NULL, task);
480 }
481 
482 /*
483  * Performs the wakeup of the top-waiter and re-enables preemption.
484  */
485 void __sched rt_mutex_postunlock(struct rt_wake_q_head *wqh)
486 {
487 	rt_mutex_wake_up_q(wqh);
488 }
489 
490 #ifdef CONFIG_DEBUG_RT_MUTEXES
491 void rt_mutex_debug_task_free(struct task_struct *task)
492 {
493 	DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root));
494 	DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
495 }
496 #endif
497 
498 #ifdef CONFIG_PREEMPT_RT
499 /* Mutexes */
500 void __mutex_rt_init(struct mutex *mutex, const char *name,
501 		     struct lock_class_key *key)
502 {
503 	debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
504 	lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP);
505 }
506 EXPORT_SYMBOL(__mutex_rt_init);
507 
508 static __always_inline int __mutex_lock_common(struct mutex *lock,
509 					       unsigned int state,
510 					       unsigned int subclass,
511 					       struct lockdep_map *nest_lock,
512 					       unsigned long ip)
513 {
514 	int ret;
515 
516 	might_sleep();
517 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
518 	ret = __rt_mutex_lock(&lock->rtmutex, state);
519 	if (ret)
520 		mutex_release(&lock->dep_map, ip);
521 	else
522 		lock_acquired(&lock->dep_map, ip);
523 	return ret;
524 }
525 
526 #ifdef CONFIG_DEBUG_LOCK_ALLOC
527 void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass)
528 {
529 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
530 }
531 EXPORT_SYMBOL_GPL(mutex_lock_nested);
532 
533 void __sched _mutex_lock_nest_lock(struct mutex *lock,
534 				   struct lockdep_map *nest_lock)
535 {
536 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest_lock, _RET_IP_);
537 }
538 EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
539 
540 int __sched mutex_lock_interruptible_nested(struct mutex *lock,
541 					    unsigned int subclass)
542 {
543 	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_);
544 }
545 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
546 
547 int __sched mutex_lock_killable_nested(struct mutex *lock,
548 					    unsigned int subclass)
549 {
550 	return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
551 }
552 EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
553 
554 void __sched mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
555 {
556 	int token;
557 
558 	might_sleep();
559 
560 	token = io_schedule_prepare();
561 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
562 	io_schedule_finish(token);
563 }
564 EXPORT_SYMBOL_GPL(mutex_lock_io_nested);
565 
#else /* !CONFIG_DEBUG_LOCK_ALLOC */
567 
568 void __sched mutex_lock(struct mutex *lock)
569 {
570 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
571 }
572 EXPORT_SYMBOL(mutex_lock);
573 
574 int __sched mutex_lock_interruptible(struct mutex *lock)
575 {
576 	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
577 }
578 EXPORT_SYMBOL(mutex_lock_interruptible);
579 
580 int __sched mutex_lock_killable(struct mutex *lock)
581 {
582 	return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
583 }
584 EXPORT_SYMBOL(mutex_lock_killable);
585 
586 void __sched mutex_lock_io(struct mutex *lock)
587 {
588 	int token = io_schedule_prepare();
589 
590 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
591 	io_schedule_finish(token);
592 }
593 EXPORT_SYMBOL(mutex_lock_io);
594 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
595 
596 int __sched mutex_trylock(struct mutex *lock)
597 {
598 	int ret;
599 
600 	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
601 		return 0;
602 
603 	ret = __rt_mutex_trylock(&lock->rtmutex);
604 	if (ret)
605 		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
606 
607 	return ret;
608 }
609 EXPORT_SYMBOL(mutex_trylock);
610 
611 void __sched mutex_unlock(struct mutex *lock)
612 {
613 	mutex_release(&lock->dep_map, _RET_IP_);
614 	__rt_mutex_unlock(&lock->rtmutex);
615 }
616 EXPORT_SYMBOL(mutex_unlock);
617 
618 #endif /* CONFIG_PREEMPT_RT */
619