// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 *  See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include <trace/events/lock.h>

#include "rtmutex_common.h"
#include "lock_events.h"

#ifndef WW_RT
# define build_ww_mutex()	(false)
# define ww_container_of(rtm)	NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx,
					struct wake_q_head *wake_q)
{
	return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx,
					    struct wake_q_head *wake_q)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

#else
# define build_ww_mutex()	(true)
# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit 0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
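
/*
 * Informal illustration of the table above: as long as bit 0 is clear,
 * lock and unlock can be done locklessly on lock->owner via the helpers
 * defined further down:
 *
 *	acquire: rt_mutex_cmpxchg_acquire(lock, NULL, current)
 *	release: rt_mutex_cmpxchg_release(lock, current, NULL)
 *
 * Once RT_MUTEX_HAS_WAITERS (bit 0) is set, both cmpxchg()s fail and
 * every task is forced into the slow path under lock->wait_lock.
 */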

static __always_inline struct task_struct *
rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
	__must_hold(&lock->wait_lock)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	return (struct task_struct *)val;
}

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
	__must_hold(&lock->wait_lock)
{
	/*
	 * lock->wait_lock is held but explicit acquire semantics are needed
	 * for a new lock owner so WRITE_ONCE is insufficient.
	 */
	xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
}

static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
	__must_hold(&lock->wait_lock)
{
	/* lock->wait_lock is held so the unlock provides release semantics. */
	WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
	__must_hold(&lock->wait_lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void
fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
	__must_hold(&lock->wait_lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0	CPU 1			CPU2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *					rt_mutex_lock(l)
	 *					lock(l->lock)
	 *					l->owner = T1 | HAS_WAITERS;
	 *					enqueue(T3)
	 *					boost()
	 *					  unlock(l->lock)
	 *					block()
	 *		signal(->T2)		signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *					lock(l->lock)
	 *					dequeue(T3)
	 *					 ==> wait list is empty
	 *					deboost()
	 *					 unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l) {
	 *		    l->owner = owner
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		      ==> l->owner = T1
	 *		  }
	 *					lock(l->lock)
	 * rt_mutex_unlock(l)			fixup_rt_mutex_waiters()
	 *					  if (wait_list_empty(l) {
	 *					    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *					    l->owner = owner
	 *					      ==> l->owner = T1
	 *					  }
	 *
	 * With the check for the waiter bit in place T3 on CPU2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS) {
		/*
		 * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
		 * why xchg_acquire() is used for updating owner for
		 * locking and WRITE_ONCE() for unlocking.
		 *
		 * WRITE_ONCE() would work for the acquire case too, but
		 * in case that the lock acquisition failed it might
		 * force other lockers into the slow path unnecessarily.
		 */
		if (acquire_lock)
			xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
		else
			WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
	}
}
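
/*
 * Simplified sketch of how the fixup above is used, as in
 * __rt_mutex_slowtrylock() further down: try_to_take_rt_mutex() sets
 * RT_MUTEX_HAS_WAITERS speculatively, so a failed attempt has to clean
 * the bit up again before returning:
 *
 *	ret = try_to_take_rt_mutex(lock, current, NULL);
 *	fixup_rt_mutex_waiters(lock, true);
 *	return ret;
 */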

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
{
	return rt_mutex_cmpxchg_acquire(lock, NULL, current);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_release(&lock->owner, &old, new);
}

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long *p = (unsigned long *) &lock->owner;
	unsigned long owner, new;

	owner = READ_ONCE(*p);
	do {
		new = owner | RT_MUTEX_HAS_WAITERS;
	} while (!try_cmpxchg_relaxed(p, &owner, new));

	/*
	 * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
	 * operations in the event of contention. Ensure the successful
	 * cmpxchg is visible.
	 */
	smp_mb__after_atomic();
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;

}

static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock);

static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
{
	/*
	 * With debug enabled the cmpxchg() based trylock will always fail.
	 *
	 * Avoid unconditionally taking the slow path by using
	 * rt_mutex_slowtrylock() which is covered by the debug code and can
	 * acquire a non-contended rtmutex.
	 */
	return rt_mutex_slowtrylock(lock);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
	__must_hold(&lock->wait_lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif
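
/*
 * Informal sketch of the retry protocol around unlock_rt_mutex_safe(),
 * as driven by rt_mutex_slowunlock() further down: the caller loops until
 * either the lockless release succeeds or a waiter shows up and the
 * wakeup path is taken instead:
 *
 *	while (!rt_mutex_has_waiters(lock)) {
 *		if (unlock_rt_mutex_safe(lock, flags))	<- drops wait_lock
 *			return;
 *		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 *	}
 */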

static __always_inline int __waiter_prio(struct task_struct *task)
{
	int prio = task->prio;

	if (!rt_or_dl_prio(prio))
		return DEFAULT_PRIO;

	return prio;
}

/*
 * Update the waiter->tree copy of the sort keys.
 */
static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	lockdep_assert_held(&waiter->lock->wait_lock);
	lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));

	waiter->tree.prio = __waiter_prio(task);
	waiter->tree.deadline = task->dl.deadline;
}

/*
 * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
 */
static __always_inline void
waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	lockdep_assert_held(&waiter->lock->wait_lock);
	lockdep_assert_held(&task->pi_lock);
	lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));

	waiter->pi_tree.prio = waiter->tree.prio;
	waiter->pi_tree.deadline = waiter->tree.deadline;
}

/*
 * Only use with rt_waiter_node_{less,equal}()
 */
#define task_to_waiter_node(p)	\
	&(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
#define task_to_waiter(p)	\
	&(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }

static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
					       struct rt_waiter_node *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->deadline, right->deadline);

	return 0;
}

static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
						struct rt_waiter_node *right)
{
	if (left->prio != right->prio)
		return 0;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 0 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return left->deadline == right->deadline;

	return 1;
}
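
/*
 * Illustrative ordering of the sort keys above (lower kernel prio value
 * sorts first, deadlines break ties between deadline tasks):
 *
 *	SCHED_DEADLINE waiter	negative prio, earlier dl.deadline sorts first
 *	SCHED_FIFO/RR waiter	kernel prio below MAX_RT_PRIO
 *	SCHED_OTHER waiter	DEFAULT_PRIO (see __waiter_prio())
 *
 * All SCHED_OTHER waiters share DEFAULT_PRIO and compare equal here; ties
 * are then broken by insertion order (and by the ww_ctx stamp for
 * ww_mutex, see __waiter_less() below).
 */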

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
				  struct rt_mutex_waiter *top_waiter)
{
	if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
		return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
	/*
	 * Note that RT tasks are excluded from same priority (lateral)
	 * steals to prevent the introduction of an unbounded latency.
	 */
	if (rt_or_dl_prio(waiter->tree.prio))
		return false;

	return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
#else
	return false;
#endif
}

#define __node_2_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, tree.entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
	struct rt_mutex_waiter *aw = __node_2_waiter(a);
	struct rt_mutex_waiter *bw = __node_2_waiter(b);

	if (rt_waiter_node_less(&aw->tree, &bw->tree))
		return 1;

	if (!build_ww_mutex())
		return 0;

	if (rt_waiter_node_less(&bw->tree, &aw->tree))
		return 0;

	/* NOTE: relies on waiter->ww_ctx being set before insertion */
	if (aw->ww_ctx) {
		if (!bw->ww_ctx)
			return 1;

		return (signed long)(aw->ww_ctx->stamp -
				     bw->ww_ctx->stamp) < 0;
	}

	return 0;
}

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	if (RB_EMPTY_NODE(&waiter->tree.entry))
		return;

	rb_erase_cached(&waiter->tree.entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree.entry);
}

#define __node_2_rt_node(node) \
	rb_entry((node), struct rt_waiter_node, entry)

static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
	return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&task->pi_lock);

	rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&task->pi_lock);

	if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
		return;

	rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree.entry);
}

static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
						 struct task_struct *p)
{
	struct task_struct *pi_task = NULL;

	lockdep_assert_held(&lock->wait_lock);
	lockdep_assert(rt_mutex_owner(lock) == p);
	lockdep_assert_held(&p->pi_lock);

	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

	rt_mutex_setprio(p, pi_task);
}

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
						     struct task_struct *task,
						     unsigned int wake_state)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
			WARN_ON_ONCE(wqh->rtlock_task);
		get_task_struct(task);
		wqh->rtlock_task = task;
	} else {
		wake_q_add(&wqh->head, task);
	}
}

static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
						struct rt_mutex_waiter *w)
{
	rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
		put_task_struct(wqh->rtlock_task);
		wqh->rtlock_task = NULL;
	}

	if (!wake_q_empty(&wqh->head))
		wake_up_q(&wqh->head);

	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
	preempt_enable();
}
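
/*
 * Informal sketch of how these wrappers are used by the unlock path
 * further down (see mark_wakeup_next_waiter() and rt_mutex_slowunlock()):
 *
 *	preempt_disable();
 *	rt_mutex_wake_q_add(wqh, waiter);		<- under wait_lock
 *	...
 *	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 *	rt_mutex_wake_up_q(wqh);			<- wakes, preempt_enable()
 */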

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
			      enum rtmutex_chainwalk chwalk)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
		return waiter != NULL;
	return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter is gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [Pn] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Normal locking order:
 *
 *   rtmutex->wait_lock
 *     task->pi_lock
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 * again:
 *	  loop_sanity_check();
 * retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P1]
 * [2]	  waiter = task->pi_blocked_on;		[P1]
 * [3]	  check_exit_conditions_1();		[P1]
 * [4]	  lock = waiter->lock;			[P1]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P1] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P1]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P1] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P1] + [L]
 * [8]	  unlock(task->pi_lock);		release [P1]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P2]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
 * [12]	  check_exit_conditions_4();		[P2] + [L]
 * [13]	  unlock(task->pi_lock);		release [P2]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 *
 * Where P1 is the blocking task and P2 is the lock owner; going up one step
 * the owner becomes the next blocked task etc..
 *
 *
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
					      enum rtmutex_chainwalk chwalk,
					      struct rt_mutex_base *orig_lock,
					      struct rt_mutex_base *next_lock,
					      struct rt_mutex_waiter *orig_waiter,
					      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex_base *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked on.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
	 * consider:
	 *
	 *   P1: A, ww_A, ww_B
	 *   P2: ww_B, ww_A
	 *   P3: A
	 *
	 * P3 should not return -EDEADLK because it gets trapped in the cycle
	 * created by P1 and P2 (which will resolve -- and runs into
	 * max_lock_depth above). Therefore disable detect_deadlock such that
	 * the below termination condition can trigger once all relevant tasks
	 * are boosted.
	 *
	 * Even when we start with ww_mutex we can disable deadlock detection,
	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
	 * Suppressing it here however is not sufficient since we might still
	 * hit [6] due to adjustment driven iteration.
	 *
	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
	 * utterly fail to report it; lockdep should.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
		detect_deadlock = false;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock; per holding task->pi_lock we can't unblock
	 * and guarantee @lock's existence.
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 *
	 * Per the above, holding task->pi_lock guarantees lock exists, so
	 * inverting this lock order is infeasible from a life-time
	 * perspective.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		ret = -EDEADLK;

		/*
		 * When the deadlock is due to ww_mutex; also see above. Don't
		 * report the deadlock and instead let the ww_mutex wound/die
		 * logic pick which of the contending threads gets -EDEADLK.
		 *
		 * NOTE: assumes the cycle only contains a single ww_class; any
		 * other configuration and we fail to report; also, see
		 * lockdep.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
			ret = 0;

		raw_spin_unlock(&lock->wait_lock);
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue[7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = get_task_struct(rt_mutex_owner(lock));
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);

	/*
	 * Update the waiter prio fields now that we're dequeued.
	 *
	 * These values can have changed through either:
	 *
	 *   sys_sched_set_scheduler() / sys_sched_setattr()
	 *
	 * or
	 *
	 *   DL CBS enforcement advancing the effective deadline.
	 */
	waiter_update_prio(waiter, task);

	rt_mutex_enqueue(lock, waiter);

	/*
	 * [8] Release the (blocking) task in preparation for
	 * taking the owner task in [10].
	 *
	 * Since we hold lock->wait_lock, task cannot unblock, even if we
	 * release task->pi_lock.
	 */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		top_waiter = rt_mutex_top_waiter(lock);
		if (prerequeue_top_waiter != top_waiter)
			wake_up_state(top_waiter->task, top_waiter->wake_state);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/*
	 * [10] Grab the next task, i.e. the owner of @lock
	 *
	 * Per holding lock->wait_lock and checking for !owner above, there
	 * must be an owner and it cannot go away.
	 */
	task = get_task_struct(rt_mutex_owner(lock));
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		waiter_clone_prio(waiter, task);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(lock, task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		waiter_clone_prio(waiter, task);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(lock, task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
	put_task_struct(task);

	return ret;
}
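
/*
 * Worked example (illustrative): T2 owns L2 and is itself blocked on L1,
 * which is owned by T1. When a high priority task T3 blocks on L2, the
 * resulting chain walk is:
 *
 *	T3 blocks on L2		enqueue T3 on L2, boost T2
 *	T2 is blocked on L1	-> rt_mutex_adjust_prio_chain(T2, ...)
 *	  [7]  requeue T2's waiter on L1 with its boosted priority
 *	  [11] requeue T2's waiter in T1->pi_waiters, adjust T1's priority
 *	  [12] T1 is not blocked -> end of chain, stop
 *
 * Each step holds at most one wait_lock and one pi_lock, which is what
 * keeps the walk preemptible.
 */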

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocks_on_rt_mutex(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

		/*
		 * If waiter is the highest priority waiter of @lock,
		 * or allowed to steal it, take it over.
		 */
		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
			/*
			 * We can acquire the lock. Remove the waiter from the
			 * lock waiters tree.
			 */
			rt_mutex_dequeue(lock, waiter);
		} else {
			return 0;
		}
	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/* Check whether the trylock can steal it. */
			if (!rt_mutex_steal(task_to_waiter(task),
					    rt_mutex_top_waiter(lock)))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

takeit:
	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	return 1;
}
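
/*
 * For illustration, the two ways try_to_take_rt_mutex() is used below:
 *
 *	try_to_take_rt_mutex(lock, current, NULL);
 *		trylock attempt, e.g. __rt_mutex_slowtrylock()
 *	try_to_take_rt_mutex(lock, current, waiter);
 *		retry with an already enqueued waiter, e.g. in
 *		rt_mutex_slowlock_block() after waking up
 */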

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk,
					   struct wake_q_head *wake_q)
	__must_hold(&lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex_base *next_lock;
	int chain_walk = 0, res;

	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 *
	 * Except for ww_mutex, in that case the chain walk must already deal
	 * with spurious cycles, see the comments at [3] and [6].
	 */
	if (owner == task && !(build_ww_mutex() && ww_ctx))
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	waiter->task = task;
	waiter->lock = lock;
	waiter_update_prio(waiter, task);
	waiter_clone_prio(waiter, task);

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (build_ww_mutex() && ww_ctx) {
		struct rt_mutex *rtm;

		/* Check whether the waiter should back out immediately */
		rtm = container_of(lock, struct rt_mutex, rtmutex);
		__assume_ctx_lock(&rtm->rtmutex.wait_lock);
		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q);
		if (res) {
			raw_spin_lock(&task->pi_lock);
			rt_mutex_dequeue(lock, waiter);
			task->pi_blocked_on = NULL;
			raw_spin_unlock(&task->pi_lock);
			return res;
		}
	}

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		rt_mutex_adjust_prio(lock, owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
					    struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter *waiter;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(lock, current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
	 */
	preempt_disable();
	rt_mutex_wake_q_add(wqh, waiter);
	raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
	__must_hold(&lock->wait_lock)
{
	int ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock, true);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = __rt_mutex_slowtrylock(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 1;

	return rt_mutex_slowtrylock(lock);
}

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
	DEFINE_RT_WAKE_Q(wqh);
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 *	foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock); <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 *	raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(&wqh, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
		return;

	rt_mutex_slowunlock(lock);
}
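
/*
 * Hand-off illustration (informal): rt_mutex_slowunlock() leaves the owner
 * field as RT_MUTEX_HAS_WAITERS (no owner, waiters bit set), so any fast
 * path cmpxchg by another task fails and no task of lower priority than
 * the woken top waiter can snatch the lock. The woken waiter then acquires
 * it via try_to_take_rt_mutex(lock, current, waiter) when it resumes in
 * the slowlock loop.
 */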

#ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	bool res = true;

	rcu_read_lock();
	for (;;) {
		/* If owner changed, trylock again. */
		if (owner != rt_mutex_owner(lock))
			break;
		/*
		 * Ensure that @owner is dereferenced after checking that
		 * the lock owner still matches @owner. If that fails,
		 * @owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();
		/*
		 * Stop spinning when:
		 *  - the lock owner has been scheduled out
		 *  - current is no longer the top waiter
		 *  - current is requested to reschedule (redundant
		 *    for CONFIG_PREEMPT_RCU=y)
		 *  - the VCPU on which owner runs is preempted
		 */
		if (!owner_on_cpu(owner) || need_resched() ||
		    !data_race(rt_mutex_waiter_is_top_waiter(lock, waiter))) {
			res = false;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}
#else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	return false;
}
#endif
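
/*
 * Informal sketch of the caller pattern for the adaptive wait above (see
 * rt_mutex_slowlock_block() and rtlock_slowlock_locked() below):
 *
 *	owner = waiter is top waiter ? rt_mutex_owner(lock) : NULL;
 *	raw_spin_unlock_irq(&lock->wait_lock);
 *	if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
 *		schedule();		<- only sleep when spinning is futile
 *	raw_spin_lock_irq(&lock->wait_lock);
 */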

#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *  - rtmutex, futex on all kernels
 *  - mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter)
	__must_hold(&lock->wait_lock)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_base *next_lock;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	rt_mutex_adjust_prio(lock, owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:	 the rt_mutex to take
 * @ww_ctx:	 WW mutex context pointer
 * @state:	 the state the task should block in (TASK_INTERRUPTIBLE
 *		 or TASK_UNINTERRUPTIBLE)
 * @timeout:	 the pre-initialized and started timer, or NULL for none
 * @waiter:	 the pre-initialized rt_mutex_waiter
 * @wake_q:	 wake_q of tasks to wake when we drop the lock->wait_lock
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
					   struct ww_acquire_ctx *ww_ctx,
					   unsigned int state,
					   struct hrtimer_sleeper *timeout,
					   struct rt_mutex_waiter *waiter,
					   struct wake_q_head *wake_q)
	__releases(&lock->wait_lock) __acquires(&lock->wait_lock)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct task_struct *owner;
	int ret = 0;

	__assume_ctx_lock(&rtm->rtmutex.wait_lock);

	lockevent_inc(rtmutex_slow_block);
	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter)) {
			lockevent_inc(rtmutex_slow_acq3);
			break;
		}

		if (timeout && !timeout->task) {
			ret = -ETIMEDOUT;
			break;
		}
		if (signal_pending_state(state, current)) {
			ret = -EINTR;
			break;
		}

		if (build_ww_mutex() && ww_ctx) {
			ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
			if (ret)
				break;
		}

		if (waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);

		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) {
			lockevent_inc(rtmutex_slow_sleep);
			rt_mutex_schedule();
		}

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}
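
/*
 * Illustrative outcomes of the wait loop above:
 *
 *	0		lock acquired (try_to_take_rt_mutex() succeeded)
 *	-ETIMEDOUT	@timeout expired (timeout->task was cleared)
 *	-EINTR		a signal is pending and @state allows interruption
 *	-EDEADLK	ww_mutex only: __ww_mutex_check_kill() told this
 *			waiter to back off
 *
 * On any error the caller (__rt_mutex_slowlock()) removes the waiter
 * again via remove_waiter() and fixes up the waiters bit.
 */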

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
					     struct rt_mutex_base *lock,
					     struct rt_mutex_waiter *w)
	__must_hold(&lock->wait_lock)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	if (build_ww_mutex() && w->ww_ctx)
		return;

	raw_spin_unlock_irq(&lock->wait_lock);

	WARN(1, "rtmutex deadlock detected\n");

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		rt_mutex_schedule();
	}
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 * @chwalk:	Indicator whether full or partial chainwalk is requested
 * @waiter:	Initialized waiter for blocking
 * @wake_q:	The wake_q to wake tasks after we release the wait_lock
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
				       struct ww_acquire_ctx *ww_ctx,
				       unsigned int state,
				       enum rtmutex_chainwalk chwalk,
				       struct rt_mutex_waiter *waiter,
				       struct wake_q_head *wake_q)
	__must_hold(&lock->wait_lock)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct ww_mutex *ww = ww_container_of(rtm);
	int ret;

	__assume_ctx_lock(&rtm->rtmutex.wait_lock);
	lockdep_assert_held(&lock->wait_lock);
	lockevent_inc(rtmutex_slowlock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		if (build_ww_mutex() && ww_ctx) {
			__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		lockevent_inc(rtmutex_slow_acq1);
		return 0;
	}

	set_current_state(state);

	trace_contention_begin(lock, LCB_F_RT);

	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q);
	if (likely(!ret))
		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter, wake_q);

	if (likely(!ret)) {
		/* acquired the lock */
		if (build_ww_mutex() && ww_ctx) {
			if (!ww_ctx->is_wait_die)
				__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		lockevent_inc(rtmutex_slow_acq2);
	} else {
		__set_current_state(TASK_RUNNING);
		remove_waiter(lock, waiter);
		rt_mutex_handle_deadlock(ret, chwalk, lock, waiter);
		lockevent_inc(rtmutex_deadlock);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock, true);

	trace_contention_end(lock, ret);

	return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
					     struct ww_acquire_ctx *ww_ctx,
					     unsigned int state,
					     struct wake_q_head *wake_q)
	__must_hold(&lock->wait_lock)
{
	struct rt_mutex_waiter waiter;
	int ret;

	rt_mutex_init_waiter(&waiter);
	waiter.ww_ctx = ww_ctx;

	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
				  &waiter, wake_q);

	debug_rt_mutex_free_waiter(&waiter);
	lockevent_cond_inc(rtmutex_slow_wake, !wake_q_empty(wake_q));
	return ret;
}

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state)
{
	DEFINE_WAKE_Q(wake_q);
	unsigned long flags;
	int ret;

	/*
	 * Do all pre-schedule work here, before we queue a waiter and invoke
	 * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would
	 * otherwise recurse back into task_blocks_on_rt_mutex() through
	 * rtlock_slowlock() and will then enqueue a second waiter for this
	 * same task and things get really confusing real fast.
	 */
	rt_mutex_pre_schedule();

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q);
	raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
	rt_mutex_post_schedule();

	return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	lockdep_assert(!current->pi_blocked_on);

	if (likely(rt_mutex_try_acquire(lock)))
		return 0;

	return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */

#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:	The underlying RT mutex
 * @wake_q:	The wake_q to wake tasks after we release the wait_lock
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
					   struct wake_q_head *wake_q)
	__releases(&lock->wait_lock) __acquires(&lock->wait_lock)
{
	struct rt_mutex_waiter waiter;
	struct task_struct *owner;

	lockdep_assert_held(&lock->wait_lock);
	lockevent_inc(rtlock_slowlock);

	if (try_to_take_rt_mutex(lock, current, NULL)) {
		lockevent_inc(rtlock_slow_acq1);
		return;
	}

	rt_mutex_init_rtlock_waiter(&waiter);

	/* Save current state and set state to TASK_RTLOCK_WAIT */
	current_save_and_set_rtlock_wait_state();

	trace_contention_begin(lock, LCB_F_RT);

	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q);

	for (;;) {
		/* Try to acquire the lock again */
		if (try_to_take_rt_mutex(lock, current, &waiter)) {
			lockevent_inc(rtlock_slow_acq2);
			break;
		}

		if (&waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);

		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) {
			lockevent_inc(rtlock_slow_sleep);
			schedule_rtlock();
		}

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(TASK_RTLOCK_WAIT);
	}

	/* Restore the task state */
	current_restore_rtlock_saved_state();

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
	 * We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock, true);
	debug_rt_mutex_free_waiter(&waiter);

	trace_contention_end(lock, 0);
	lockevent_cond_inc(rtlock_slow_wake, !wake_q_empty(wake_q));
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	rtlock_slowlock_locked(lock, &wake_q);
	raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */
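
/*
 * Note: rtlock_slowlock() above is the contended path behind the
 * PREEMPT_RT spinlock/rwlock substitution. The expected caller shape,
 * roughly, is a lockless fast path that falls back here:
 *
 *	if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
 *		rtlock_slowlock(rtm);
 *
 * with rtm pointing to the rt_mutex_base embedded in the spinlock/rwlock
 * type; the actual wrappers live outside this file.
 */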