1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * RT-Mutexes: simple blocking mutual exclusion locks with PI support 4 * 5 * started by Ingo Molnar and Thomas Gleixner. 6 * 7 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 8 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 9 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt 10 * Copyright (C) 2006 Esben Nielsen 11 * Adaptive Spinlocks: 12 * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, 13 * and Peter Morreale, 14 * Adaptive Spinlocks simplification: 15 * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> 16 * 17 * See Documentation/locking/rt-mutex-design.rst for details. 18 */ 19 #include <linux/sched.h> 20 #include <linux/sched/debug.h> 21 #include <linux/sched/deadline.h> 22 #include <linux/sched/signal.h> 23 #include <linux/sched/rt.h> 24 #include <linux/sched/wake_q.h> 25 #include <linux/ww_mutex.h> 26 27 #include <trace/events/lock.h> 28 29 #include "rtmutex_common.h" 30 31 #ifndef WW_RT 32 # define build_ww_mutex() (false) 33 # define ww_container_of(rtm) NULL 34 35 static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter, 36 struct rt_mutex *lock, 37 struct ww_acquire_ctx *ww_ctx) 38 { 39 return 0; 40 } 41 42 static inline void __ww_mutex_check_waiters(struct rt_mutex *lock, 43 struct ww_acquire_ctx *ww_ctx) 44 { 45 } 46 47 static inline void ww_mutex_lock_acquired(struct ww_mutex *lock, 48 struct ww_acquire_ctx *ww_ctx) 49 { 50 } 51 52 static inline int __ww_mutex_check_kill(struct rt_mutex *lock, 53 struct rt_mutex_waiter *waiter, 54 struct ww_acquire_ctx *ww_ctx) 55 { 56 return 0; 57 } 58 59 #else 60 # define build_ww_mutex() (true) 61 # define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base) 62 # include "ww_mutex.h" 63 #endif 64 65 /* 66 * lock->owner state tracking: 67 * 68 * lock->owner holds the task_struct pointer of the owner. Bit 0 69 * is used to keep track of the "lock has waiters" state. 70 * 71 * owner bit0 72 * NULL 0 lock is free (fast acquire possible) 73 * NULL 1 lock is free and has waiters and the top waiter 74 * is going to take the lock* 75 * taskpointer 0 lock is held (fast release possible) 76 * taskpointer 1 lock is held and has waiters** 77 * 78 * The fast atomic compare exchange based acquire and release is only 79 * possible when bit 0 of lock->owner is 0. 80 * 81 * (*) It also can be a transitional state when grabbing the lock 82 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, 83 * we need to set the bit0 before looking at the lock, and the owner may be 84 * NULL in this small time, hence this can be a transitional state. 85 * 86 * (**) There is a small time when bit 0 is set but there are no 87 * waiters. This can happen when grabbing the lock in the slow path. 88 * To prevent a cmpxchg of the owner releasing the lock, we need to 89 * set this bit before looking at the lock. 90 */ 91 92 static __always_inline struct task_struct * 93 rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner) 94 { 95 unsigned long val = (unsigned long)owner; 96 97 if (rt_mutex_has_waiters(lock)) 98 val |= RT_MUTEX_HAS_WAITERS; 99 100 return (struct task_struct *)val; 101 } 102 103 static __always_inline void 104 rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) 105 { 106 /* 107 * lock->wait_lock is held but explicit acquire semantics are needed 108 * for a new lock owner so WRITE_ONCE is insufficient. 109 */ 110 xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner)); 111 } 112 113 static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock) 114 { 115 /* lock->wait_lock is held so the unlock provides release semantics. */ 116 WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL)); 117 } 118 119 static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) 120 { 121 lock->owner = (struct task_struct *) 122 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 123 } 124 125 static __always_inline void 126 fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock) 127 { 128 unsigned long owner, *p = (unsigned long *) &lock->owner; 129 130 if (rt_mutex_has_waiters(lock)) 131 return; 132 133 /* 134 * The rbtree has no waiters enqueued, now make sure that the 135 * lock->owner still has the waiters bit set, otherwise the 136 * following can happen: 137 * 138 * CPU 0 CPU 1 CPU2 139 * l->owner=T1 140 * rt_mutex_lock(l) 141 * lock(l->lock) 142 * l->owner = T1 | HAS_WAITERS; 143 * enqueue(T2) 144 * boost() 145 * unlock(l->lock) 146 * block() 147 * 148 * rt_mutex_lock(l) 149 * lock(l->lock) 150 * l->owner = T1 | HAS_WAITERS; 151 * enqueue(T3) 152 * boost() 153 * unlock(l->lock) 154 * block() 155 * signal(->T2) signal(->T3) 156 * lock(l->lock) 157 * dequeue(T2) 158 * deboost() 159 * unlock(l->lock) 160 * lock(l->lock) 161 * dequeue(T3) 162 * ==> wait list is empty 163 * deboost() 164 * unlock(l->lock) 165 * lock(l->lock) 166 * fixup_rt_mutex_waiters() 167 * if (wait_list_empty(l) { 168 * l->owner = owner 169 * owner = l->owner & ~HAS_WAITERS; 170 * ==> l->owner = T1 171 * } 172 * lock(l->lock) 173 * rt_mutex_unlock(l) fixup_rt_mutex_waiters() 174 * if (wait_list_empty(l) { 175 * owner = l->owner & ~HAS_WAITERS; 176 * cmpxchg(l->owner, T1, NULL) 177 * ===> Success (l->owner = NULL) 178 * 179 * l->owner = owner 180 * ==> l->owner = T1 181 * } 182 * 183 * With the check for the waiter bit in place T3 on CPU2 will not 184 * overwrite. All tasks fiddling with the waiters bit are 185 * serialized by l->lock, so nothing else can modify the waiters 186 * bit. If the bit is set then nothing can change l->owner either 187 * so the simple RMW is safe. The cmpxchg() will simply fail if it 188 * happens in the middle of the RMW because the waiters bit is 189 * still set. 190 */ 191 owner = READ_ONCE(*p); 192 if (owner & RT_MUTEX_HAS_WAITERS) { 193 /* 194 * See rt_mutex_set_owner() and rt_mutex_clear_owner() on 195 * why xchg_acquire() is used for updating owner for 196 * locking and WRITE_ONCE() for unlocking. 197 * 198 * WRITE_ONCE() would work for the acquire case too, but 199 * in case that the lock acquisition failed it might 200 * force other lockers into the slow path unnecessarily. 201 */ 202 if (acquire_lock) 203 xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS); 204 else 205 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); 206 } 207 } 208 209 /* 210 * We can speed up the acquire/release, if there's no debugging state to be 211 * set up. 212 */ 213 #ifndef CONFIG_DEBUG_RT_MUTEXES 214 static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, 215 struct task_struct *old, 216 struct task_struct *new) 217 { 218 return try_cmpxchg_acquire(&lock->owner, &old, new); 219 } 220 221 static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) 222 { 223 return rt_mutex_cmpxchg_acquire(lock, NULL, current); 224 } 225 226 static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, 227 struct task_struct *old, 228 struct task_struct *new) 229 { 230 return try_cmpxchg_release(&lock->owner, &old, new); 231 } 232 233 /* 234 * Callers must hold the ->wait_lock -- which is the whole purpose as we force 235 * all future threads that attempt to [Rmw] the lock to the slowpath. As such 236 * relaxed semantics suffice. 237 */ 238 static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) 239 { 240 unsigned long *p = (unsigned long *) &lock->owner; 241 unsigned long owner, new; 242 243 owner = READ_ONCE(*p); 244 do { 245 new = owner | RT_MUTEX_HAS_WAITERS; 246 } while (!try_cmpxchg_relaxed(p, &owner, new)); 247 248 /* 249 * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE 250 * operations in the event of contention. Ensure the successful 251 * cmpxchg is visible. 252 */ 253 smp_mb__after_atomic(); 254 } 255 256 /* 257 * Safe fastpath aware unlock: 258 * 1) Clear the waiters bit 259 * 2) Drop lock->wait_lock 260 * 3) Try to unlock the lock with cmpxchg 261 */ 262 static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock, 263 unsigned long flags) 264 __releases(lock->wait_lock) 265 { 266 struct task_struct *owner = rt_mutex_owner(lock); 267 268 clear_rt_mutex_waiters(lock); 269 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 270 /* 271 * If a new waiter comes in between the unlock and the cmpxchg 272 * we have two situations: 273 * 274 * unlock(wait_lock); 275 * lock(wait_lock); 276 * cmpxchg(p, owner, 0) == owner 277 * mark_rt_mutex_waiters(lock); 278 * acquire(lock); 279 * or: 280 * 281 * unlock(wait_lock); 282 * lock(wait_lock); 283 * mark_rt_mutex_waiters(lock); 284 * 285 * cmpxchg(p, owner, 0) != owner 286 * enqueue_waiter(); 287 * unlock(wait_lock); 288 * lock(wait_lock); 289 * wake waiter(); 290 * unlock(wait_lock); 291 * lock(wait_lock); 292 * acquire(lock); 293 */ 294 return rt_mutex_cmpxchg_release(lock, owner, NULL); 295 } 296 297 #else 298 static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, 299 struct task_struct *old, 300 struct task_struct *new) 301 { 302 return false; 303 304 } 305 306 static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock); 307 308 static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) 309 { 310 /* 311 * With debug enabled rt_mutex_cmpxchg trylock() will always fail. 312 * 313 * Avoid unconditionally taking the slow path by using 314 * rt_mutex_slow_trylock() which is covered by the debug code and can 315 * acquire a non-contended rtmutex. 316 */ 317 return rt_mutex_slowtrylock(lock); 318 } 319 320 static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, 321 struct task_struct *old, 322 struct task_struct *new) 323 { 324 return false; 325 } 326 327 static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) 328 { 329 lock->owner = (struct task_struct *) 330 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 331 } 332 333 /* 334 * Simple slow path only version: lock->owner is protected by lock->wait_lock. 335 */ 336 static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock, 337 unsigned long flags) 338 __releases(lock->wait_lock) 339 { 340 lock->owner = NULL; 341 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 342 return true; 343 } 344 #endif 345 346 static __always_inline int __waiter_prio(struct task_struct *task) 347 { 348 int prio = task->prio; 349 350 if (!rt_or_dl_prio(prio)) 351 return DEFAULT_PRIO; 352 353 return prio; 354 } 355 356 /* 357 * Update the waiter->tree copy of the sort keys. 358 */ 359 static __always_inline void 360 waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) 361 { 362 lockdep_assert_held(&waiter->lock->wait_lock); 363 lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry)); 364 365 waiter->tree.prio = __waiter_prio(task); 366 waiter->tree.deadline = task->dl.deadline; 367 } 368 369 /* 370 * Update the waiter->pi_tree copy of the sort keys (from the tree copy). 371 */ 372 static __always_inline void 373 waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) 374 { 375 lockdep_assert_held(&waiter->lock->wait_lock); 376 lockdep_assert_held(&task->pi_lock); 377 lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry)); 378 379 waiter->pi_tree.prio = waiter->tree.prio; 380 waiter->pi_tree.deadline = waiter->tree.deadline; 381 } 382 383 /* 384 * Only use with rt_waiter_node_{less,equal}() 385 */ 386 #define task_to_waiter_node(p) \ 387 &(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } 388 #define task_to_waiter(p) \ 389 &(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) } 390 391 static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left, 392 struct rt_waiter_node *right) 393 { 394 if (left->prio < right->prio) 395 return 1; 396 397 /* 398 * If both waiters have dl_prio(), we check the deadlines of the 399 * associated tasks. 400 * If left waiter has a dl_prio(), and we didn't return 1 above, 401 * then right waiter has a dl_prio() too. 402 */ 403 if (dl_prio(left->prio)) 404 return dl_time_before(left->deadline, right->deadline); 405 406 return 0; 407 } 408 409 static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left, 410 struct rt_waiter_node *right) 411 { 412 if (left->prio != right->prio) 413 return 0; 414 415 /* 416 * If both waiters have dl_prio(), we check the deadlines of the 417 * associated tasks. 418 * If left waiter has a dl_prio(), and we didn't return 0 above, 419 * then right waiter has a dl_prio() too. 420 */ 421 if (dl_prio(left->prio)) 422 return left->deadline == right->deadline; 423 424 return 1; 425 } 426 427 static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, 428 struct rt_mutex_waiter *top_waiter) 429 { 430 if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree)) 431 return true; 432 433 #ifdef RT_MUTEX_BUILD_SPINLOCKS 434 /* 435 * Note that RT tasks are excluded from same priority (lateral) 436 * steals to prevent the introduction of an unbounded latency. 437 */ 438 if (rt_or_dl_prio(waiter->tree.prio)) 439 return false; 440 441 return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree); 442 #else 443 return false; 444 #endif 445 } 446 447 #define __node_2_waiter(node) \ 448 rb_entry((node), struct rt_mutex_waiter, tree.entry) 449 450 static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b) 451 { 452 struct rt_mutex_waiter *aw = __node_2_waiter(a); 453 struct rt_mutex_waiter *bw = __node_2_waiter(b); 454 455 if (rt_waiter_node_less(&aw->tree, &bw->tree)) 456 return 1; 457 458 if (!build_ww_mutex()) 459 return 0; 460 461 if (rt_waiter_node_less(&bw->tree, &aw->tree)) 462 return 0; 463 464 /* NOTE: relies on waiter->ww_ctx being set before insertion */ 465 if (aw->ww_ctx) { 466 if (!bw->ww_ctx) 467 return 1; 468 469 return (signed long)(aw->ww_ctx->stamp - 470 bw->ww_ctx->stamp) < 0; 471 } 472 473 return 0; 474 } 475 476 static __always_inline void 477 rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) 478 { 479 lockdep_assert_held(&lock->wait_lock); 480 481 rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less); 482 } 483 484 static __always_inline void 485 rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) 486 { 487 lockdep_assert_held(&lock->wait_lock); 488 489 if (RB_EMPTY_NODE(&waiter->tree.entry)) 490 return; 491 492 rb_erase_cached(&waiter->tree.entry, &lock->waiters); 493 RB_CLEAR_NODE(&waiter->tree.entry); 494 } 495 496 #define __node_2_rt_node(node) \ 497 rb_entry((node), struct rt_waiter_node, entry) 498 499 static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b) 500 { 501 return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b)); 502 } 503 504 static __always_inline void 505 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 506 { 507 lockdep_assert_held(&task->pi_lock); 508 509 rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less); 510 } 511 512 static __always_inline void 513 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 514 { 515 lockdep_assert_held(&task->pi_lock); 516 517 if (RB_EMPTY_NODE(&waiter->pi_tree.entry)) 518 return; 519 520 rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters); 521 RB_CLEAR_NODE(&waiter->pi_tree.entry); 522 } 523 524 static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock, 525 struct task_struct *p) 526 { 527 struct task_struct *pi_task = NULL; 528 529 lockdep_assert_held(&lock->wait_lock); 530 lockdep_assert(rt_mutex_owner(lock) == p); 531 lockdep_assert_held(&p->pi_lock); 532 533 if (task_has_pi_waiters(p)) 534 pi_task = task_top_pi_waiter(p)->task; 535 536 rt_mutex_setprio(p, pi_task); 537 } 538 539 /* RT mutex specific wake_q wrappers */ 540 static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh, 541 struct task_struct *task, 542 unsigned int wake_state) 543 { 544 if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) { 545 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) 546 WARN_ON_ONCE(wqh->rtlock_task); 547 get_task_struct(task); 548 wqh->rtlock_task = task; 549 } else { 550 wake_q_add(&wqh->head, task); 551 } 552 } 553 554 static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh, 555 struct rt_mutex_waiter *w) 556 { 557 rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state); 558 } 559 560 static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh) 561 { 562 if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) { 563 wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT); 564 put_task_struct(wqh->rtlock_task); 565 wqh->rtlock_task = NULL; 566 } 567 568 if (!wake_q_empty(&wqh->head)) 569 wake_up_q(&wqh->head); 570 571 /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ 572 preempt_enable(); 573 } 574 575 /* 576 * Deadlock detection is conditional: 577 * 578 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted 579 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. 580 * 581 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always 582 * conducted independent of the detect argument. 583 * 584 * If the waiter argument is NULL this indicates the deboost path and 585 * deadlock detection is disabled independent of the detect argument 586 * and the config settings. 587 */ 588 static __always_inline bool 589 rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, 590 enum rtmutex_chainwalk chwalk) 591 { 592 if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) 593 return waiter != NULL; 594 return chwalk == RT_MUTEX_FULL_CHAINWALK; 595 } 596 597 static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p) 598 { 599 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; 600 } 601 602 /* 603 * Adjust the priority chain. Also used for deadlock detection. 604 * Decreases task's usage by one - may thus free the task. 605 * 606 * @task: the task owning the mutex (owner) for which a chain walk is 607 * probably needed 608 * @chwalk: do we have to carry out deadlock detection? 609 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 610 * things for a task that has just got its priority adjusted, and 611 * is waiting on a mutex) 612 * @next_lock: the mutex on which the owner of @orig_lock was blocked before 613 * we dropped its pi_lock. Is never dereferenced, only used for 614 * comparison to detect lock chain changes. 615 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated 616 * its priority to the mutex owner (can be NULL in the case 617 * depicted above or if the top waiter is gone away and we are 618 * actually deboosting the owner) 619 * @top_task: the current top waiter 620 * 621 * Returns 0 or -EDEADLK. 622 * 623 * Chain walk basics and protection scope 624 * 625 * [R] refcount on task 626 * [Pn] task->pi_lock held 627 * [L] rtmutex->wait_lock held 628 * 629 * Normal locking order: 630 * 631 * rtmutex->wait_lock 632 * task->pi_lock 633 * 634 * Step Description Protected by 635 * function arguments: 636 * @task [R] 637 * @orig_lock if != NULL @top_task is blocked on it 638 * @next_lock Unprotected. Cannot be 639 * dereferenced. Only used for 640 * comparison. 641 * @orig_waiter if != NULL @top_task is blocked on it 642 * @top_task current, or in case of proxy 643 * locking protected by calling 644 * code 645 * again: 646 * loop_sanity_check(); 647 * retry: 648 * [1] lock(task->pi_lock); [R] acquire [P1] 649 * [2] waiter = task->pi_blocked_on; [P1] 650 * [3] check_exit_conditions_1(); [P1] 651 * [4] lock = waiter->lock; [P1] 652 * [5] if (!try_lock(lock->wait_lock)) { [P1] try to acquire [L] 653 * unlock(task->pi_lock); release [P1] 654 * goto retry; 655 * } 656 * [6] check_exit_conditions_2(); [P1] + [L] 657 * [7] requeue_lock_waiter(lock, waiter); [P1] + [L] 658 * [8] unlock(task->pi_lock); release [P1] 659 * put_task_struct(task); release [R] 660 * [9] check_exit_conditions_3(); [L] 661 * [10] task = owner(lock); [L] 662 * get_task_struct(task); [L] acquire [R] 663 * lock(task->pi_lock); [L] acquire [P2] 664 * [11] requeue_pi_waiter(tsk, waiters(lock));[P2] + [L] 665 * [12] check_exit_conditions_4(); [P2] + [L] 666 * [13] unlock(task->pi_lock); release [P2] 667 * unlock(lock->wait_lock); release [L] 668 * goto again; 669 * 670 * Where P1 is the blocking task and P2 is the lock owner; going up one step 671 * the owner becomes the next blocked task etc.. 672 * 673 * 674 */ 675 static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, 676 enum rtmutex_chainwalk chwalk, 677 struct rt_mutex_base *orig_lock, 678 struct rt_mutex_base *next_lock, 679 struct rt_mutex_waiter *orig_waiter, 680 struct task_struct *top_task) 681 { 682 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 683 struct rt_mutex_waiter *prerequeue_top_waiter; 684 int ret = 0, depth = 0; 685 struct rt_mutex_base *lock; 686 bool detect_deadlock; 687 bool requeue = true; 688 689 detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); 690 691 /* 692 * The (de)boosting is a step by step approach with a lot of 693 * pitfalls. We want this to be preemptible and we want hold a 694 * maximum of two locks per step. So we have to check 695 * carefully whether things change under us. 696 */ 697 again: 698 /* 699 * We limit the lock chain length for each invocation. 700 */ 701 if (++depth > max_lock_depth) { 702 static int prev_max; 703 704 /* 705 * Print this only once. If the admin changes the limit, 706 * print a new message when reaching the limit again. 707 */ 708 if (prev_max != max_lock_depth) { 709 prev_max = max_lock_depth; 710 printk(KERN_WARNING "Maximum lock depth %d reached " 711 "task: %s (%d)\n", max_lock_depth, 712 top_task->comm, task_pid_nr(top_task)); 713 } 714 put_task_struct(task); 715 716 return -EDEADLK; 717 } 718 719 /* 720 * We are fully preemptible here and only hold the refcount on 721 * @task. So everything can have changed under us since the 722 * caller or our own code below (goto retry/again) dropped all 723 * locks. 724 */ 725 retry: 726 /* 727 * [1] Task cannot go away as we did a get_task() before ! 728 */ 729 raw_spin_lock_irq(&task->pi_lock); 730 731 /* 732 * [2] Get the waiter on which @task is blocked on. 733 */ 734 waiter = task->pi_blocked_on; 735 736 /* 737 * [3] check_exit_conditions_1() protected by task->pi_lock. 738 */ 739 740 /* 741 * Check whether the end of the boosting chain has been 742 * reached or the state of the chain has changed while we 743 * dropped the locks. 744 */ 745 if (!waiter) 746 goto out_unlock_pi; 747 748 /* 749 * Check the orig_waiter state. After we dropped the locks, 750 * the previous owner of the lock might have released the lock. 751 */ 752 if (orig_waiter && !rt_mutex_owner(orig_lock)) 753 goto out_unlock_pi; 754 755 /* 756 * We dropped all locks after taking a refcount on @task, so 757 * the task might have moved on in the lock chain or even left 758 * the chain completely and blocks now on an unrelated lock or 759 * on @orig_lock. 760 * 761 * We stored the lock on which @task was blocked in @next_lock, 762 * so we can detect the chain change. 763 */ 764 if (next_lock != waiter->lock) 765 goto out_unlock_pi; 766 767 /* 768 * There could be 'spurious' loops in the lock graph due to ww_mutex, 769 * consider: 770 * 771 * P1: A, ww_A, ww_B 772 * P2: ww_B, ww_A 773 * P3: A 774 * 775 * P3 should not return -EDEADLK because it gets trapped in the cycle 776 * created by P1 and P2 (which will resolve -- and runs into 777 * max_lock_depth above). Therefore disable detect_deadlock such that 778 * the below termination condition can trigger once all relevant tasks 779 * are boosted. 780 * 781 * Even when we start with ww_mutex we can disable deadlock detection, 782 * since we would supress a ww_mutex induced deadlock at [6] anyway. 783 * Supressing it here however is not sufficient since we might still 784 * hit [6] due to adjustment driven iteration. 785 * 786 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd 787 * utterly fail to report it; lockdep should. 788 */ 789 if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock) 790 detect_deadlock = false; 791 792 /* 793 * Drop out, when the task has no waiters. Note, 794 * top_waiter can be NULL, when we are in the deboosting 795 * mode! 796 */ 797 if (top_waiter) { 798 if (!task_has_pi_waiters(task)) 799 goto out_unlock_pi; 800 /* 801 * If deadlock detection is off, we stop here if we 802 * are not the top pi waiter of the task. If deadlock 803 * detection is enabled we continue, but stop the 804 * requeueing in the chain walk. 805 */ 806 if (top_waiter != task_top_pi_waiter(task)) { 807 if (!detect_deadlock) 808 goto out_unlock_pi; 809 else 810 requeue = false; 811 } 812 } 813 814 /* 815 * If the waiter priority is the same as the task priority 816 * then there is no further priority adjustment necessary. If 817 * deadlock detection is off, we stop the chain walk. If its 818 * enabled we continue, but stop the requeueing in the chain 819 * walk. 820 */ 821 if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) { 822 if (!detect_deadlock) 823 goto out_unlock_pi; 824 else 825 requeue = false; 826 } 827 828 /* 829 * [4] Get the next lock; per holding task->pi_lock we can't unblock 830 * and guarantee @lock's existence. 831 */ 832 lock = waiter->lock; 833 /* 834 * [5] We need to trylock here as we are holding task->pi_lock, 835 * which is the reverse lock order versus the other rtmutex 836 * operations. 837 * 838 * Per the above, holding task->pi_lock guarantees lock exists, so 839 * inverting this lock order is infeasible from a life-time 840 * perspective. 841 */ 842 if (!raw_spin_trylock(&lock->wait_lock)) { 843 raw_spin_unlock_irq(&task->pi_lock); 844 cpu_relax(); 845 goto retry; 846 } 847 848 /* 849 * [6] check_exit_conditions_2() protected by task->pi_lock and 850 * lock->wait_lock. 851 * 852 * Deadlock detection. If the lock is the same as the original 853 * lock which caused us to walk the lock chain or if the 854 * current lock is owned by the task which initiated the chain 855 * walk, we detected a deadlock. 856 */ 857 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 858 ret = -EDEADLK; 859 860 /* 861 * When the deadlock is due to ww_mutex; also see above. Don't 862 * report the deadlock and instead let the ww_mutex wound/die 863 * logic pick which of the contending threads gets -EDEADLK. 864 * 865 * NOTE: assumes the cycle only contains a single ww_class; any 866 * other configuration and we fail to report; also, see 867 * lockdep. 868 */ 869 if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx) 870 ret = 0; 871 872 raw_spin_unlock(&lock->wait_lock); 873 goto out_unlock_pi; 874 } 875 876 /* 877 * If we just follow the lock chain for deadlock detection, no 878 * need to do all the requeue operations. To avoid a truckload 879 * of conditionals around the various places below, just do the 880 * minimum chain walk checks. 881 */ 882 if (!requeue) { 883 /* 884 * No requeue[7] here. Just release @task [8] 885 */ 886 raw_spin_unlock(&task->pi_lock); 887 put_task_struct(task); 888 889 /* 890 * [9] check_exit_conditions_3 protected by lock->wait_lock. 891 * If there is no owner of the lock, end of chain. 892 */ 893 if (!rt_mutex_owner(lock)) { 894 raw_spin_unlock_irq(&lock->wait_lock); 895 return 0; 896 } 897 898 /* [10] Grab the next task, i.e. owner of @lock */ 899 task = get_task_struct(rt_mutex_owner(lock)); 900 raw_spin_lock(&task->pi_lock); 901 902 /* 903 * No requeue [11] here. We just do deadlock detection. 904 * 905 * [12] Store whether owner is blocked 906 * itself. Decision is made after dropping the locks 907 */ 908 next_lock = task_blocked_on_lock(task); 909 /* 910 * Get the top waiter for the next iteration 911 */ 912 top_waiter = rt_mutex_top_waiter(lock); 913 914 /* [13] Drop locks */ 915 raw_spin_unlock(&task->pi_lock); 916 raw_spin_unlock_irq(&lock->wait_lock); 917 918 /* If owner is not blocked, end of chain. */ 919 if (!next_lock) 920 goto out_put_task; 921 goto again; 922 } 923 924 /* 925 * Store the current top waiter before doing the requeue 926 * operation on @lock. We need it for the boost/deboost 927 * decision below. 928 */ 929 prerequeue_top_waiter = rt_mutex_top_waiter(lock); 930 931 /* [7] Requeue the waiter in the lock waiter tree. */ 932 rt_mutex_dequeue(lock, waiter); 933 934 /* 935 * Update the waiter prio fields now that we're dequeued. 936 * 937 * These values can have changed through either: 938 * 939 * sys_sched_set_scheduler() / sys_sched_setattr() 940 * 941 * or 942 * 943 * DL CBS enforcement advancing the effective deadline. 944 */ 945 waiter_update_prio(waiter, task); 946 947 rt_mutex_enqueue(lock, waiter); 948 949 /* 950 * [8] Release the (blocking) task in preparation for 951 * taking the owner task in [10]. 952 * 953 * Since we hold lock->waiter_lock, task cannot unblock, even if we 954 * release task->pi_lock. 955 */ 956 raw_spin_unlock(&task->pi_lock); 957 put_task_struct(task); 958 959 /* 960 * [9] check_exit_conditions_3 protected by lock->wait_lock. 961 * 962 * We must abort the chain walk if there is no lock owner even 963 * in the dead lock detection case, as we have nothing to 964 * follow here. This is the end of the chain we are walking. 965 */ 966 if (!rt_mutex_owner(lock)) { 967 /* 968 * If the requeue [7] above changed the top waiter, 969 * then we need to wake the new top waiter up to try 970 * to get the lock. 971 */ 972 top_waiter = rt_mutex_top_waiter(lock); 973 if (prerequeue_top_waiter != top_waiter) 974 wake_up_state(top_waiter->task, top_waiter->wake_state); 975 raw_spin_unlock_irq(&lock->wait_lock); 976 return 0; 977 } 978 979 /* 980 * [10] Grab the next task, i.e. the owner of @lock 981 * 982 * Per holding lock->wait_lock and checking for !owner above, there 983 * must be an owner and it cannot go away. 984 */ 985 task = get_task_struct(rt_mutex_owner(lock)); 986 raw_spin_lock(&task->pi_lock); 987 988 /* [11] requeue the pi waiters if necessary */ 989 if (waiter == rt_mutex_top_waiter(lock)) { 990 /* 991 * The waiter became the new top (highest priority) 992 * waiter on the lock. Replace the previous top waiter 993 * in the owner tasks pi waiters tree with this waiter 994 * and adjust the priority of the owner. 995 */ 996 rt_mutex_dequeue_pi(task, prerequeue_top_waiter); 997 waiter_clone_prio(waiter, task); 998 rt_mutex_enqueue_pi(task, waiter); 999 rt_mutex_adjust_prio(lock, task); 1000 1001 } else if (prerequeue_top_waiter == waiter) { 1002 /* 1003 * The waiter was the top waiter on the lock, but is 1004 * no longer the top priority waiter. Replace waiter in 1005 * the owner tasks pi waiters tree with the new top 1006 * (highest priority) waiter and adjust the priority 1007 * of the owner. 1008 * The new top waiter is stored in @waiter so that 1009 * @waiter == @top_waiter evaluates to true below and 1010 * we continue to deboost the rest of the chain. 1011 */ 1012 rt_mutex_dequeue_pi(task, waiter); 1013 waiter = rt_mutex_top_waiter(lock); 1014 waiter_clone_prio(waiter, task); 1015 rt_mutex_enqueue_pi(task, waiter); 1016 rt_mutex_adjust_prio(lock, task); 1017 } else { 1018 /* 1019 * Nothing changed. No need to do any priority 1020 * adjustment. 1021 */ 1022 } 1023 1024 /* 1025 * [12] check_exit_conditions_4() protected by task->pi_lock 1026 * and lock->wait_lock. The actual decisions are made after we 1027 * dropped the locks. 1028 * 1029 * Check whether the task which owns the current lock is pi 1030 * blocked itself. If yes we store a pointer to the lock for 1031 * the lock chain change detection above. After we dropped 1032 * task->pi_lock next_lock cannot be dereferenced anymore. 1033 */ 1034 next_lock = task_blocked_on_lock(task); 1035 /* 1036 * Store the top waiter of @lock for the end of chain walk 1037 * decision below. 1038 */ 1039 top_waiter = rt_mutex_top_waiter(lock); 1040 1041 /* [13] Drop the locks */ 1042 raw_spin_unlock(&task->pi_lock); 1043 raw_spin_unlock_irq(&lock->wait_lock); 1044 1045 /* 1046 * Make the actual exit decisions [12], based on the stored 1047 * values. 1048 * 1049 * We reached the end of the lock chain. Stop right here. No 1050 * point to go back just to figure that out. 1051 */ 1052 if (!next_lock) 1053 goto out_put_task; 1054 1055 /* 1056 * If the current waiter is not the top waiter on the lock, 1057 * then we can stop the chain walk here if we are not in full 1058 * deadlock detection mode. 1059 */ 1060 if (!detect_deadlock && waiter != top_waiter) 1061 goto out_put_task; 1062 1063 goto again; 1064 1065 out_unlock_pi: 1066 raw_spin_unlock_irq(&task->pi_lock); 1067 out_put_task: 1068 put_task_struct(task); 1069 1070 return ret; 1071 } 1072 1073 /* 1074 * Try to take an rt-mutex 1075 * 1076 * Must be called with lock->wait_lock held and interrupts disabled 1077 * 1078 * @lock: The lock to be acquired. 1079 * @task: The task which wants to acquire the lock 1080 * @waiter: The waiter that is queued to the lock's wait tree if the 1081 * callsite called task_blocked_on_lock(), otherwise NULL 1082 */ 1083 static int __sched 1084 try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task, 1085 struct rt_mutex_waiter *waiter) 1086 { 1087 lockdep_assert_held(&lock->wait_lock); 1088 1089 /* 1090 * Before testing whether we can acquire @lock, we set the 1091 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all 1092 * other tasks which try to modify @lock into the slow path 1093 * and they serialize on @lock->wait_lock. 1094 * 1095 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state 1096 * as explained at the top of this file if and only if: 1097 * 1098 * - There is a lock owner. The caller must fixup the 1099 * transient state if it does a trylock or leaves the lock 1100 * function due to a signal or timeout. 1101 * 1102 * - @task acquires the lock and there are no other 1103 * waiters. This is undone in rt_mutex_set_owner(@task) at 1104 * the end of this function. 1105 */ 1106 mark_rt_mutex_waiters(lock); 1107 1108 /* 1109 * If @lock has an owner, give up. 1110 */ 1111 if (rt_mutex_owner(lock)) 1112 return 0; 1113 1114 /* 1115 * If @waiter != NULL, @task has already enqueued the waiter 1116 * into @lock waiter tree. If @waiter == NULL then this is a 1117 * trylock attempt. 1118 */ 1119 if (waiter) { 1120 struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); 1121 1122 /* 1123 * If waiter is the highest priority waiter of @lock, 1124 * or allowed to steal it, take it over. 1125 */ 1126 if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) { 1127 /* 1128 * We can acquire the lock. Remove the waiter from the 1129 * lock waiters tree. 1130 */ 1131 rt_mutex_dequeue(lock, waiter); 1132 } else { 1133 return 0; 1134 } 1135 } else { 1136 /* 1137 * If the lock has waiters already we check whether @task is 1138 * eligible to take over the lock. 1139 * 1140 * If there are no other waiters, @task can acquire 1141 * the lock. @task->pi_blocked_on is NULL, so it does 1142 * not need to be dequeued. 1143 */ 1144 if (rt_mutex_has_waiters(lock)) { 1145 /* Check whether the trylock can steal it. */ 1146 if (!rt_mutex_steal(task_to_waiter(task), 1147 rt_mutex_top_waiter(lock))) 1148 return 0; 1149 1150 /* 1151 * The current top waiter stays enqueued. We 1152 * don't have to change anything in the lock 1153 * waiters order. 1154 */ 1155 } else { 1156 /* 1157 * No waiters. Take the lock without the 1158 * pi_lock dance.@task->pi_blocked_on is NULL 1159 * and we have no waiters to enqueue in @task 1160 * pi waiters tree. 1161 */ 1162 goto takeit; 1163 } 1164 } 1165 1166 /* 1167 * Clear @task->pi_blocked_on. Requires protection by 1168 * @task->pi_lock. Redundant operation for the @waiter == NULL 1169 * case, but conditionals are more expensive than a redundant 1170 * store. 1171 */ 1172 raw_spin_lock(&task->pi_lock); 1173 task->pi_blocked_on = NULL; 1174 /* 1175 * Finish the lock acquisition. @task is the new owner. If 1176 * other waiters exist we have to insert the highest priority 1177 * waiter into @task->pi_waiters tree. 1178 */ 1179 if (rt_mutex_has_waiters(lock)) 1180 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); 1181 raw_spin_unlock(&task->pi_lock); 1182 1183 takeit: 1184 /* 1185 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there 1186 * are still waiters or clears it. 1187 */ 1188 rt_mutex_set_owner(lock, task); 1189 1190 return 1; 1191 } 1192 1193 /* 1194 * Task blocks on lock. 1195 * 1196 * Prepare waiter and propagate pi chain 1197 * 1198 * This must be called with lock->wait_lock held and interrupts disabled 1199 */ 1200 static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, 1201 struct rt_mutex_waiter *waiter, 1202 struct task_struct *task, 1203 struct ww_acquire_ctx *ww_ctx, 1204 enum rtmutex_chainwalk chwalk) 1205 { 1206 struct task_struct *owner = rt_mutex_owner(lock); 1207 struct rt_mutex_waiter *top_waiter = waiter; 1208 struct rt_mutex_base *next_lock; 1209 int chain_walk = 0, res; 1210 1211 lockdep_assert_held(&lock->wait_lock); 1212 1213 /* 1214 * Early deadlock detection. We really don't want the task to 1215 * enqueue on itself just to untangle the mess later. It's not 1216 * only an optimization. We drop the locks, so another waiter 1217 * can come in before the chain walk detects the deadlock. So 1218 * the other will detect the deadlock and return -EDEADLOCK, 1219 * which is wrong, as the other waiter is not in a deadlock 1220 * situation. 1221 * 1222 * Except for ww_mutex, in that case the chain walk must already deal 1223 * with spurious cycles, see the comments at [3] and [6]. 1224 */ 1225 if (owner == task && !(build_ww_mutex() && ww_ctx)) 1226 return -EDEADLK; 1227 1228 raw_spin_lock(&task->pi_lock); 1229 waiter->task = task; 1230 waiter->lock = lock; 1231 waiter_update_prio(waiter, task); 1232 waiter_clone_prio(waiter, task); 1233 1234 /* Get the top priority waiter on the lock */ 1235 if (rt_mutex_has_waiters(lock)) 1236 top_waiter = rt_mutex_top_waiter(lock); 1237 rt_mutex_enqueue(lock, waiter); 1238 1239 task->pi_blocked_on = waiter; 1240 1241 raw_spin_unlock(&task->pi_lock); 1242 1243 if (build_ww_mutex() && ww_ctx) { 1244 struct rt_mutex *rtm; 1245 1246 /* Check whether the waiter should back out immediately */ 1247 rtm = container_of(lock, struct rt_mutex, rtmutex); 1248 res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx); 1249 if (res) { 1250 raw_spin_lock(&task->pi_lock); 1251 rt_mutex_dequeue(lock, waiter); 1252 task->pi_blocked_on = NULL; 1253 raw_spin_unlock(&task->pi_lock); 1254 return res; 1255 } 1256 } 1257 1258 if (!owner) 1259 return 0; 1260 1261 raw_spin_lock(&owner->pi_lock); 1262 if (waiter == rt_mutex_top_waiter(lock)) { 1263 rt_mutex_dequeue_pi(owner, top_waiter); 1264 rt_mutex_enqueue_pi(owner, waiter); 1265 1266 rt_mutex_adjust_prio(lock, owner); 1267 if (owner->pi_blocked_on) 1268 chain_walk = 1; 1269 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { 1270 chain_walk = 1; 1271 } 1272 1273 /* Store the lock on which owner is blocked or NULL */ 1274 next_lock = task_blocked_on_lock(owner); 1275 1276 raw_spin_unlock(&owner->pi_lock); 1277 /* 1278 * Even if full deadlock detection is on, if the owner is not 1279 * blocked itself, we can avoid finding this out in the chain 1280 * walk. 1281 */ 1282 if (!chain_walk || !next_lock) 1283 return 0; 1284 1285 /* 1286 * The owner can't disappear while holding a lock, 1287 * so the owner struct is protected by wait_lock. 1288 * Gets dropped in rt_mutex_adjust_prio_chain()! 1289 */ 1290 get_task_struct(owner); 1291 1292 raw_spin_unlock_irq(&lock->wait_lock); 1293 1294 res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, 1295 next_lock, waiter, task); 1296 1297 raw_spin_lock_irq(&lock->wait_lock); 1298 1299 return res; 1300 } 1301 1302 /* 1303 * Remove the top waiter from the current tasks pi waiter tree and 1304 * queue it up. 1305 * 1306 * Called with lock->wait_lock held and interrupts disabled. 1307 */ 1308 static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh, 1309 struct rt_mutex_base *lock) 1310 { 1311 struct rt_mutex_waiter *waiter; 1312 1313 lockdep_assert_held(&lock->wait_lock); 1314 1315 raw_spin_lock(¤t->pi_lock); 1316 1317 waiter = rt_mutex_top_waiter(lock); 1318 1319 /* 1320 * Remove it from current->pi_waiters and deboost. 1321 * 1322 * We must in fact deboost here in order to ensure we call 1323 * rt_mutex_setprio() to update p->pi_top_task before the 1324 * task unblocks. 1325 */ 1326 rt_mutex_dequeue_pi(current, waiter); 1327 rt_mutex_adjust_prio(lock, current); 1328 1329 /* 1330 * As we are waking up the top waiter, and the waiter stays 1331 * queued on the lock until it gets the lock, this lock 1332 * obviously has waiters. Just set the bit here and this has 1333 * the added benefit of forcing all new tasks into the 1334 * slow path making sure no task of lower priority than 1335 * the top waiter can steal this lock. 1336 */ 1337 lock->owner = (void *) RT_MUTEX_HAS_WAITERS; 1338 1339 /* 1340 * We deboosted before waking the top waiter task such that we don't 1341 * run two tasks with the 'same' priority (and ensure the 1342 * p->pi_top_task pointer points to a blocked task). This however can 1343 * lead to priority inversion if we would get preempted after the 1344 * deboost but before waking our donor task, hence the preempt_disable() 1345 * before unlock. 1346 * 1347 * Pairs with preempt_enable() in rt_mutex_wake_up_q(); 1348 */ 1349 preempt_disable(); 1350 rt_mutex_wake_q_add(wqh, waiter); 1351 raw_spin_unlock(¤t->pi_lock); 1352 } 1353 1354 static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock) 1355 { 1356 int ret = try_to_take_rt_mutex(lock, current, NULL); 1357 1358 /* 1359 * try_to_take_rt_mutex() sets the lock waiters bit 1360 * unconditionally. Clean this up. 1361 */ 1362 fixup_rt_mutex_waiters(lock, true); 1363 1364 return ret; 1365 } 1366 1367 /* 1368 * Slow path try-lock function: 1369 */ 1370 static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock) 1371 { 1372 unsigned long flags; 1373 int ret; 1374 1375 /* 1376 * If the lock already has an owner we fail to get the lock. 1377 * This can be done without taking the @lock->wait_lock as 1378 * it is only being read, and this is a trylock anyway. 1379 */ 1380 if (rt_mutex_owner(lock)) 1381 return 0; 1382 1383 /* 1384 * The mutex has currently no owner. Lock the wait lock and try to 1385 * acquire the lock. We use irqsave here to support early boot calls. 1386 */ 1387 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1388 1389 ret = __rt_mutex_slowtrylock(lock); 1390 1391 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1392 1393 return ret; 1394 } 1395 1396 static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock) 1397 { 1398 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 1399 return 1; 1400 1401 return rt_mutex_slowtrylock(lock); 1402 } 1403 1404 /* 1405 * Slow path to release a rt-mutex. 1406 */ 1407 static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock) 1408 { 1409 DEFINE_RT_WAKE_Q(wqh); 1410 unsigned long flags; 1411 1412 /* irqsave required to support early boot calls */ 1413 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1414 1415 debug_rt_mutex_unlock(lock); 1416 1417 /* 1418 * We must be careful here if the fast path is enabled. If we 1419 * have no waiters queued we cannot set owner to NULL here 1420 * because of: 1421 * 1422 * foo->lock->owner = NULL; 1423 * rtmutex_lock(foo->lock); <- fast path 1424 * free = atomic_dec_and_test(foo->refcnt); 1425 * rtmutex_unlock(foo->lock); <- fast path 1426 * if (free) 1427 * kfree(foo); 1428 * raw_spin_unlock(foo->lock->wait_lock); 1429 * 1430 * So for the fastpath enabled kernel: 1431 * 1432 * Nothing can set the waiters bit as long as we hold 1433 * lock->wait_lock. So we do the following sequence: 1434 * 1435 * owner = rt_mutex_owner(lock); 1436 * clear_rt_mutex_waiters(lock); 1437 * raw_spin_unlock(&lock->wait_lock); 1438 * if (cmpxchg(&lock->owner, owner, 0) == owner) 1439 * return; 1440 * goto retry; 1441 * 1442 * The fastpath disabled variant is simple as all access to 1443 * lock->owner is serialized by lock->wait_lock: 1444 * 1445 * lock->owner = NULL; 1446 * raw_spin_unlock(&lock->wait_lock); 1447 */ 1448 while (!rt_mutex_has_waiters(lock)) { 1449 /* Drops lock->wait_lock ! */ 1450 if (unlock_rt_mutex_safe(lock, flags) == true) 1451 return; 1452 /* Relock the rtmutex and try again */ 1453 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1454 } 1455 1456 /* 1457 * The wakeup next waiter path does not suffer from the above 1458 * race. See the comments there. 1459 * 1460 * Queue the next waiter for wakeup once we release the wait_lock. 1461 */ 1462 mark_wakeup_next_waiter(&wqh, lock); 1463 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1464 1465 rt_mutex_wake_up_q(&wqh); 1466 } 1467 1468 static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock) 1469 { 1470 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) 1471 return; 1472 1473 rt_mutex_slowunlock(lock); 1474 } 1475 1476 #ifdef CONFIG_SMP 1477 static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, 1478 struct rt_mutex_waiter *waiter, 1479 struct task_struct *owner) 1480 { 1481 bool res = true; 1482 1483 rcu_read_lock(); 1484 for (;;) { 1485 /* If owner changed, trylock again. */ 1486 if (owner != rt_mutex_owner(lock)) 1487 break; 1488 /* 1489 * Ensure that @owner is dereferenced after checking that 1490 * the lock owner still matches @owner. If that fails, 1491 * @owner might point to freed memory. If it still matches, 1492 * the rcu_read_lock() ensures the memory stays valid. 1493 */ 1494 barrier(); 1495 /* 1496 * Stop spinning when: 1497 * - the lock owner has been scheduled out 1498 * - current is not longer the top waiter 1499 * - current is requested to reschedule (redundant 1500 * for CONFIG_PREEMPT_RCU=y) 1501 * - the VCPU on which owner runs is preempted 1502 */ 1503 if (!owner_on_cpu(owner) || need_resched() || 1504 !rt_mutex_waiter_is_top_waiter(lock, waiter)) { 1505 res = false; 1506 break; 1507 } 1508 cpu_relax(); 1509 } 1510 rcu_read_unlock(); 1511 return res; 1512 } 1513 #else 1514 static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, 1515 struct rt_mutex_waiter *waiter, 1516 struct task_struct *owner) 1517 { 1518 return false; 1519 } 1520 #endif 1521 1522 #ifdef RT_MUTEX_BUILD_MUTEX 1523 /* 1524 * Functions required for: 1525 * - rtmutex, futex on all kernels 1526 * - mutex and rwsem substitutions on RT kernels 1527 */ 1528 1529 /* 1530 * Remove a waiter from a lock and give up 1531 * 1532 * Must be called with lock->wait_lock held and interrupts disabled. It must 1533 * have just failed to try_to_take_rt_mutex(). 1534 */ 1535 static void __sched remove_waiter(struct rt_mutex_base *lock, 1536 struct rt_mutex_waiter *waiter) 1537 { 1538 bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); 1539 struct task_struct *owner = rt_mutex_owner(lock); 1540 struct rt_mutex_base *next_lock; 1541 1542 lockdep_assert_held(&lock->wait_lock); 1543 1544 raw_spin_lock(¤t->pi_lock); 1545 rt_mutex_dequeue(lock, waiter); 1546 current->pi_blocked_on = NULL; 1547 raw_spin_unlock(¤t->pi_lock); 1548 1549 /* 1550 * Only update priority if the waiter was the highest priority 1551 * waiter of the lock and there is an owner to update. 1552 */ 1553 if (!owner || !is_top_waiter) 1554 return; 1555 1556 raw_spin_lock(&owner->pi_lock); 1557 1558 rt_mutex_dequeue_pi(owner, waiter); 1559 1560 if (rt_mutex_has_waiters(lock)) 1561 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); 1562 1563 rt_mutex_adjust_prio(lock, owner); 1564 1565 /* Store the lock on which owner is blocked or NULL */ 1566 next_lock = task_blocked_on_lock(owner); 1567 1568 raw_spin_unlock(&owner->pi_lock); 1569 1570 /* 1571 * Don't walk the chain, if the owner task is not blocked 1572 * itself. 1573 */ 1574 if (!next_lock) 1575 return; 1576 1577 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 1578 get_task_struct(owner); 1579 1580 raw_spin_unlock_irq(&lock->wait_lock); 1581 1582 rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, 1583 next_lock, NULL, current); 1584 1585 raw_spin_lock_irq(&lock->wait_lock); 1586 } 1587 1588 /** 1589 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop 1590 * @lock: the rt_mutex to take 1591 * @ww_ctx: WW mutex context pointer 1592 * @state: the state the task should block in (TASK_INTERRUPTIBLE 1593 * or TASK_UNINTERRUPTIBLE) 1594 * @timeout: the pre-initialized and started timer, or NULL for none 1595 * @waiter: the pre-initialized rt_mutex_waiter 1596 * 1597 * Must be called with lock->wait_lock held and interrupts disabled 1598 */ 1599 static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, 1600 struct ww_acquire_ctx *ww_ctx, 1601 unsigned int state, 1602 struct hrtimer_sleeper *timeout, 1603 struct rt_mutex_waiter *waiter) 1604 { 1605 struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); 1606 struct task_struct *owner; 1607 int ret = 0; 1608 1609 for (;;) { 1610 /* Try to acquire the lock: */ 1611 if (try_to_take_rt_mutex(lock, current, waiter)) 1612 break; 1613 1614 if (timeout && !timeout->task) { 1615 ret = -ETIMEDOUT; 1616 break; 1617 } 1618 if (signal_pending_state(state, current)) { 1619 ret = -EINTR; 1620 break; 1621 } 1622 1623 if (build_ww_mutex() && ww_ctx) { 1624 ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx); 1625 if (ret) 1626 break; 1627 } 1628 1629 if (waiter == rt_mutex_top_waiter(lock)) 1630 owner = rt_mutex_owner(lock); 1631 else 1632 owner = NULL; 1633 raw_spin_unlock_irq(&lock->wait_lock); 1634 1635 if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) 1636 rt_mutex_schedule(); 1637 1638 raw_spin_lock_irq(&lock->wait_lock); 1639 set_current_state(state); 1640 } 1641 1642 __set_current_state(TASK_RUNNING); 1643 return ret; 1644 } 1645 1646 static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, 1647 struct rt_mutex_base *lock, 1648 struct rt_mutex_waiter *w) 1649 { 1650 /* 1651 * If the result is not -EDEADLOCK or the caller requested 1652 * deadlock detection, nothing to do here. 1653 */ 1654 if (res != -EDEADLOCK || detect_deadlock) 1655 return; 1656 1657 if (build_ww_mutex() && w->ww_ctx) 1658 return; 1659 1660 raw_spin_unlock_irq(&lock->wait_lock); 1661 1662 WARN(1, "rtmutex deadlock detected\n"); 1663 1664 while (1) { 1665 set_current_state(TASK_INTERRUPTIBLE); 1666 rt_mutex_schedule(); 1667 } 1668 } 1669 1670 /** 1671 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held 1672 * @lock: The rtmutex to block lock 1673 * @ww_ctx: WW mutex context pointer 1674 * @state: The task state for sleeping 1675 * @chwalk: Indicator whether full or partial chainwalk is requested 1676 * @waiter: Initializer waiter for blocking 1677 */ 1678 static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, 1679 struct ww_acquire_ctx *ww_ctx, 1680 unsigned int state, 1681 enum rtmutex_chainwalk chwalk, 1682 struct rt_mutex_waiter *waiter) 1683 { 1684 struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); 1685 struct ww_mutex *ww = ww_container_of(rtm); 1686 int ret; 1687 1688 lockdep_assert_held(&lock->wait_lock); 1689 1690 /* Try to acquire the lock again: */ 1691 if (try_to_take_rt_mutex(lock, current, NULL)) { 1692 if (build_ww_mutex() && ww_ctx) { 1693 __ww_mutex_check_waiters(rtm, ww_ctx); 1694 ww_mutex_lock_acquired(ww, ww_ctx); 1695 } 1696 return 0; 1697 } 1698 1699 set_current_state(state); 1700 1701 trace_contention_begin(lock, LCB_F_RT); 1702 1703 ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk); 1704 if (likely(!ret)) 1705 ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter); 1706 1707 if (likely(!ret)) { 1708 /* acquired the lock */ 1709 if (build_ww_mutex() && ww_ctx) { 1710 if (!ww_ctx->is_wait_die) 1711 __ww_mutex_check_waiters(rtm, ww_ctx); 1712 ww_mutex_lock_acquired(ww, ww_ctx); 1713 } 1714 } else { 1715 __set_current_state(TASK_RUNNING); 1716 remove_waiter(lock, waiter); 1717 rt_mutex_handle_deadlock(ret, chwalk, lock, waiter); 1718 } 1719 1720 /* 1721 * try_to_take_rt_mutex() sets the waiter bit 1722 * unconditionally. We might have to fix that up. 1723 */ 1724 fixup_rt_mutex_waiters(lock, true); 1725 1726 trace_contention_end(lock, ret); 1727 1728 return ret; 1729 } 1730 1731 static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, 1732 struct ww_acquire_ctx *ww_ctx, 1733 unsigned int state) 1734 { 1735 struct rt_mutex_waiter waiter; 1736 int ret; 1737 1738 rt_mutex_init_waiter(&waiter); 1739 waiter.ww_ctx = ww_ctx; 1740 1741 ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK, 1742 &waiter); 1743 1744 debug_rt_mutex_free_waiter(&waiter); 1745 return ret; 1746 } 1747 1748 /* 1749 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails 1750 * @lock: The rtmutex to block lock 1751 * @ww_ctx: WW mutex context pointer 1752 * @state: The task state for sleeping 1753 */ 1754 static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, 1755 struct ww_acquire_ctx *ww_ctx, 1756 unsigned int state) 1757 { 1758 unsigned long flags; 1759 int ret; 1760 1761 /* 1762 * Do all pre-schedule work here, before we queue a waiter and invoke 1763 * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would 1764 * otherwise recurse back into task_blocks_on_rt_mutex() through 1765 * rtlock_slowlock() and will then enqueue a second waiter for this 1766 * same task and things get really confusing real fast. 1767 */ 1768 rt_mutex_pre_schedule(); 1769 1770 /* 1771 * Technically we could use raw_spin_[un]lock_irq() here, but this can 1772 * be called in early boot if the cmpxchg() fast path is disabled 1773 * (debug, no architecture support). In this case we will acquire the 1774 * rtmutex with lock->wait_lock held. But we cannot unconditionally 1775 * enable interrupts in that early boot case. So we need to use the 1776 * irqsave/restore variants. 1777 */ 1778 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1779 ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); 1780 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1781 rt_mutex_post_schedule(); 1782 1783 return ret; 1784 } 1785 1786 static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, 1787 unsigned int state) 1788 { 1789 lockdep_assert(!current->pi_blocked_on); 1790 1791 if (likely(rt_mutex_try_acquire(lock))) 1792 return 0; 1793 1794 return rt_mutex_slowlock(lock, NULL, state); 1795 } 1796 #endif /* RT_MUTEX_BUILD_MUTEX */ 1797 1798 #ifdef RT_MUTEX_BUILD_SPINLOCKS 1799 /* 1800 * Functions required for spin/rw_lock substitution on RT kernels 1801 */ 1802 1803 /** 1804 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks 1805 * @lock: The underlying RT mutex 1806 */ 1807 static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) 1808 { 1809 struct rt_mutex_waiter waiter; 1810 struct task_struct *owner; 1811 1812 lockdep_assert_held(&lock->wait_lock); 1813 1814 if (try_to_take_rt_mutex(lock, current, NULL)) 1815 return; 1816 1817 rt_mutex_init_rtlock_waiter(&waiter); 1818 1819 /* Save current state and set state to TASK_RTLOCK_WAIT */ 1820 current_save_and_set_rtlock_wait_state(); 1821 1822 trace_contention_begin(lock, LCB_F_RT); 1823 1824 task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK); 1825 1826 for (;;) { 1827 /* Try to acquire the lock again */ 1828 if (try_to_take_rt_mutex(lock, current, &waiter)) 1829 break; 1830 1831 if (&waiter == rt_mutex_top_waiter(lock)) 1832 owner = rt_mutex_owner(lock); 1833 else 1834 owner = NULL; 1835 raw_spin_unlock_irq(&lock->wait_lock); 1836 1837 if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) 1838 schedule_rtlock(); 1839 1840 raw_spin_lock_irq(&lock->wait_lock); 1841 set_current_state(TASK_RTLOCK_WAIT); 1842 } 1843 1844 /* Restore the task state */ 1845 current_restore_rtlock_saved_state(); 1846 1847 /* 1848 * try_to_take_rt_mutex() sets the waiter bit unconditionally. 1849 * We might have to fix that up: 1850 */ 1851 fixup_rt_mutex_waiters(lock, true); 1852 debug_rt_mutex_free_waiter(&waiter); 1853 1854 trace_contention_end(lock, 0); 1855 } 1856 1857 static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock) 1858 { 1859 unsigned long flags; 1860 1861 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1862 rtlock_slowlock_locked(lock); 1863 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1864 } 1865 1866 #endif /* RT_MUTEX_BUILD_SPINLOCKS */ 1867