1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * RT-Mutexes: simple blocking mutual exclusion locks with PI support 4 * 5 * started by Ingo Molnar and Thomas Gleixner. 6 * 7 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 8 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 9 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt 10 * Copyright (C) 2006 Esben Nielsen 11 * Adaptive Spinlocks: 12 * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, 13 * and Peter Morreale, 14 * Adaptive Spinlocks simplification: 15 * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> 16 * 17 * See Documentation/locking/rt-mutex-design.rst for details. 18 */ 19 #include <linux/sched.h> 20 #include <linux/sched/debug.h> 21 #include <linux/sched/deadline.h> 22 #include <linux/sched/signal.h> 23 #include <linux/sched/rt.h> 24 #include <linux/sched/wake_q.h> 25 #include <linux/ww_mutex.h> 26 27 #include <trace/events/lock.h> 28 29 #include "rtmutex_common.h" 30 31 #ifndef WW_RT 32 # define build_ww_mutex() (false) 33 # define ww_container_of(rtm) NULL 34 35 static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter, 36 struct rt_mutex *lock, 37 struct ww_acquire_ctx *ww_ctx) 38 { 39 return 0; 40 } 41 42 static inline void __ww_mutex_check_waiters(struct rt_mutex *lock, 43 struct ww_acquire_ctx *ww_ctx) 44 { 45 } 46 47 static inline void ww_mutex_lock_acquired(struct ww_mutex *lock, 48 struct ww_acquire_ctx *ww_ctx) 49 { 50 } 51 52 static inline int __ww_mutex_check_kill(struct rt_mutex *lock, 53 struct rt_mutex_waiter *waiter, 54 struct ww_acquire_ctx *ww_ctx) 55 { 56 return 0; 57 } 58 59 #else 60 # define build_ww_mutex() (true) 61 # define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base) 62 # include "ww_mutex.h" 63 #endif 64 65 /* 66 * lock->owner state tracking: 67 * 68 * lock->owner holds the task_struct pointer of the owner. Bit 0 69 * is used to keep track of the "lock has waiters" state. 70 * 71 * owner bit0 72 * NULL 0 lock is free (fast acquire possible) 73 * NULL 1 lock is free and has waiters and the top waiter 74 * is going to take the lock* 75 * taskpointer 0 lock is held (fast release possible) 76 * taskpointer 1 lock is held and has waiters** 77 * 78 * The fast atomic compare exchange based acquire and release is only 79 * possible when bit 0 of lock->owner is 0. 80 * 81 * (*) It also can be a transitional state when grabbing the lock 82 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, 83 * we need to set the bit0 before looking at the lock, and the owner may be 84 * NULL in this small time, hence this can be a transitional state. 85 * 86 * (**) There is a small time when bit 0 is set but there are no 87 * waiters. This can happen when grabbing the lock in the slow path. 88 * To prevent a cmpxchg of the owner releasing the lock, we need to 89 * set this bit before looking at the lock. 90 */ 91 92 static __always_inline struct task_struct * 93 rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner) 94 { 95 unsigned long val = (unsigned long)owner; 96 97 if (rt_mutex_has_waiters(lock)) 98 val |= RT_MUTEX_HAS_WAITERS; 99 100 return (struct task_struct *)val; 101 } 102 103 static __always_inline void 104 rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) 105 { 106 /* 107 * lock->wait_lock is held but explicit acquire semantics are needed 108 * for a new lock owner so WRITE_ONCE is insufficient. 109 */ 110 xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner)); 111 } 112 113 static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock) 114 { 115 /* lock->wait_lock is held so the unlock provides release semantics. */ 116 WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL)); 117 } 118 119 static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) 120 { 121 lock->owner = (struct task_struct *) 122 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 123 } 124 125 static __always_inline void 126 fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock) 127 { 128 unsigned long owner, *p = (unsigned long *) &lock->owner; 129 130 if (rt_mutex_has_waiters(lock)) 131 return; 132 133 /* 134 * The rbtree has no waiters enqueued, now make sure that the 135 * lock->owner still has the waiters bit set, otherwise the 136 * following can happen: 137 * 138 * CPU 0 CPU 1 CPU2 139 * l->owner=T1 140 * rt_mutex_lock(l) 141 * lock(l->lock) 142 * l->owner = T1 | HAS_WAITERS; 143 * enqueue(T2) 144 * boost() 145 * unlock(l->lock) 146 * block() 147 * 148 * rt_mutex_lock(l) 149 * lock(l->lock) 150 * l->owner = T1 | HAS_WAITERS; 151 * enqueue(T3) 152 * boost() 153 * unlock(l->lock) 154 * block() 155 * signal(->T2) signal(->T3) 156 * lock(l->lock) 157 * dequeue(T2) 158 * deboost() 159 * unlock(l->lock) 160 * lock(l->lock) 161 * dequeue(T3) 162 * ==> wait list is empty 163 * deboost() 164 * unlock(l->lock) 165 * lock(l->lock) 166 * fixup_rt_mutex_waiters() 167 * if (wait_list_empty(l) { 168 * l->owner = owner 169 * owner = l->owner & ~HAS_WAITERS; 170 * ==> l->owner = T1 171 * } 172 * lock(l->lock) 173 * rt_mutex_unlock(l) fixup_rt_mutex_waiters() 174 * if (wait_list_empty(l) { 175 * owner = l->owner & ~HAS_WAITERS; 176 * cmpxchg(l->owner, T1, NULL) 177 * ===> Success (l->owner = NULL) 178 * 179 * l->owner = owner 180 * ==> l->owner = T1 181 * } 182 * 183 * With the check for the waiter bit in place T3 on CPU2 will not 184 * overwrite. All tasks fiddling with the waiters bit are 185 * serialized by l->lock, so nothing else can modify the waiters 186 * bit. If the bit is set then nothing can change l->owner either 187 * so the simple RMW is safe. The cmpxchg() will simply fail if it 188 * happens in the middle of the RMW because the waiters bit is 189 * still set. 190 */ 191 owner = READ_ONCE(*p); 192 if (owner & RT_MUTEX_HAS_WAITERS) { 193 /* 194 * See rt_mutex_set_owner() and rt_mutex_clear_owner() on 195 * why xchg_acquire() is used for updating owner for 196 * locking and WRITE_ONCE() for unlocking. 197 * 198 * WRITE_ONCE() would work for the acquire case too, but 199 * in case that the lock acquisition failed it might 200 * force other lockers into the slow path unnecessarily. 201 */ 202 if (acquire_lock) 203 xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS); 204 else 205 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); 206 } 207 } 208 209 /* 210 * We can speed up the acquire/release, if there's no debugging state to be 211 * set up. 212 */ 213 #ifndef CONFIG_DEBUG_RT_MUTEXES 214 static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, 215 struct task_struct *old, 216 struct task_struct *new) 217 { 218 return try_cmpxchg_acquire(&lock->owner, &old, new); 219 } 220 221 static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) 222 { 223 return rt_mutex_cmpxchg_acquire(lock, NULL, current); 224 } 225 226 static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, 227 struct task_struct *old, 228 struct task_struct *new) 229 { 230 return try_cmpxchg_release(&lock->owner, &old, new); 231 } 232 233 /* 234 * Callers must hold the ->wait_lock -- which is the whole purpose as we force 235 * all future threads that attempt to [Rmw] the lock to the slowpath. As such 236 * relaxed semantics suffice. 237 */ 238 static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) 239 { 240 unsigned long owner, *p = (unsigned long *) &lock->owner; 241 242 do { 243 owner = *p; 244 } while (cmpxchg_relaxed(p, owner, 245 owner | RT_MUTEX_HAS_WAITERS) != owner); 246 247 /* 248 * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE 249 * operations in the event of contention. Ensure the successful 250 * cmpxchg is visible. 251 */ 252 smp_mb__after_atomic(); 253 } 254 255 /* 256 * Safe fastpath aware unlock: 257 * 1) Clear the waiters bit 258 * 2) Drop lock->wait_lock 259 * 3) Try to unlock the lock with cmpxchg 260 */ 261 static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock, 262 unsigned long flags) 263 __releases(lock->wait_lock) 264 { 265 struct task_struct *owner = rt_mutex_owner(lock); 266 267 clear_rt_mutex_waiters(lock); 268 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 269 /* 270 * If a new waiter comes in between the unlock and the cmpxchg 271 * we have two situations: 272 * 273 * unlock(wait_lock); 274 * lock(wait_lock); 275 * cmpxchg(p, owner, 0) == owner 276 * mark_rt_mutex_waiters(lock); 277 * acquire(lock); 278 * or: 279 * 280 * unlock(wait_lock); 281 * lock(wait_lock); 282 * mark_rt_mutex_waiters(lock); 283 * 284 * cmpxchg(p, owner, 0) != owner 285 * enqueue_waiter(); 286 * unlock(wait_lock); 287 * lock(wait_lock); 288 * wake waiter(); 289 * unlock(wait_lock); 290 * lock(wait_lock); 291 * acquire(lock); 292 */ 293 return rt_mutex_cmpxchg_release(lock, owner, NULL); 294 } 295 296 #else 297 static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, 298 struct task_struct *old, 299 struct task_struct *new) 300 { 301 return false; 302 303 } 304 305 static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock); 306 307 static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) 308 { 309 /* 310 * With debug enabled rt_mutex_cmpxchg trylock() will always fail. 311 * 312 * Avoid unconditionally taking the slow path by using 313 * rt_mutex_slow_trylock() which is covered by the debug code and can 314 * acquire a non-contended rtmutex. 315 */ 316 return rt_mutex_slowtrylock(lock); 317 } 318 319 static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, 320 struct task_struct *old, 321 struct task_struct *new) 322 { 323 return false; 324 } 325 326 static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) 327 { 328 lock->owner = (struct task_struct *) 329 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 330 } 331 332 /* 333 * Simple slow path only version: lock->owner is protected by lock->wait_lock. 334 */ 335 static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock, 336 unsigned long flags) 337 __releases(lock->wait_lock) 338 { 339 lock->owner = NULL; 340 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 341 return true; 342 } 343 #endif 344 345 static __always_inline int __waiter_prio(struct task_struct *task) 346 { 347 int prio = task->prio; 348 349 if (!rt_prio(prio)) 350 return DEFAULT_PRIO; 351 352 return prio; 353 } 354 355 /* 356 * Update the waiter->tree copy of the sort keys. 357 */ 358 static __always_inline void 359 waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) 360 { 361 lockdep_assert_held(&waiter->lock->wait_lock); 362 lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry)); 363 364 waiter->tree.prio = __waiter_prio(task); 365 waiter->tree.deadline = task->dl.deadline; 366 } 367 368 /* 369 * Update the waiter->pi_tree copy of the sort keys (from the tree copy). 370 */ 371 static __always_inline void 372 waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) 373 { 374 lockdep_assert_held(&waiter->lock->wait_lock); 375 lockdep_assert_held(&task->pi_lock); 376 lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry)); 377 378 waiter->pi_tree.prio = waiter->tree.prio; 379 waiter->pi_tree.deadline = waiter->tree.deadline; 380 } 381 382 /* 383 * Only use with rt_waiter_node_{less,equal}() 384 */ 385 #define task_to_waiter_node(p) \ 386 &(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } 387 #define task_to_waiter(p) \ 388 &(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) } 389 390 static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left, 391 struct rt_waiter_node *right) 392 { 393 if (left->prio < right->prio) 394 return 1; 395 396 /* 397 * If both waiters have dl_prio(), we check the deadlines of the 398 * associated tasks. 399 * If left waiter has a dl_prio(), and we didn't return 1 above, 400 * then right waiter has a dl_prio() too. 401 */ 402 if (dl_prio(left->prio)) 403 return dl_time_before(left->deadline, right->deadline); 404 405 return 0; 406 } 407 408 static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left, 409 struct rt_waiter_node *right) 410 { 411 if (left->prio != right->prio) 412 return 0; 413 414 /* 415 * If both waiters have dl_prio(), we check the deadlines of the 416 * associated tasks. 417 * If left waiter has a dl_prio(), and we didn't return 0 above, 418 * then right waiter has a dl_prio() too. 419 */ 420 if (dl_prio(left->prio)) 421 return left->deadline == right->deadline; 422 423 return 1; 424 } 425 426 static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, 427 struct rt_mutex_waiter *top_waiter) 428 { 429 if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree)) 430 return true; 431 432 #ifdef RT_MUTEX_BUILD_SPINLOCKS 433 /* 434 * Note that RT tasks are excluded from same priority (lateral) 435 * steals to prevent the introduction of an unbounded latency. 436 */ 437 if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio)) 438 return false; 439 440 return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree); 441 #else 442 return false; 443 #endif 444 } 445 446 #define __node_2_waiter(node) \ 447 rb_entry((node), struct rt_mutex_waiter, tree.entry) 448 449 static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b) 450 { 451 struct rt_mutex_waiter *aw = __node_2_waiter(a); 452 struct rt_mutex_waiter *bw = __node_2_waiter(b); 453 454 if (rt_waiter_node_less(&aw->tree, &bw->tree)) 455 return 1; 456 457 if (!build_ww_mutex()) 458 return 0; 459 460 if (rt_waiter_node_less(&bw->tree, &aw->tree)) 461 return 0; 462 463 /* NOTE: relies on waiter->ww_ctx being set before insertion */ 464 if (aw->ww_ctx) { 465 if (!bw->ww_ctx) 466 return 1; 467 468 return (signed long)(aw->ww_ctx->stamp - 469 bw->ww_ctx->stamp) < 0; 470 } 471 472 return 0; 473 } 474 475 static __always_inline void 476 rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) 477 { 478 lockdep_assert_held(&lock->wait_lock); 479 480 rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less); 481 } 482 483 static __always_inline void 484 rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) 485 { 486 lockdep_assert_held(&lock->wait_lock); 487 488 if (RB_EMPTY_NODE(&waiter->tree.entry)) 489 return; 490 491 rb_erase_cached(&waiter->tree.entry, &lock->waiters); 492 RB_CLEAR_NODE(&waiter->tree.entry); 493 } 494 495 #define __node_2_rt_node(node) \ 496 rb_entry((node), struct rt_waiter_node, entry) 497 498 static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b) 499 { 500 return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b)); 501 } 502 503 static __always_inline void 504 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 505 { 506 lockdep_assert_held(&task->pi_lock); 507 508 rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less); 509 } 510 511 static __always_inline void 512 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 513 { 514 lockdep_assert_held(&task->pi_lock); 515 516 if (RB_EMPTY_NODE(&waiter->pi_tree.entry)) 517 return; 518 519 rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters); 520 RB_CLEAR_NODE(&waiter->pi_tree.entry); 521 } 522 523 static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock, 524 struct task_struct *p) 525 { 526 struct task_struct *pi_task = NULL; 527 528 lockdep_assert_held(&lock->wait_lock); 529 lockdep_assert(rt_mutex_owner(lock) == p); 530 lockdep_assert_held(&p->pi_lock); 531 532 if (task_has_pi_waiters(p)) 533 pi_task = task_top_pi_waiter(p)->task; 534 535 rt_mutex_setprio(p, pi_task); 536 } 537 538 /* RT mutex specific wake_q wrappers */ 539 static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh, 540 struct task_struct *task, 541 unsigned int wake_state) 542 { 543 if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) { 544 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) 545 WARN_ON_ONCE(wqh->rtlock_task); 546 get_task_struct(task); 547 wqh->rtlock_task = task; 548 } else { 549 wake_q_add(&wqh->head, task); 550 } 551 } 552 553 static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh, 554 struct rt_mutex_waiter *w) 555 { 556 rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state); 557 } 558 559 static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh) 560 { 561 if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) { 562 wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT); 563 put_task_struct(wqh->rtlock_task); 564 wqh->rtlock_task = NULL; 565 } 566 567 if (!wake_q_empty(&wqh->head)) 568 wake_up_q(&wqh->head); 569 570 /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ 571 preempt_enable(); 572 } 573 574 /* 575 * Deadlock detection is conditional: 576 * 577 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted 578 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. 579 * 580 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always 581 * conducted independent of the detect argument. 582 * 583 * If the waiter argument is NULL this indicates the deboost path and 584 * deadlock detection is disabled independent of the detect argument 585 * and the config settings. 586 */ 587 static __always_inline bool 588 rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, 589 enum rtmutex_chainwalk chwalk) 590 { 591 if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) 592 return waiter != NULL; 593 return chwalk == RT_MUTEX_FULL_CHAINWALK; 594 } 595 596 static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p) 597 { 598 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; 599 } 600 601 /* 602 * Adjust the priority chain. Also used for deadlock detection. 603 * Decreases task's usage by one - may thus free the task. 604 * 605 * @task: the task owning the mutex (owner) for which a chain walk is 606 * probably needed 607 * @chwalk: do we have to carry out deadlock detection? 608 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 609 * things for a task that has just got its priority adjusted, and 610 * is waiting on a mutex) 611 * @next_lock: the mutex on which the owner of @orig_lock was blocked before 612 * we dropped its pi_lock. Is never dereferenced, only used for 613 * comparison to detect lock chain changes. 614 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated 615 * its priority to the mutex owner (can be NULL in the case 616 * depicted above or if the top waiter is gone away and we are 617 * actually deboosting the owner) 618 * @top_task: the current top waiter 619 * 620 * Returns 0 or -EDEADLK. 621 * 622 * Chain walk basics and protection scope 623 * 624 * [R] refcount on task 625 * [Pn] task->pi_lock held 626 * [L] rtmutex->wait_lock held 627 * 628 * Normal locking order: 629 * 630 * rtmutex->wait_lock 631 * task->pi_lock 632 * 633 * Step Description Protected by 634 * function arguments: 635 * @task [R] 636 * @orig_lock if != NULL @top_task is blocked on it 637 * @next_lock Unprotected. Cannot be 638 * dereferenced. Only used for 639 * comparison. 640 * @orig_waiter if != NULL @top_task is blocked on it 641 * @top_task current, or in case of proxy 642 * locking protected by calling 643 * code 644 * again: 645 * loop_sanity_check(); 646 * retry: 647 * [1] lock(task->pi_lock); [R] acquire [P1] 648 * [2] waiter = task->pi_blocked_on; [P1] 649 * [3] check_exit_conditions_1(); [P1] 650 * [4] lock = waiter->lock; [P1] 651 * [5] if (!try_lock(lock->wait_lock)) { [P1] try to acquire [L] 652 * unlock(task->pi_lock); release [P1] 653 * goto retry; 654 * } 655 * [6] check_exit_conditions_2(); [P1] + [L] 656 * [7] requeue_lock_waiter(lock, waiter); [P1] + [L] 657 * [8] unlock(task->pi_lock); release [P1] 658 * put_task_struct(task); release [R] 659 * [9] check_exit_conditions_3(); [L] 660 * [10] task = owner(lock); [L] 661 * get_task_struct(task); [L] acquire [R] 662 * lock(task->pi_lock); [L] acquire [P2] 663 * [11] requeue_pi_waiter(tsk, waiters(lock));[P2] + [L] 664 * [12] check_exit_conditions_4(); [P2] + [L] 665 * [13] unlock(task->pi_lock); release [P2] 666 * unlock(lock->wait_lock); release [L] 667 * goto again; 668 * 669 * Where P1 is the blocking task and P2 is the lock owner; going up one step 670 * the owner becomes the next blocked task etc.. 671 * 672 * 673 */ 674 static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, 675 enum rtmutex_chainwalk chwalk, 676 struct rt_mutex_base *orig_lock, 677 struct rt_mutex_base *next_lock, 678 struct rt_mutex_waiter *orig_waiter, 679 struct task_struct *top_task) 680 { 681 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 682 struct rt_mutex_waiter *prerequeue_top_waiter; 683 int ret = 0, depth = 0; 684 struct rt_mutex_base *lock; 685 bool detect_deadlock; 686 bool requeue = true; 687 688 detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); 689 690 /* 691 * The (de)boosting is a step by step approach with a lot of 692 * pitfalls. We want this to be preemptible and we want hold a 693 * maximum of two locks per step. So we have to check 694 * carefully whether things change under us. 695 */ 696 again: 697 /* 698 * We limit the lock chain length for each invocation. 699 */ 700 if (++depth > max_lock_depth) { 701 static int prev_max; 702 703 /* 704 * Print this only once. If the admin changes the limit, 705 * print a new message when reaching the limit again. 706 */ 707 if (prev_max != max_lock_depth) { 708 prev_max = max_lock_depth; 709 printk(KERN_WARNING "Maximum lock depth %d reached " 710 "task: %s (%d)\n", max_lock_depth, 711 top_task->comm, task_pid_nr(top_task)); 712 } 713 put_task_struct(task); 714 715 return -EDEADLK; 716 } 717 718 /* 719 * We are fully preemptible here and only hold the refcount on 720 * @task. So everything can have changed under us since the 721 * caller or our own code below (goto retry/again) dropped all 722 * locks. 723 */ 724 retry: 725 /* 726 * [1] Task cannot go away as we did a get_task() before ! 727 */ 728 raw_spin_lock_irq(&task->pi_lock); 729 730 /* 731 * [2] Get the waiter on which @task is blocked on. 732 */ 733 waiter = task->pi_blocked_on; 734 735 /* 736 * [3] check_exit_conditions_1() protected by task->pi_lock. 737 */ 738 739 /* 740 * Check whether the end of the boosting chain has been 741 * reached or the state of the chain has changed while we 742 * dropped the locks. 743 */ 744 if (!waiter) 745 goto out_unlock_pi; 746 747 /* 748 * Check the orig_waiter state. After we dropped the locks, 749 * the previous owner of the lock might have released the lock. 750 */ 751 if (orig_waiter && !rt_mutex_owner(orig_lock)) 752 goto out_unlock_pi; 753 754 /* 755 * We dropped all locks after taking a refcount on @task, so 756 * the task might have moved on in the lock chain or even left 757 * the chain completely and blocks now on an unrelated lock or 758 * on @orig_lock. 759 * 760 * We stored the lock on which @task was blocked in @next_lock, 761 * so we can detect the chain change. 762 */ 763 if (next_lock != waiter->lock) 764 goto out_unlock_pi; 765 766 /* 767 * There could be 'spurious' loops in the lock graph due to ww_mutex, 768 * consider: 769 * 770 * P1: A, ww_A, ww_B 771 * P2: ww_B, ww_A 772 * P3: A 773 * 774 * P3 should not return -EDEADLK because it gets trapped in the cycle 775 * created by P1 and P2 (which will resolve -- and runs into 776 * max_lock_depth above). Therefore disable detect_deadlock such that 777 * the below termination condition can trigger once all relevant tasks 778 * are boosted. 779 * 780 * Even when we start with ww_mutex we can disable deadlock detection, 781 * since we would supress a ww_mutex induced deadlock at [6] anyway. 782 * Supressing it here however is not sufficient since we might still 783 * hit [6] due to adjustment driven iteration. 784 * 785 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd 786 * utterly fail to report it; lockdep should. 787 */ 788 if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock) 789 detect_deadlock = false; 790 791 /* 792 * Drop out, when the task has no waiters. Note, 793 * top_waiter can be NULL, when we are in the deboosting 794 * mode! 795 */ 796 if (top_waiter) { 797 if (!task_has_pi_waiters(task)) 798 goto out_unlock_pi; 799 /* 800 * If deadlock detection is off, we stop here if we 801 * are not the top pi waiter of the task. If deadlock 802 * detection is enabled we continue, but stop the 803 * requeueing in the chain walk. 804 */ 805 if (top_waiter != task_top_pi_waiter(task)) { 806 if (!detect_deadlock) 807 goto out_unlock_pi; 808 else 809 requeue = false; 810 } 811 } 812 813 /* 814 * If the waiter priority is the same as the task priority 815 * then there is no further priority adjustment necessary. If 816 * deadlock detection is off, we stop the chain walk. If its 817 * enabled we continue, but stop the requeueing in the chain 818 * walk. 819 */ 820 if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) { 821 if (!detect_deadlock) 822 goto out_unlock_pi; 823 else 824 requeue = false; 825 } 826 827 /* 828 * [4] Get the next lock; per holding task->pi_lock we can't unblock 829 * and guarantee @lock's existence. 830 */ 831 lock = waiter->lock; 832 /* 833 * [5] We need to trylock here as we are holding task->pi_lock, 834 * which is the reverse lock order versus the other rtmutex 835 * operations. 836 * 837 * Per the above, holding task->pi_lock guarantees lock exists, so 838 * inverting this lock order is infeasible from a life-time 839 * perspective. 840 */ 841 if (!raw_spin_trylock(&lock->wait_lock)) { 842 raw_spin_unlock_irq(&task->pi_lock); 843 cpu_relax(); 844 goto retry; 845 } 846 847 /* 848 * [6] check_exit_conditions_2() protected by task->pi_lock and 849 * lock->wait_lock. 850 * 851 * Deadlock detection. If the lock is the same as the original 852 * lock which caused us to walk the lock chain or if the 853 * current lock is owned by the task which initiated the chain 854 * walk, we detected a deadlock. 855 */ 856 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 857 ret = -EDEADLK; 858 859 /* 860 * When the deadlock is due to ww_mutex; also see above. Don't 861 * report the deadlock and instead let the ww_mutex wound/die 862 * logic pick which of the contending threads gets -EDEADLK. 863 * 864 * NOTE: assumes the cycle only contains a single ww_class; any 865 * other configuration and we fail to report; also, see 866 * lockdep. 867 */ 868 if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx) 869 ret = 0; 870 871 raw_spin_unlock(&lock->wait_lock); 872 goto out_unlock_pi; 873 } 874 875 /* 876 * If we just follow the lock chain for deadlock detection, no 877 * need to do all the requeue operations. To avoid a truckload 878 * of conditionals around the various places below, just do the 879 * minimum chain walk checks. 880 */ 881 if (!requeue) { 882 /* 883 * No requeue[7] here. Just release @task [8] 884 */ 885 raw_spin_unlock(&task->pi_lock); 886 put_task_struct(task); 887 888 /* 889 * [9] check_exit_conditions_3 protected by lock->wait_lock. 890 * If there is no owner of the lock, end of chain. 891 */ 892 if (!rt_mutex_owner(lock)) { 893 raw_spin_unlock_irq(&lock->wait_lock); 894 return 0; 895 } 896 897 /* [10] Grab the next task, i.e. owner of @lock */ 898 task = get_task_struct(rt_mutex_owner(lock)); 899 raw_spin_lock(&task->pi_lock); 900 901 /* 902 * No requeue [11] here. We just do deadlock detection. 903 * 904 * [12] Store whether owner is blocked 905 * itself. Decision is made after dropping the locks 906 */ 907 next_lock = task_blocked_on_lock(task); 908 /* 909 * Get the top waiter for the next iteration 910 */ 911 top_waiter = rt_mutex_top_waiter(lock); 912 913 /* [13] Drop locks */ 914 raw_spin_unlock(&task->pi_lock); 915 raw_spin_unlock_irq(&lock->wait_lock); 916 917 /* If owner is not blocked, end of chain. */ 918 if (!next_lock) 919 goto out_put_task; 920 goto again; 921 } 922 923 /* 924 * Store the current top waiter before doing the requeue 925 * operation on @lock. We need it for the boost/deboost 926 * decision below. 927 */ 928 prerequeue_top_waiter = rt_mutex_top_waiter(lock); 929 930 /* [7] Requeue the waiter in the lock waiter tree. */ 931 rt_mutex_dequeue(lock, waiter); 932 933 /* 934 * Update the waiter prio fields now that we're dequeued. 935 * 936 * These values can have changed through either: 937 * 938 * sys_sched_set_scheduler() / sys_sched_setattr() 939 * 940 * or 941 * 942 * DL CBS enforcement advancing the effective deadline. 943 */ 944 waiter_update_prio(waiter, task); 945 946 rt_mutex_enqueue(lock, waiter); 947 948 /* 949 * [8] Release the (blocking) task in preparation for 950 * taking the owner task in [10]. 951 * 952 * Since we hold lock->waiter_lock, task cannot unblock, even if we 953 * release task->pi_lock. 954 */ 955 raw_spin_unlock(&task->pi_lock); 956 put_task_struct(task); 957 958 /* 959 * [9] check_exit_conditions_3 protected by lock->wait_lock. 960 * 961 * We must abort the chain walk if there is no lock owner even 962 * in the dead lock detection case, as we have nothing to 963 * follow here. This is the end of the chain we are walking. 964 */ 965 if (!rt_mutex_owner(lock)) { 966 /* 967 * If the requeue [7] above changed the top waiter, 968 * then we need to wake the new top waiter up to try 969 * to get the lock. 970 */ 971 top_waiter = rt_mutex_top_waiter(lock); 972 if (prerequeue_top_waiter != top_waiter) 973 wake_up_state(top_waiter->task, top_waiter->wake_state); 974 raw_spin_unlock_irq(&lock->wait_lock); 975 return 0; 976 } 977 978 /* 979 * [10] Grab the next task, i.e. the owner of @lock 980 * 981 * Per holding lock->wait_lock and checking for !owner above, there 982 * must be an owner and it cannot go away. 983 */ 984 task = get_task_struct(rt_mutex_owner(lock)); 985 raw_spin_lock(&task->pi_lock); 986 987 /* [11] requeue the pi waiters if necessary */ 988 if (waiter == rt_mutex_top_waiter(lock)) { 989 /* 990 * The waiter became the new top (highest priority) 991 * waiter on the lock. Replace the previous top waiter 992 * in the owner tasks pi waiters tree with this waiter 993 * and adjust the priority of the owner. 994 */ 995 rt_mutex_dequeue_pi(task, prerequeue_top_waiter); 996 waiter_clone_prio(waiter, task); 997 rt_mutex_enqueue_pi(task, waiter); 998 rt_mutex_adjust_prio(lock, task); 999 1000 } else if (prerequeue_top_waiter == waiter) { 1001 /* 1002 * The waiter was the top waiter on the lock, but is 1003 * no longer the top priority waiter. Replace waiter in 1004 * the owner tasks pi waiters tree with the new top 1005 * (highest priority) waiter and adjust the priority 1006 * of the owner. 1007 * The new top waiter is stored in @waiter so that 1008 * @waiter == @top_waiter evaluates to true below and 1009 * we continue to deboost the rest of the chain. 1010 */ 1011 rt_mutex_dequeue_pi(task, waiter); 1012 waiter = rt_mutex_top_waiter(lock); 1013 waiter_clone_prio(waiter, task); 1014 rt_mutex_enqueue_pi(task, waiter); 1015 rt_mutex_adjust_prio(lock, task); 1016 } else { 1017 /* 1018 * Nothing changed. No need to do any priority 1019 * adjustment. 1020 */ 1021 } 1022 1023 /* 1024 * [12] check_exit_conditions_4() protected by task->pi_lock 1025 * and lock->wait_lock. The actual decisions are made after we 1026 * dropped the locks. 1027 * 1028 * Check whether the task which owns the current lock is pi 1029 * blocked itself. If yes we store a pointer to the lock for 1030 * the lock chain change detection above. After we dropped 1031 * task->pi_lock next_lock cannot be dereferenced anymore. 1032 */ 1033 next_lock = task_blocked_on_lock(task); 1034 /* 1035 * Store the top waiter of @lock for the end of chain walk 1036 * decision below. 1037 */ 1038 top_waiter = rt_mutex_top_waiter(lock); 1039 1040 /* [13] Drop the locks */ 1041 raw_spin_unlock(&task->pi_lock); 1042 raw_spin_unlock_irq(&lock->wait_lock); 1043 1044 /* 1045 * Make the actual exit decisions [12], based on the stored 1046 * values. 1047 * 1048 * We reached the end of the lock chain. Stop right here. No 1049 * point to go back just to figure that out. 1050 */ 1051 if (!next_lock) 1052 goto out_put_task; 1053 1054 /* 1055 * If the current waiter is not the top waiter on the lock, 1056 * then we can stop the chain walk here if we are not in full 1057 * deadlock detection mode. 1058 */ 1059 if (!detect_deadlock && waiter != top_waiter) 1060 goto out_put_task; 1061 1062 goto again; 1063 1064 out_unlock_pi: 1065 raw_spin_unlock_irq(&task->pi_lock); 1066 out_put_task: 1067 put_task_struct(task); 1068 1069 return ret; 1070 } 1071 1072 /* 1073 * Try to take an rt-mutex 1074 * 1075 * Must be called with lock->wait_lock held and interrupts disabled 1076 * 1077 * @lock: The lock to be acquired. 1078 * @task: The task which wants to acquire the lock 1079 * @waiter: The waiter that is queued to the lock's wait tree if the 1080 * callsite called task_blocked_on_lock(), otherwise NULL 1081 */ 1082 static int __sched 1083 try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task, 1084 struct rt_mutex_waiter *waiter) 1085 { 1086 lockdep_assert_held(&lock->wait_lock); 1087 1088 /* 1089 * Before testing whether we can acquire @lock, we set the 1090 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all 1091 * other tasks which try to modify @lock into the slow path 1092 * and they serialize on @lock->wait_lock. 1093 * 1094 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state 1095 * as explained at the top of this file if and only if: 1096 * 1097 * - There is a lock owner. The caller must fixup the 1098 * transient state if it does a trylock or leaves the lock 1099 * function due to a signal or timeout. 1100 * 1101 * - @task acquires the lock and there are no other 1102 * waiters. This is undone in rt_mutex_set_owner(@task) at 1103 * the end of this function. 1104 */ 1105 mark_rt_mutex_waiters(lock); 1106 1107 /* 1108 * If @lock has an owner, give up. 1109 */ 1110 if (rt_mutex_owner(lock)) 1111 return 0; 1112 1113 /* 1114 * If @waiter != NULL, @task has already enqueued the waiter 1115 * into @lock waiter tree. If @waiter == NULL then this is a 1116 * trylock attempt. 1117 */ 1118 if (waiter) { 1119 struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); 1120 1121 /* 1122 * If waiter is the highest priority waiter of @lock, 1123 * or allowed to steal it, take it over. 1124 */ 1125 if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) { 1126 /* 1127 * We can acquire the lock. Remove the waiter from the 1128 * lock waiters tree. 1129 */ 1130 rt_mutex_dequeue(lock, waiter); 1131 } else { 1132 return 0; 1133 } 1134 } else { 1135 /* 1136 * If the lock has waiters already we check whether @task is 1137 * eligible to take over the lock. 1138 * 1139 * If there are no other waiters, @task can acquire 1140 * the lock. @task->pi_blocked_on is NULL, so it does 1141 * not need to be dequeued. 1142 */ 1143 if (rt_mutex_has_waiters(lock)) { 1144 /* Check whether the trylock can steal it. */ 1145 if (!rt_mutex_steal(task_to_waiter(task), 1146 rt_mutex_top_waiter(lock))) 1147 return 0; 1148 1149 /* 1150 * The current top waiter stays enqueued. We 1151 * don't have to change anything in the lock 1152 * waiters order. 1153 */ 1154 } else { 1155 /* 1156 * No waiters. Take the lock without the 1157 * pi_lock dance.@task->pi_blocked_on is NULL 1158 * and we have no waiters to enqueue in @task 1159 * pi waiters tree. 1160 */ 1161 goto takeit; 1162 } 1163 } 1164 1165 /* 1166 * Clear @task->pi_blocked_on. Requires protection by 1167 * @task->pi_lock. Redundant operation for the @waiter == NULL 1168 * case, but conditionals are more expensive than a redundant 1169 * store. 1170 */ 1171 raw_spin_lock(&task->pi_lock); 1172 task->pi_blocked_on = NULL; 1173 /* 1174 * Finish the lock acquisition. @task is the new owner. If 1175 * other waiters exist we have to insert the highest priority 1176 * waiter into @task->pi_waiters tree. 1177 */ 1178 if (rt_mutex_has_waiters(lock)) 1179 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); 1180 raw_spin_unlock(&task->pi_lock); 1181 1182 takeit: 1183 /* 1184 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there 1185 * are still waiters or clears it. 1186 */ 1187 rt_mutex_set_owner(lock, task); 1188 1189 return 1; 1190 } 1191 1192 /* 1193 * Task blocks on lock. 1194 * 1195 * Prepare waiter and propagate pi chain 1196 * 1197 * This must be called with lock->wait_lock held and interrupts disabled 1198 */ 1199 static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, 1200 struct rt_mutex_waiter *waiter, 1201 struct task_struct *task, 1202 struct ww_acquire_ctx *ww_ctx, 1203 enum rtmutex_chainwalk chwalk) 1204 { 1205 struct task_struct *owner = rt_mutex_owner(lock); 1206 struct rt_mutex_waiter *top_waiter = waiter; 1207 struct rt_mutex_base *next_lock; 1208 int chain_walk = 0, res; 1209 1210 lockdep_assert_held(&lock->wait_lock); 1211 1212 /* 1213 * Early deadlock detection. We really don't want the task to 1214 * enqueue on itself just to untangle the mess later. It's not 1215 * only an optimization. We drop the locks, so another waiter 1216 * can come in before the chain walk detects the deadlock. So 1217 * the other will detect the deadlock and return -EDEADLOCK, 1218 * which is wrong, as the other waiter is not in a deadlock 1219 * situation. 1220 * 1221 * Except for ww_mutex, in that case the chain walk must already deal 1222 * with spurious cycles, see the comments at [3] and [6]. 1223 */ 1224 if (owner == task && !(build_ww_mutex() && ww_ctx)) 1225 return -EDEADLK; 1226 1227 raw_spin_lock(&task->pi_lock); 1228 waiter->task = task; 1229 waiter->lock = lock; 1230 waiter_update_prio(waiter, task); 1231 waiter_clone_prio(waiter, task); 1232 1233 /* Get the top priority waiter on the lock */ 1234 if (rt_mutex_has_waiters(lock)) 1235 top_waiter = rt_mutex_top_waiter(lock); 1236 rt_mutex_enqueue(lock, waiter); 1237 1238 task->pi_blocked_on = waiter; 1239 1240 raw_spin_unlock(&task->pi_lock); 1241 1242 if (build_ww_mutex() && ww_ctx) { 1243 struct rt_mutex *rtm; 1244 1245 /* Check whether the waiter should back out immediately */ 1246 rtm = container_of(lock, struct rt_mutex, rtmutex); 1247 res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx); 1248 if (res) { 1249 raw_spin_lock(&task->pi_lock); 1250 rt_mutex_dequeue(lock, waiter); 1251 task->pi_blocked_on = NULL; 1252 raw_spin_unlock(&task->pi_lock); 1253 return res; 1254 } 1255 } 1256 1257 if (!owner) 1258 return 0; 1259 1260 raw_spin_lock(&owner->pi_lock); 1261 if (waiter == rt_mutex_top_waiter(lock)) { 1262 rt_mutex_dequeue_pi(owner, top_waiter); 1263 rt_mutex_enqueue_pi(owner, waiter); 1264 1265 rt_mutex_adjust_prio(lock, owner); 1266 if (owner->pi_blocked_on) 1267 chain_walk = 1; 1268 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { 1269 chain_walk = 1; 1270 } 1271 1272 /* Store the lock on which owner is blocked or NULL */ 1273 next_lock = task_blocked_on_lock(owner); 1274 1275 raw_spin_unlock(&owner->pi_lock); 1276 /* 1277 * Even if full deadlock detection is on, if the owner is not 1278 * blocked itself, we can avoid finding this out in the chain 1279 * walk. 1280 */ 1281 if (!chain_walk || !next_lock) 1282 return 0; 1283 1284 /* 1285 * The owner can't disappear while holding a lock, 1286 * so the owner struct is protected by wait_lock. 1287 * Gets dropped in rt_mutex_adjust_prio_chain()! 1288 */ 1289 get_task_struct(owner); 1290 1291 raw_spin_unlock_irq(&lock->wait_lock); 1292 1293 res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, 1294 next_lock, waiter, task); 1295 1296 raw_spin_lock_irq(&lock->wait_lock); 1297 1298 return res; 1299 } 1300 1301 /* 1302 * Remove the top waiter from the current tasks pi waiter tree and 1303 * queue it up. 1304 * 1305 * Called with lock->wait_lock held and interrupts disabled. 1306 */ 1307 static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh, 1308 struct rt_mutex_base *lock) 1309 { 1310 struct rt_mutex_waiter *waiter; 1311 1312 lockdep_assert_held(&lock->wait_lock); 1313 1314 raw_spin_lock(¤t->pi_lock); 1315 1316 waiter = rt_mutex_top_waiter(lock); 1317 1318 /* 1319 * Remove it from current->pi_waiters and deboost. 1320 * 1321 * We must in fact deboost here in order to ensure we call 1322 * rt_mutex_setprio() to update p->pi_top_task before the 1323 * task unblocks. 1324 */ 1325 rt_mutex_dequeue_pi(current, waiter); 1326 rt_mutex_adjust_prio(lock, current); 1327 1328 /* 1329 * As we are waking up the top waiter, and the waiter stays 1330 * queued on the lock until it gets the lock, this lock 1331 * obviously has waiters. Just set the bit here and this has 1332 * the added benefit of forcing all new tasks into the 1333 * slow path making sure no task of lower priority than 1334 * the top waiter can steal this lock. 1335 */ 1336 lock->owner = (void *) RT_MUTEX_HAS_WAITERS; 1337 1338 /* 1339 * We deboosted before waking the top waiter task such that we don't 1340 * run two tasks with the 'same' priority (and ensure the 1341 * p->pi_top_task pointer points to a blocked task). This however can 1342 * lead to priority inversion if we would get preempted after the 1343 * deboost but before waking our donor task, hence the preempt_disable() 1344 * before unlock. 1345 * 1346 * Pairs with preempt_enable() in rt_mutex_wake_up_q(); 1347 */ 1348 preempt_disable(); 1349 rt_mutex_wake_q_add(wqh, waiter); 1350 raw_spin_unlock(¤t->pi_lock); 1351 } 1352 1353 static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock) 1354 { 1355 int ret = try_to_take_rt_mutex(lock, current, NULL); 1356 1357 /* 1358 * try_to_take_rt_mutex() sets the lock waiters bit 1359 * unconditionally. Clean this up. 1360 */ 1361 fixup_rt_mutex_waiters(lock, true); 1362 1363 return ret; 1364 } 1365 1366 /* 1367 * Slow path try-lock function: 1368 */ 1369 static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock) 1370 { 1371 unsigned long flags; 1372 int ret; 1373 1374 /* 1375 * If the lock already has an owner we fail to get the lock. 1376 * This can be done without taking the @lock->wait_lock as 1377 * it is only being read, and this is a trylock anyway. 1378 */ 1379 if (rt_mutex_owner(lock)) 1380 return 0; 1381 1382 /* 1383 * The mutex has currently no owner. Lock the wait lock and try to 1384 * acquire the lock. We use irqsave here to support early boot calls. 1385 */ 1386 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1387 1388 ret = __rt_mutex_slowtrylock(lock); 1389 1390 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1391 1392 return ret; 1393 } 1394 1395 static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock) 1396 { 1397 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) 1398 return 1; 1399 1400 return rt_mutex_slowtrylock(lock); 1401 } 1402 1403 /* 1404 * Slow path to release a rt-mutex. 1405 */ 1406 static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock) 1407 { 1408 DEFINE_RT_WAKE_Q(wqh); 1409 unsigned long flags; 1410 1411 /* irqsave required to support early boot calls */ 1412 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1413 1414 debug_rt_mutex_unlock(lock); 1415 1416 /* 1417 * We must be careful here if the fast path is enabled. If we 1418 * have no waiters queued we cannot set owner to NULL here 1419 * because of: 1420 * 1421 * foo->lock->owner = NULL; 1422 * rtmutex_lock(foo->lock); <- fast path 1423 * free = atomic_dec_and_test(foo->refcnt); 1424 * rtmutex_unlock(foo->lock); <- fast path 1425 * if (free) 1426 * kfree(foo); 1427 * raw_spin_unlock(foo->lock->wait_lock); 1428 * 1429 * So for the fastpath enabled kernel: 1430 * 1431 * Nothing can set the waiters bit as long as we hold 1432 * lock->wait_lock. So we do the following sequence: 1433 * 1434 * owner = rt_mutex_owner(lock); 1435 * clear_rt_mutex_waiters(lock); 1436 * raw_spin_unlock(&lock->wait_lock); 1437 * if (cmpxchg(&lock->owner, owner, 0) == owner) 1438 * return; 1439 * goto retry; 1440 * 1441 * The fastpath disabled variant is simple as all access to 1442 * lock->owner is serialized by lock->wait_lock: 1443 * 1444 * lock->owner = NULL; 1445 * raw_spin_unlock(&lock->wait_lock); 1446 */ 1447 while (!rt_mutex_has_waiters(lock)) { 1448 /* Drops lock->wait_lock ! */ 1449 if (unlock_rt_mutex_safe(lock, flags) == true) 1450 return; 1451 /* Relock the rtmutex and try again */ 1452 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1453 } 1454 1455 /* 1456 * The wakeup next waiter path does not suffer from the above 1457 * race. See the comments there. 1458 * 1459 * Queue the next waiter for wakeup once we release the wait_lock. 1460 */ 1461 mark_wakeup_next_waiter(&wqh, lock); 1462 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1463 1464 rt_mutex_wake_up_q(&wqh); 1465 } 1466 1467 static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock) 1468 { 1469 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) 1470 return; 1471 1472 rt_mutex_slowunlock(lock); 1473 } 1474 1475 #ifdef CONFIG_SMP 1476 static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, 1477 struct rt_mutex_waiter *waiter, 1478 struct task_struct *owner) 1479 { 1480 bool res = true; 1481 1482 rcu_read_lock(); 1483 for (;;) { 1484 /* If owner changed, trylock again. */ 1485 if (owner != rt_mutex_owner(lock)) 1486 break; 1487 /* 1488 * Ensure that @owner is dereferenced after checking that 1489 * the lock owner still matches @owner. If that fails, 1490 * @owner might point to freed memory. If it still matches, 1491 * the rcu_read_lock() ensures the memory stays valid. 1492 */ 1493 barrier(); 1494 /* 1495 * Stop spinning when: 1496 * - the lock owner has been scheduled out 1497 * - current is not longer the top waiter 1498 * - current is requested to reschedule (redundant 1499 * for CONFIG_PREEMPT_RCU=y) 1500 * - the VCPU on which owner runs is preempted 1501 */ 1502 if (!owner_on_cpu(owner) || need_resched() || 1503 !rt_mutex_waiter_is_top_waiter(lock, waiter)) { 1504 res = false; 1505 break; 1506 } 1507 cpu_relax(); 1508 } 1509 rcu_read_unlock(); 1510 return res; 1511 } 1512 #else 1513 static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, 1514 struct rt_mutex_waiter *waiter, 1515 struct task_struct *owner) 1516 { 1517 return false; 1518 } 1519 #endif 1520 1521 #ifdef RT_MUTEX_BUILD_MUTEX 1522 /* 1523 * Functions required for: 1524 * - rtmutex, futex on all kernels 1525 * - mutex and rwsem substitutions on RT kernels 1526 */ 1527 1528 /* 1529 * Remove a waiter from a lock and give up 1530 * 1531 * Must be called with lock->wait_lock held and interrupts disabled. It must 1532 * have just failed to try_to_take_rt_mutex(). 1533 */ 1534 static void __sched remove_waiter(struct rt_mutex_base *lock, 1535 struct rt_mutex_waiter *waiter) 1536 { 1537 bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); 1538 struct task_struct *owner = rt_mutex_owner(lock); 1539 struct rt_mutex_base *next_lock; 1540 1541 lockdep_assert_held(&lock->wait_lock); 1542 1543 raw_spin_lock(¤t->pi_lock); 1544 rt_mutex_dequeue(lock, waiter); 1545 current->pi_blocked_on = NULL; 1546 raw_spin_unlock(¤t->pi_lock); 1547 1548 /* 1549 * Only update priority if the waiter was the highest priority 1550 * waiter of the lock and there is an owner to update. 1551 */ 1552 if (!owner || !is_top_waiter) 1553 return; 1554 1555 raw_spin_lock(&owner->pi_lock); 1556 1557 rt_mutex_dequeue_pi(owner, waiter); 1558 1559 if (rt_mutex_has_waiters(lock)) 1560 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); 1561 1562 rt_mutex_adjust_prio(lock, owner); 1563 1564 /* Store the lock on which owner is blocked or NULL */ 1565 next_lock = task_blocked_on_lock(owner); 1566 1567 raw_spin_unlock(&owner->pi_lock); 1568 1569 /* 1570 * Don't walk the chain, if the owner task is not blocked 1571 * itself. 1572 */ 1573 if (!next_lock) 1574 return; 1575 1576 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 1577 get_task_struct(owner); 1578 1579 raw_spin_unlock_irq(&lock->wait_lock); 1580 1581 rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, 1582 next_lock, NULL, current); 1583 1584 raw_spin_lock_irq(&lock->wait_lock); 1585 } 1586 1587 /** 1588 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop 1589 * @lock: the rt_mutex to take 1590 * @ww_ctx: WW mutex context pointer 1591 * @state: the state the task should block in (TASK_INTERRUPTIBLE 1592 * or TASK_UNINTERRUPTIBLE) 1593 * @timeout: the pre-initialized and started timer, or NULL for none 1594 * @waiter: the pre-initialized rt_mutex_waiter 1595 * 1596 * Must be called with lock->wait_lock held and interrupts disabled 1597 */ 1598 static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, 1599 struct ww_acquire_ctx *ww_ctx, 1600 unsigned int state, 1601 struct hrtimer_sleeper *timeout, 1602 struct rt_mutex_waiter *waiter) 1603 { 1604 struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); 1605 struct task_struct *owner; 1606 int ret = 0; 1607 1608 for (;;) { 1609 /* Try to acquire the lock: */ 1610 if (try_to_take_rt_mutex(lock, current, waiter)) 1611 break; 1612 1613 if (timeout && !timeout->task) { 1614 ret = -ETIMEDOUT; 1615 break; 1616 } 1617 if (signal_pending_state(state, current)) { 1618 ret = -EINTR; 1619 break; 1620 } 1621 1622 if (build_ww_mutex() && ww_ctx) { 1623 ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx); 1624 if (ret) 1625 break; 1626 } 1627 1628 if (waiter == rt_mutex_top_waiter(lock)) 1629 owner = rt_mutex_owner(lock); 1630 else 1631 owner = NULL; 1632 raw_spin_unlock_irq(&lock->wait_lock); 1633 1634 if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) 1635 rt_mutex_schedule(); 1636 1637 raw_spin_lock_irq(&lock->wait_lock); 1638 set_current_state(state); 1639 } 1640 1641 __set_current_state(TASK_RUNNING); 1642 return ret; 1643 } 1644 1645 static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, 1646 struct rt_mutex_waiter *w) 1647 { 1648 /* 1649 * If the result is not -EDEADLOCK or the caller requested 1650 * deadlock detection, nothing to do here. 1651 */ 1652 if (res != -EDEADLOCK || detect_deadlock) 1653 return; 1654 1655 if (build_ww_mutex() && w->ww_ctx) 1656 return; 1657 1658 /* 1659 * Yell loudly and stop the task right here. 1660 */ 1661 WARN(1, "rtmutex deadlock detected\n"); 1662 while (1) { 1663 set_current_state(TASK_INTERRUPTIBLE); 1664 rt_mutex_schedule(); 1665 } 1666 } 1667 1668 /** 1669 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held 1670 * @lock: The rtmutex to block lock 1671 * @ww_ctx: WW mutex context pointer 1672 * @state: The task state for sleeping 1673 * @chwalk: Indicator whether full or partial chainwalk is requested 1674 * @waiter: Initializer waiter for blocking 1675 */ 1676 static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, 1677 struct ww_acquire_ctx *ww_ctx, 1678 unsigned int state, 1679 enum rtmutex_chainwalk chwalk, 1680 struct rt_mutex_waiter *waiter) 1681 { 1682 struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); 1683 struct ww_mutex *ww = ww_container_of(rtm); 1684 int ret; 1685 1686 lockdep_assert_held(&lock->wait_lock); 1687 1688 /* Try to acquire the lock again: */ 1689 if (try_to_take_rt_mutex(lock, current, NULL)) { 1690 if (build_ww_mutex() && ww_ctx) { 1691 __ww_mutex_check_waiters(rtm, ww_ctx); 1692 ww_mutex_lock_acquired(ww, ww_ctx); 1693 } 1694 return 0; 1695 } 1696 1697 set_current_state(state); 1698 1699 trace_contention_begin(lock, LCB_F_RT); 1700 1701 ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk); 1702 if (likely(!ret)) 1703 ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter); 1704 1705 if (likely(!ret)) { 1706 /* acquired the lock */ 1707 if (build_ww_mutex() && ww_ctx) { 1708 if (!ww_ctx->is_wait_die) 1709 __ww_mutex_check_waiters(rtm, ww_ctx); 1710 ww_mutex_lock_acquired(ww, ww_ctx); 1711 } 1712 } else { 1713 __set_current_state(TASK_RUNNING); 1714 remove_waiter(lock, waiter); 1715 rt_mutex_handle_deadlock(ret, chwalk, waiter); 1716 } 1717 1718 /* 1719 * try_to_take_rt_mutex() sets the waiter bit 1720 * unconditionally. We might have to fix that up. 1721 */ 1722 fixup_rt_mutex_waiters(lock, true); 1723 1724 trace_contention_end(lock, ret); 1725 1726 return ret; 1727 } 1728 1729 static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, 1730 struct ww_acquire_ctx *ww_ctx, 1731 unsigned int state) 1732 { 1733 struct rt_mutex_waiter waiter; 1734 int ret; 1735 1736 rt_mutex_init_waiter(&waiter); 1737 waiter.ww_ctx = ww_ctx; 1738 1739 ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK, 1740 &waiter); 1741 1742 debug_rt_mutex_free_waiter(&waiter); 1743 return ret; 1744 } 1745 1746 /* 1747 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails 1748 * @lock: The rtmutex to block lock 1749 * @ww_ctx: WW mutex context pointer 1750 * @state: The task state for sleeping 1751 */ 1752 static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, 1753 struct ww_acquire_ctx *ww_ctx, 1754 unsigned int state) 1755 { 1756 unsigned long flags; 1757 int ret; 1758 1759 /* 1760 * Do all pre-schedule work here, before we queue a waiter and invoke 1761 * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would 1762 * otherwise recurse back into task_blocks_on_rt_mutex() through 1763 * rtlock_slowlock() and will then enqueue a second waiter for this 1764 * same task and things get really confusing real fast. 1765 */ 1766 rt_mutex_pre_schedule(); 1767 1768 /* 1769 * Technically we could use raw_spin_[un]lock_irq() here, but this can 1770 * be called in early boot if the cmpxchg() fast path is disabled 1771 * (debug, no architecture support). In this case we will acquire the 1772 * rtmutex with lock->wait_lock held. But we cannot unconditionally 1773 * enable interrupts in that early boot case. So we need to use the 1774 * irqsave/restore variants. 1775 */ 1776 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1777 ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); 1778 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1779 rt_mutex_post_schedule(); 1780 1781 return ret; 1782 } 1783 1784 static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, 1785 unsigned int state) 1786 { 1787 lockdep_assert(!current->pi_blocked_on); 1788 1789 if (likely(rt_mutex_try_acquire(lock))) 1790 return 0; 1791 1792 return rt_mutex_slowlock(lock, NULL, state); 1793 } 1794 #endif /* RT_MUTEX_BUILD_MUTEX */ 1795 1796 #ifdef RT_MUTEX_BUILD_SPINLOCKS 1797 /* 1798 * Functions required for spin/rw_lock substitution on RT kernels 1799 */ 1800 1801 /** 1802 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks 1803 * @lock: The underlying RT mutex 1804 */ 1805 static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) 1806 { 1807 struct rt_mutex_waiter waiter; 1808 struct task_struct *owner; 1809 1810 lockdep_assert_held(&lock->wait_lock); 1811 1812 if (try_to_take_rt_mutex(lock, current, NULL)) 1813 return; 1814 1815 rt_mutex_init_rtlock_waiter(&waiter); 1816 1817 /* Save current state and set state to TASK_RTLOCK_WAIT */ 1818 current_save_and_set_rtlock_wait_state(); 1819 1820 trace_contention_begin(lock, LCB_F_RT); 1821 1822 task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK); 1823 1824 for (;;) { 1825 /* Try to acquire the lock again */ 1826 if (try_to_take_rt_mutex(lock, current, &waiter)) 1827 break; 1828 1829 if (&waiter == rt_mutex_top_waiter(lock)) 1830 owner = rt_mutex_owner(lock); 1831 else 1832 owner = NULL; 1833 raw_spin_unlock_irq(&lock->wait_lock); 1834 1835 if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) 1836 schedule_rtlock(); 1837 1838 raw_spin_lock_irq(&lock->wait_lock); 1839 set_current_state(TASK_RTLOCK_WAIT); 1840 } 1841 1842 /* Restore the task state */ 1843 current_restore_rtlock_saved_state(); 1844 1845 /* 1846 * try_to_take_rt_mutex() sets the waiter bit unconditionally. 1847 * We might have to fix that up: 1848 */ 1849 fixup_rt_mutex_waiters(lock, true); 1850 debug_rt_mutex_free_waiter(&waiter); 1851 1852 trace_contention_end(lock, 0); 1853 } 1854 1855 static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock) 1856 { 1857 unsigned long flags; 1858 1859 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1860 rtlock_slowlock_locked(lock); 1861 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1862 } 1863 1864 #endif /* RT_MUTEX_BUILD_SPINLOCKS */ 1865