// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 * See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include "rtmutex_common.h"

#ifndef WW_RT
# define build_ww_mutex()	(false)
# define ww_container_of(rtm)	NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

#else
# define build_ww_mutex()	(true)
# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
 * we need to set the bit0 before looking at the lock, and the owner may
 * be NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */

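/*
 * Illustrative sketch (an assumption about the accessor in rtmutex_common.h,
 * not code added to this file): with the encoding above, owner and waiter
 * state are recovered by masking bit 0:
 *
 *	unsigned long val = (unsigned long)READ_ONCE(lock->owner);
 *	struct task_struct *owner = (struct task_struct *)(val & ~RT_MUTEX_HAS_WAITERS);
 *	bool has_waiters = val & RT_MUTEX_HAS_WAITERS;
 */
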
static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	WRITE_ONCE(lock->owner, (struct task_struct *)val);
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0		CPU 1			CPU 2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *				rt_mutex_lock(l)
	 *				lock(l->lock)
	 *				l->owner = T1 | HAS_WAITERS;
	 *				enqueue(T3)
	 *				boost()
	 *				  unlock(l->lock)
	 *				block()
	 *		signal(->T2)	signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *				lock(l->lock)
	 *				dequeue(T3)
	 *				 ==> wait list is empty
	 *				deboost()
	 *				  unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l)) {
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		    l->owner = owner
	 *		      ==> l->owner = T1
	 *		  }
	 *				lock(l->lock)
	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
	 *				  if (wait_list_empty(l)) {
	 *				    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *				    l->owner = owner
	 *				      ==> l->owner = T1
	 *				  }
	 *
	 * With the check for the waiter bit in place T3 on CPU 2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS)
		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_release(&lock->owner, &old, new);
}

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg_relaxed(p, owner,
				 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
	int prio = task->prio;

	if (!rt_prio(prio))
		return DEFAULT_PRIO;

	return prio;
}

static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	waiter->prio = __waiter_prio(task);
	waiter->deadline = task->dl.deadline;
}

/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p)	\
	&(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }

static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
						struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->deadline, right->deadline);

	return 0;
}

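/*
 * Illustrative example (added for clarity, not additional functionality):
 * a numerically lower ->prio means a higher priority, and only deadline
 * waiters (dl_prio(), i.e. prio < 0) fall back to comparing deadlines.
 * SCHED_OTHER waiters are normalized to DEFAULT_PRIO by __waiter_prio():
 *
 *	w1.prio = 10, w2.prio = 20	=> rt_mutex_waiter_less(&w1, &w2) == 1
 *	w1.prio == w2.prio == -1 and
 *	w1.deadline < w2.deadline	=> rt_mutex_waiter_less(&w1, &w2) == 1
 */
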
static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
						 struct rt_mutex_waiter *right)
{
	if (left->prio != right->prio)
		return 0;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 0 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return left->deadline == right->deadline;

	return 1;
}

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
				  struct rt_mutex_waiter *top_waiter)
{
	if (rt_mutex_waiter_less(waiter, top_waiter))
		return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
	/*
	 * Note that RT tasks are excluded from same priority (lateral)
	 * steals to prevent the introduction of an unbounded latency.
	 */
	if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
		return false;

	return rt_mutex_waiter_equal(waiter, top_waiter);
#else
	return false;
#endif
}

#define __node_2_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, tree_entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
	struct rt_mutex_waiter *aw = __node_2_waiter(a);
	struct rt_mutex_waiter *bw = __node_2_waiter(b);

	if (rt_mutex_waiter_less(aw, bw))
		return 1;

	if (!build_ww_mutex())
		return 0;

	if (rt_mutex_waiter_less(bw, aw))
		return 0;

	/* NOTE: relies on waiter->ww_ctx being set before insertion */
	if (aw->ww_ctx) {
		if (!bw->ww_ctx)
			return 1;

		return (signed long)(aw->ww_ctx->stamp -
				     bw->ww_ctx->stamp) < 0;
	}

	return 0;
}

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	rb_erase_cached(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

#define __node_2_pi_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)

static __always_inline bool
__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
	return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
{
	struct task_struct *pi_task = NULL;

	lockdep_assert_held(&p->pi_lock);

	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

	rt_mutex_setprio(p, pi_task);
}

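/*
 * Illustrative sketch (an assumption about rt_mutex_setprio() in
 * kernel/sched/core.c, not code in this file): the scheduler derives the
 * effective priority from the pi_task passed above, conceptually:
 *
 *	prio = pi_task ? min(p->normal_prio, pi_task->prio) : p->normal_prio;
 */
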
/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
						     struct task_struct *task,
						     unsigned int wake_state)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
			WARN_ON_ONCE(wqh->rtlock_task);
		get_task_struct(task);
		wqh->rtlock_task = task;
	} else {
		wake_q_add(&wqh->head, task);
	}
}

static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
						struct rt_mutex_waiter *w)
{
	rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
		put_task_struct(wqh->rtlock_task);
		wqh->rtlock_task = NULL;
	}

	if (!wake_q_empty(&wqh->head))
		wake_up_q(&wqh->head);

	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
	preempt_enable();
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
			      enum rtmutex_chainwalk chwalk)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
		return waiter != NULL;
	return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 *
 * again:
 *	  loop_sanity_check();
 * retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
					      enum rtmutex_chainwalk chwalk,
					      struct rt_mutex_base *orig_lock,
					      struct rt_mutex_base *next_lock,
					      struct rt_mutex_waiter *orig_waiter,
					      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex_base *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold
	 * a maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached task: %s (%d)\n",
			       max_lock_depth, top_task->comm,
			       task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before!
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
	 * consider:
	 *
	 *   P1: A, ww_A, ww_B
	 *   P2: ww_B, ww_A
	 *   P3: A
	 *
	 * P3 should not return -EDEADLK because it gets trapped in the cycle
	 * created by P1 and P2 (which will resolve -- and runs into
	 * max_lock_depth above). Therefore disable detect_deadlock such that
	 * the below termination condition can trigger once all relevant tasks
	 * are boosted.
	 *
	 * Even when we start with ww_mutex we can disable deadlock detection,
	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
	 * Suppressing it here however is not sufficient since we might still
	 * hit [6] due to adjustment driven iteration.
	 *
	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
	 * utterly fail to report it; lockdep should.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
		detect_deadlock = false;

	/*
	 * Drop out when the task has no waiters. Note that top_waiter
	 * can be NULL when we are in the deboosting mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		ret = -EDEADLK;

		/*
		 * When the deadlock is due to ww_mutex; also see above. Don't
		 * report the deadlock and instead let the ww_mutex wound/die
		 * logic pick which of the contending threads gets -EDEADLK.
		 *
		 * NOTE: assumes the cycle only contains a single ww_class; any
		 * other configuration and we fail to report; also, see
		 * lockdep.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
			ret = 0;

		raw_spin_unlock(&lock->wait_lock);
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue[7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = get_task_struct(rt_mutex_owner(lock));
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);

	/*
	 * Update the waiter prio fields now that we're dequeued.
	 *
	 * These values can have changed through either:
	 *
	 *   sys_sched_set_scheduler() / sys_sched_setattr()
	 *
	 * or
	 *
	 *   DL CBS enforcement advancing the effective deadline.
	 *
	 * Even though pi_waiters also uses these fields, and that tree is only
	 * updated in [11], we can do this here, since we hold [L], which
	 * serializes all pi_waiters access and rb_erase() does not care about
	 * the values of the node being removed.
	 */
	waiter_update_prio(waiter, task);

	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
			wake_up_state(waiter->task, waiter->wake_state);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = get_task_struct(rt_mutex_owner(lock));
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

		/*
		 * If waiter is the highest priority waiter of @lock,
		 * or allowed to steal it, take it over.
		 */
		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
			/*
			 * We can acquire the lock. Remove the waiter from the
			 * lock waiters tree.
			 */
			rt_mutex_dequeue(lock, waiter);
		} else {
			return 0;
		}
	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/* Check whether the trylock can steal it. */
			if (!rt_mutex_steal(task_to_waiter(task),
					    rt_mutex_top_waiter(lock)))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

takeit:
	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex_base *next_lock;
	int chain_walk = 0, res;

	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	waiter->task = task;
	waiter->lock = lock;
	waiter_update_prio(waiter, task);

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (build_ww_mutex() && ww_ctx) {
		struct rt_mutex *rtm;

		/* Check whether the waiter should back out immediately */
		rtm = container_of(lock, struct rt_mutex, rtmutex);
		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
		if (res) {
			raw_spin_lock(&task->pi_lock);
			rt_mutex_dequeue(lock, waiter);
			task->pi_blocked_on = NULL;
			raw_spin_unlock(&task->pi_lock);
			return res;
		}
	}

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
					    struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter *waiter;

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
	 */
	preempt_disable();
	rt_mutex_wake_q_add(wqh, waiter);
	raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	int ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = __rt_mutex_slowtrylock(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 1;

	return rt_mutex_slowtrylock(lock);
}

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
	DEFINE_RT_WAKE_Q(wqh);
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock);   <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(&wqh, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
		return;

	rt_mutex_slowunlock(lock);
}

#ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	bool res = true;

	rcu_read_lock();
	for (;;) {
		/* If owner changed, trylock again. */
		if (owner != rt_mutex_owner(lock))
			break;
		/*
		 * Ensure that @owner is dereferenced after checking that
		 * the lock owner still matches @owner. If that fails,
		 * @owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();
		/*
		 * Stop spinning when:
		 *  - the lock owner has been scheduled out
		 *  - current is no longer the top waiter
		 *  - current is requested to reschedule (redundant
		 *    for CONFIG_PREEMPT_RCU=y)
		 *  - the VCPU on which owner runs is preempted
		 */
		if (!owner->on_cpu || need_resched() ||
		    !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
		    vcpu_is_preempted(task_cpu(owner))) {
			res = false;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}
#else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	return false;
}
#endif

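/*
 * Illustrative sketch (restating how the slowpath wait loops below use the
 * helper above, not additional functionality): only the top waiter spins,
 * and only while the owner is running on another CPU:
 *
 *	if (waiter == rt_mutex_top_waiter(lock))
 *		owner = rt_mutex_owner(lock);
 *	else
 *		owner = NULL;
 *	if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
 *		schedule();
 */
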
#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *	- rtmutex, futex on all kernels
 *	- mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_base *next_lock;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:	 the rt_mutex to take
 * @ww_ctx:	 WW mutex context pointer
 * @state:	 the state the task should block in (TASK_INTERRUPTIBLE
 *		 or TASK_UNINTERRUPTIBLE)
 * @timeout:	 the pre-initialized and started timer, or NULL for none
 * @waiter:	 the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
					   struct ww_acquire_ctx *ww_ctx,
					   unsigned int state,
					   struct hrtimer_sleeper *timeout,
					   struct rt_mutex_waiter *waiter)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct task_struct *owner;
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		if (timeout && !timeout->task) {
			ret = -ETIMEDOUT;
			break;
		}
		if (signal_pending_state(state, current)) {
			ret = -EINTR;
			break;
		}

		if (build_ww_mutex() && ww_ctx) {
			ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
			if (ret)
				break;
		}

		if (waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
			schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
					     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	if (build_ww_mutex() && w->ww_ctx)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	WARN(1, "rtmutex deadlock detected\n");
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 * @chwalk:	Indicator whether full or partial chainwalk is requested
 * @waiter:	Initialized waiter for blocking
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
				       struct ww_acquire_ctx *ww_ctx,
				       unsigned int state,
				       enum rtmutex_chainwalk chwalk,
				       struct rt_mutex_waiter *waiter)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct ww_mutex *ww = ww_container_of(rtm);
	int ret;

	lockdep_assert_held(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		if (build_ww_mutex() && ww_ctx) {
			__ww_mutex_check_waiters(rtm, ww_ctx);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		return 0;
	}

	set_current_state(state);

	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
	if (likely(!ret))
		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);

	if (likely(!ret)) {
		/* acquired the lock */
		if (build_ww_mutex() && ww_ctx) {
			if (!ww_ctx->is_wait_die)
				__ww_mutex_check_waiters(rtm, ww_ctx);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
	} else {
		__set_current_state(TASK_RUNNING);
		remove_waiter(lock, waiter);
		rt_mutex_handle_deadlock(ret, chwalk, waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);
	return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
					     struct ww_acquire_ctx *ww_ctx,
					     unsigned int state)
{
	struct rt_mutex_waiter waiter;
	int ret;

	rt_mutex_init_waiter(&waiter);
	waiter.ww_ctx = ww_ctx;

	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
				  &waiter);

	debug_rt_mutex_free_waiter(&waiter);
	return ret;
}

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state)
{
	unsigned long flags;
	int ret;

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 0;

	return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */

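/*
 * Illustrative sketch (an assumption based on rtmutex_api.c, not part of
 * this file): the public locking entry points reduce to the fast/slow path
 * split above, conceptually:
 *
 *	void rt_mutex_lock(struct rt_mutex *lock)
 *	{
 *		// lockdep annotation elided
 *		__rt_mutex_lock(&lock->rtmutex, TASK_UNINTERRUPTIBLE);
 *	}
 */
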
#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:	The underlying RT mutex
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter waiter;
	struct task_struct *owner;

	lockdep_assert_held(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, current, NULL))
		return;

	rt_mutex_init_rtlock_waiter(&waiter);

	/* Save current state and set state to TASK_RTLOCK_WAIT */
	current_save_and_set_rtlock_wait_state();

	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);

	for (;;) {
		/* Try to acquire the lock again */
		if (try_to_take_rt_mutex(lock, current, &waiter))
			break;

		if (&waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
			schedule_rtlock();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(TASK_RTLOCK_WAIT);
	}

	/* Restore the task state */
	current_restore_rtlock_saved_state();

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
	 * We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock);
	debug_rt_mutex_free_waiter(&waiter);
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	rtlock_slowlock_locked(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */