/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 *
 * See Documentation/locking/rt-mutex-design.txt for details.
 */
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/sched/wake_q.h>
#include <linux/timer.h>

#include "rtmutex_common.h"

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It can also be a transitional state when grabbing the lock
 * with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
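/*
 * Illustrative sketch (not part of the original file): the helpers in
 * rtmutex_common.h decode the encoding described above by masking out
 * (or testing) bit 0 of lock->owner, roughly:
 *
 *	owner       = (struct task_struct *)
 *			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
 *	has_waiters = (unsigned long)lock->owner & RT_MUTEX_HAS_WAITERS;
 *
 * The helpers used throughout this file for that purpose are
 * rt_mutex_owner() and rt_mutex_has_waiters().
 */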

static void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	lock->owner = (struct task_struct *)val;
}

static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0	CPU 1		CPU2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *				rt_mutex_lock(l)
	 *				lock(l->lock)
	 *				l->owner = T1 | HAS_WAITERS;
	 *				enqueue(T3)
	 *				boost()
	 *				  unlock(l->lock)
	 *				block()
	 *		signal(->T2)	signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *				lock(l->lock)
	 *				dequeue(T3)
	 *				 ==> wait list is empty
	 *				deboost()
	 *				 unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l)) {
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		    l->owner = owner
	 *		      ==> l->owner = T1
	 *		  }
	 *				lock(l->lock)
	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
	 *				  if (wait_list_empty(l)) {
	 *				    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *				    l->owner = owner
	 *				      ==> l->owner = T1
	 *				  }
	 *
	 * With the check for the waiter bit in place T3 on CPU2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS)
		WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg_relaxed(p, owner,
				 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
					unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
# define rt_mutex_cmpxchg_relaxed(l,c,n)	(0)
# define rt_mutex_cmpxchg_acquire(l,c,n)	(0)
# define rt_mutex_cmpxchg_release(l,c,n)	(0)

static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
					unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
		     struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->task->dl.deadline,
				      right->task->dl.deadline);

	return 0;
}

static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &lock->waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		lock->waiters_leftmost = &waiter->tree_entry;

	rb_link_node(&waiter->tree_entry, parent, link);
	rb_insert_color(&waiter->tree_entry, &lock->waiters);
}

static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	if (lock->waiters_leftmost == &waiter->tree_entry)
		lock->waiters_leftmost = rb_next(&waiter->tree_entry);

	rb_erase(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &task->pi_waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		task->pi_waiters_leftmost = &waiter->pi_tree_entry;

	rb_link_node(&waiter->pi_tree_entry, parent, link);
	rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
}

static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
		task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);

	rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

/*
 * Calculate task priority from the waiter tree priority
 *
 * Return task->normal_prio when the waiter tree is empty or when
 * the waiter is not allowed to do priority boosting
 */
int rt_mutex_getprio(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return task->normal_prio;

	return min(task_top_pi_waiter(task)->prio,
		   task->normal_prio);
}

struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return NULL;

	return task_top_pi_waiter(task)->task;
}

/*
 * Called by sched_setscheduler() to get the priority which will be
 * effective after the change.
 */
int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
{
	if (!task_has_pi_waiters(task))
		return newprio;

	if (task_top_pi_waiter(task)->task->prio <= newprio)
		return task_top_pi_waiter(task)->task->prio;
	return newprio;
}
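/*
 * Illustrative example (not part of the original file): kernel prio
 * values are ordered so that a lower number means a higher priority.
 * Assuming the owner's normal_prio is 120 (a SCHED_NORMAL task) and
 * its top pi waiter has prio 10 (an RT waiter), the helpers above
 * yield:
 *
 *	rt_mutex_getprio(owner)                  == min(10, 120) == 10
 *	rt_mutex_get_effective_prio(owner, 120)  == 10
 *
 * i.e. the owner runs boosted at the waiter's priority until its pi
 * waiters tree no longer contains a higher priority entry.
 */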
/*
 * Adjust the priority of a task, after its pi_waiters got modified.
 *
 * This can be both boosting and unboosting. task->pi_lock must be held.
 */
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
	int prio = rt_mutex_getprio(task);

	if (task->prio != prio || dl_prio(prio))
		rt_mutex_setprio(task, prio);
}

/*
 * Adjust task priority (undo boosting). Called from the exit path of
 * rt_mutex_slowunlock() and rt_mutex_slowlock().
 *
 * (Note: We do this outside of the protection of lock->wait_lock to
 * allow the lock to be taken while or before we readjust the priority
 * of task. We do not use the spin_xx_mutex() variants here as we are
 * outside of the debug path.)
 */
void rt_mutex_adjust_prio(struct task_struct *task)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
					  enum rtmutex_chainwalk chwalk)
{
	/*
	 * This is just a wrapper function for the following call,
	 * because debug_rt_mutex_detect_deadlock() smells like a magic
	 * debug feature and I wanted to keep the cond function in the
	 * main source file along with the comments instead of having
	 * two of the same in the headers.
	 */
	return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}

/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;

static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 * again:
 *	  loop_sanity_check();
 * retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      enum rtmutex_chainwalk chwalk,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex *next_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold
	 * a maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (waiter->prio == task->prio) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
		raw_spin_unlock(&lock->wait_lock);
		ret = -EDEADLK;
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue[7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = rt_mutex_owner(lock);
		get_task_struct(task);
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);
	waiter->prio = task->prio;
	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the dead lock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
			wake_up_process(rt_mutex_top_waiter(lock)->task);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = rt_mutex_owner(lock);
	get_task_struct(task);
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
				struct rt_mutex_waiter *waiter)
{
	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		/*
		 * If waiter is not the highest priority waiter of
		 * @lock, give up.
		 */
		if (waiter != rt_mutex_top_waiter(lock))
			return 0;

		/*
		 * We can acquire the lock. Remove the waiter from the
		 * lock waiters tree.
		 */
		rt_mutex_dequeue(lock, waiter);

	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/*
			 * If @task->prio is greater than or equal to
			 * the top waiter priority (kernel view),
			 * @task lost.
			 */
			if (task->prio >= rt_mutex_top_waiter(lock)->prio)
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

takeit:
	/* We got the lock. */
	debug_rt_mutex_lock(lock);

	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	rt_mutex_deadlock_account_lock(lock, task);

	return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex *next_lock;
	int chain_walk = 0, res;

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	__rt_mutex_adjust_prio(task);
	waiter->task = task;
	waiter->lock = lock;
	waiter->prio = task->prio;

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		__rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
				    struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters. We do not adjust a
	 * possible priority boost right now. We execute wakeup in the
	 * boosted mode and go back to normal after releasing
	 * lock->wait_lock.
	 */
	rt_mutex_dequeue_pi(current, waiter);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	raw_spin_unlock(&current->pi_lock);

	wake_q_add(wake_q, waiter->task);
}

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It
 * must have just failed to try_to_take_rt_mutex().
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex *next_lock;

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	__rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!waiter || (waiter->prio == task->prio &&
			!dl_prio(task->prio))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	next_lock = waiter->lock;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
				   next_lock, NULL, task);
}

/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock:		 the rt_mutex to take
 * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
 *			 or TASK_UNINTERRUPTIBLE)
 * @timeout:		 the pre-initialized and started timer, or NULL for none
 * @waiter:		 the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter)
{
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (likely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		raw_spin_unlock_irq(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}

static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
				     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	rt_mutex_print_deadlock(w);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  enum rtmutex_chainwalk chwalk)
{
	struct rt_mutex_waiter waiter;
	unsigned long flags;
	int ret = 0;

	debug_rt_mutex_init_waiter(&waiter);
	RB_CLEAR_NODE(&waiter.pi_tree_entry);
	RB_CLEAR_NODE(&waiter.tree_entry);

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout))
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);

	if (likely(!ret))
		/* sleep on the mutex */
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

	if (unlikely(ret)) {
		__set_current_state(TASK_RUNNING);
		if (rt_mutex_has_waiters(lock))
			remove_waiter(lock, &waiter);
		rt_mutex_handle_deadlock(ret, chwalk, &waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

/*
 * Slow path to release a rt-mutex.
 * Return whether the current task needs to undo a potential priority boosting.
 */
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
					struct wake_q_head *wake_q)
{
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	rt_mutex_deadlock_account_unlock(current);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock); <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return false;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(wake_q, lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	/* check PI boosting */
	return true;
}

/*
 * debug aware fast / slowpath lock, trylock, unlock
 *
 * The atomic acquire/release ops are compiled away, when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
		  int (*slowfn)(struct rt_mutex *lock, int state,
				struct hrtimer_sleeper *timeout,
				enum rtmutex_chainwalk chwalk))
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	} else
		return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
}

static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
			struct hrtimer_sleeper *timeout,
			enum rtmutex_chainwalk chwalk,
			int (*slowfn)(struct rt_mutex *lock, int state,
				      struct hrtimer_sleeper *timeout,
				      enum rtmutex_chainwalk chwalk))
{
	if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
	    likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	} else
		return slowfn(lock, state, timeout, chwalk);
}

static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
		     int (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 1;
	}
	return slowfn(lock);
}

static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
		    bool (*slowfn)(struct rt_mutex *lock,
				   struct wake_q_head *wqh))
{
	DEFINE_WAKE_Q(wake_q);

	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
		rt_mutex_deadlock_account_unlock(current);

	} else {
		bool deboost = slowfn(lock, &wake_q);

		wake_up_q(&wake_q);

		/* Undo pi boosting if necessary: */
		if (deboost)
			rt_mutex_adjust_prio(current);
	}
}

/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	might_sleep();

	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
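/*
 * Illustrative usage sketch (not part of the original file): a typical
 * caller declares and initializes an rt_mutex and brackets the critical
 * section with rt_mutex_lock()/rt_mutex_unlock(). DEFINE_RT_MUTEX() and
 * rt_mutex_init() come from <linux/rtmutex.h>; the example function and
 * lock names below are assumptions for illustration only.
 *
 *	static DEFINE_RT_MUTEX(example_lock);
 *
 *	static void example_critical_section(void)
 *	{
 *		rt_mutex_lock(&example_lock);
 *		... access data protected by example_lock ...
 *		rt_mutex_unlock(&example_lock);
 *	}
 *
 * While a higher priority task is blocked in rt_mutex_lock(), the
 * current owner is boosted by the PI machinery above until it calls
 * rt_mutex_unlock().
 */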

/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock:		the rt_mutex to be locked
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
	might_sleep();

	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

/*
 * Futex variant with full deadlock detection.
 */
int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
			      struct hrtimer_sleeper *timeout)
{
	might_sleep();

	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       RT_MUTEX_FULL_CHAINWALK,
				       rt_mutex_slowlock);
}

/**
 * rt_mutex_timed_lock - lock a rt_mutex interruptible
 *			the timeout structure is provided
 *			by the caller
 *
 * @lock:		the rt_mutex to be locked
 * @timeout:		timeout structure or NULL (no timeout)
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 * -ETIMEDOUT	when the timeout expired
 */
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
{
	might_sleep();

	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       RT_MUTEX_MIN_CHAINWALK,
				       rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);

/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock:	the rt_mutex to be locked
 *
 * This function can only be called in thread context. It's safe to
 * call it from atomic regions, but not from hard interrupt or soft
 * interrupt context.
 *
 * Returns 1 on success and 0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
		return 0;

	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);

/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);

/**
 * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
 * @lock: the rt_mutex to be unlocked
 *
 * Returns: true/false indicating whether priority adjustment is
 * required or not.
 */
bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
				   struct wake_q_head *wqh)
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
		rt_mutex_deadlock_account_unlock(current);
		return false;
	}
	return rt_mutex_slowunlock(lock, wqh);
}

/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	lock->magic = NULL;
#endif
}

EXPORT_SYMBOL_GPL(rt_mutex_destroy);

/**
 * __rt_mutex_init - initialize the rt lock
 *
 * @lock: the rt lock to be initialized
 *
 * Initialize the rt lock to unlocked state.
 *
 * Initializing of a locked rt lock is not allowed
 */
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
	lock->owner = NULL;
	raw_spin_lock_init(&lock->wait_lock);
	lock->waiters = RB_ROOT;
	lock->waiters_leftmost = NULL;

	debug_rt_mutex_init(lock, name);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock:	the rt_mutex to be locked
 * @proxy_owner:the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 *
 * Special API call for PI-futex support. This initializes the rtmutex and
 * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
 * possible at this point because the pi_state which contains the rtmutex
 * is not yet visible to other tasks.
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				struct task_struct *proxy_owner)
{
	__rt_mutex_init(lock, NULL);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	rt_mutex_set_owner(lock, proxy_owner);
	rt_mutex_deadlock_account_lock(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock:	the rt_mutex to be locked
 *
 * No locking. Caller has to do serializing itself
 *
 * Special API call for PI-futex support. This merrily cleans up the rtmutex
 * (debugging) state. Concurrent operations on this rt_mutex are not
 * possible because it belongs to the pi_state which is about to be freed
 * and it is no longer visible to other tasks.
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);
	rt_mutex_deadlock_account_unlock(proxy_owner);
}

/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for FUTEX_REQUEUE_PI support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL)) {
		raw_spin_unlock_irq(&lock->wait_lock);
		return 1;
	}

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task,
				      RT_MUTEX_FULL_CHAINWALK);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain. Let the waiter sort it out.
		 */
		ret = 0;
	}

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	raw_spin_unlock_irq(&lock->wait_lock);

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}

/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt lock query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * itself.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		return NULL;

	return rt_mutex_top_waiter(lock)->task;
}

/**
 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, null if none. hrtimer should already
 *			have been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Complete the lock acquisition started on our behalf by another thread.
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT
 *
 * Special API call for PI-futex requeue support
 */
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
			       struct hrtimer_sleeper *to,
			       struct rt_mutex_waiter *waiter)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);

	set_current_state(TASK_INTERRUPTIBLE);

	/* sleep on the mutex */
	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}