1 /* 2 * RT-Mutexes: simple blocking mutual exclusion locks with PI support 3 * 4 * started by Ingo Molnar and Thomas Gleixner. 5 * 6 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 7 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt 9 * Copyright (C) 2006 Esben Nielsen 10 * 11 * See Documentation/locking/rt-mutex-design.txt for details. 12 */ 13 #include <linux/spinlock.h> 14 #include <linux/export.h> 15 #include <linux/sched/signal.h> 16 #include <linux/sched/rt.h> 17 #include <linux/sched/deadline.h> 18 #include <linux/sched/wake_q.h> 19 #include <linux/sched/debug.h> 20 #include <linux/timer.h> 21 22 #include "rtmutex_common.h" 23 24 /* 25 * lock->owner state tracking: 26 * 27 * lock->owner holds the task_struct pointer of the owner. Bit 0 28 * is used to keep track of the "lock has waiters" state. 29 * 30 * owner bit0 31 * NULL 0 lock is free (fast acquire possible) 32 * NULL 1 lock is free and has waiters and the top waiter 33 * is going to take the lock* 34 * taskpointer 0 lock is held (fast release possible) 35 * taskpointer 1 lock is held and has waiters** 36 * 37 * The fast atomic compare exchange based acquire and release is only 38 * possible when bit 0 of lock->owner is 0. 39 * 40 * (*) It also can be a transitional state when grabbing the lock 41 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, 42 * we need to set the bit0 before looking at the lock, and the owner may be 43 * NULL in this small time, hence this can be a transitional state. 44 * 45 * (**) There is a small time when bit 0 is set but there are no 46 * waiters. This can happen when grabbing the lock in the slow path. 47 * To prevent a cmpxchg of the owner releasing the lock, we need to 48 * set this bit before looking at the lock. 49 */ 50 51 static void 52 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) 53 { 54 unsigned long val = (unsigned long)owner; 55 56 if (rt_mutex_has_waiters(lock)) 57 val |= RT_MUTEX_HAS_WAITERS; 58 59 lock->owner = (struct task_struct *)val; 60 } 61 62 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) 63 { 64 lock->owner = (struct task_struct *) 65 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); 66 } 67 68 static void fixup_rt_mutex_waiters(struct rt_mutex *lock) 69 { 70 unsigned long owner, *p = (unsigned long *) &lock->owner; 71 72 if (rt_mutex_has_waiters(lock)) 73 return; 74 75 /* 76 * The rbtree has no waiters enqueued, now make sure that the 77 * lock->owner still has the waiters bit set, otherwise the 78 * following can happen: 79 * 80 * CPU 0 CPU 1 CPU2 81 * l->owner=T1 82 * rt_mutex_lock(l) 83 * lock(l->lock) 84 * l->owner = T1 | HAS_WAITERS; 85 * enqueue(T2) 86 * boost() 87 * unlock(l->lock) 88 * block() 89 * 90 * rt_mutex_lock(l) 91 * lock(l->lock) 92 * l->owner = T1 | HAS_WAITERS; 93 * enqueue(T3) 94 * boost() 95 * unlock(l->lock) 96 * block() 97 * signal(->T2) signal(->T3) 98 * lock(l->lock) 99 * dequeue(T2) 100 * deboost() 101 * unlock(l->lock) 102 * lock(l->lock) 103 * dequeue(T3) 104 * ==> wait list is empty 105 * deboost() 106 * unlock(l->lock) 107 * lock(l->lock) 108 * fixup_rt_mutex_waiters() 109 * if (wait_list_empty(l) { 110 * l->owner = owner 111 * owner = l->owner & ~HAS_WAITERS; 112 * ==> l->owner = T1 113 * } 114 * lock(l->lock) 115 * rt_mutex_unlock(l) fixup_rt_mutex_waiters() 116 * if (wait_list_empty(l) { 117 * owner = l->owner & ~HAS_WAITERS; 118 * cmpxchg(l->owner, T1, NULL) 119 * ===> Success (l->owner = NULL) 120 * 121 * l->owner = owner 122 * ==> l->owner = T1 123 * } 124 * 125 * With the check for the waiter bit in place T3 on CPU2 will not 126 * overwrite. All tasks fiddling with the waiters bit are 127 * serialized by l->lock, so nothing else can modify the waiters 128 * bit. If the bit is set then nothing can change l->owner either 129 * so the simple RMW is safe. The cmpxchg() will simply fail if it 130 * happens in the middle of the RMW because the waiters bit is 131 * still set. 132 */ 133 owner = READ_ONCE(*p); 134 if (owner & RT_MUTEX_HAS_WAITERS) 135 WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); 136 } 137 138 /* 139 * We can speed up the acquire/release, if there's no debugging state to be 140 * set up. 141 */ 142 #ifndef CONFIG_DEBUG_RT_MUTEXES 143 # define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c) 144 # define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c) 145 # define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c) 146 147 /* 148 * Callers must hold the ->wait_lock -- which is the whole purpose as we force 149 * all future threads that attempt to [Rmw] the lock to the slowpath. As such 150 * relaxed semantics suffice. 151 */ 152 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 153 { 154 unsigned long owner, *p = (unsigned long *) &lock->owner; 155 156 do { 157 owner = *p; 158 } while (cmpxchg_relaxed(p, owner, 159 owner | RT_MUTEX_HAS_WAITERS) != owner); 160 } 161 162 /* 163 * Safe fastpath aware unlock: 164 * 1) Clear the waiters bit 165 * 2) Drop lock->wait_lock 166 * 3) Try to unlock the lock with cmpxchg 167 */ 168 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 169 unsigned long flags) 170 __releases(lock->wait_lock) 171 { 172 struct task_struct *owner = rt_mutex_owner(lock); 173 174 clear_rt_mutex_waiters(lock); 175 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 176 /* 177 * If a new waiter comes in between the unlock and the cmpxchg 178 * we have two situations: 179 * 180 * unlock(wait_lock); 181 * lock(wait_lock); 182 * cmpxchg(p, owner, 0) == owner 183 * mark_rt_mutex_waiters(lock); 184 * acquire(lock); 185 * or: 186 * 187 * unlock(wait_lock); 188 * lock(wait_lock); 189 * mark_rt_mutex_waiters(lock); 190 * 191 * cmpxchg(p, owner, 0) != owner 192 * enqueue_waiter(); 193 * unlock(wait_lock); 194 * lock(wait_lock); 195 * wake waiter(); 196 * unlock(wait_lock); 197 * lock(wait_lock); 198 * acquire(lock); 199 */ 200 return rt_mutex_cmpxchg_release(lock, owner, NULL); 201 } 202 203 #else 204 # define rt_mutex_cmpxchg_relaxed(l,c,n) (0) 205 # define rt_mutex_cmpxchg_acquire(l,c,n) (0) 206 # define rt_mutex_cmpxchg_release(l,c,n) (0) 207 208 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 209 { 210 lock->owner = (struct task_struct *) 211 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 212 } 213 214 /* 215 * Simple slow path only version: lock->owner is protected by lock->wait_lock. 216 */ 217 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, 218 unsigned long flags) 219 __releases(lock->wait_lock) 220 { 221 lock->owner = NULL; 222 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 223 return true; 224 } 225 #endif 226 227 static inline int 228 rt_mutex_waiter_less(struct rt_mutex_waiter *left, 229 struct rt_mutex_waiter *right) 230 { 231 if (left->prio < right->prio) 232 return 1; 233 234 /* 235 * If both waiters have dl_prio(), we check the deadlines of the 236 * associated tasks. 237 * If left waiter has a dl_prio(), and we didn't return 1 above, 238 * then right waiter has a dl_prio() too. 239 */ 240 if (dl_prio(left->prio)) 241 return dl_time_before(left->task->dl.deadline, 242 right->task->dl.deadline); 243 244 return 0; 245 } 246 247 static void 248 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 249 { 250 struct rb_node **link = &lock->waiters.rb_node; 251 struct rb_node *parent = NULL; 252 struct rt_mutex_waiter *entry; 253 int leftmost = 1; 254 255 while (*link) { 256 parent = *link; 257 entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); 258 if (rt_mutex_waiter_less(waiter, entry)) { 259 link = &parent->rb_left; 260 } else { 261 link = &parent->rb_right; 262 leftmost = 0; 263 } 264 } 265 266 if (leftmost) 267 lock->waiters_leftmost = &waiter->tree_entry; 268 269 rb_link_node(&waiter->tree_entry, parent, link); 270 rb_insert_color(&waiter->tree_entry, &lock->waiters); 271 } 272 273 static void 274 rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) 275 { 276 if (RB_EMPTY_NODE(&waiter->tree_entry)) 277 return; 278 279 if (lock->waiters_leftmost == &waiter->tree_entry) 280 lock->waiters_leftmost = rb_next(&waiter->tree_entry); 281 282 rb_erase(&waiter->tree_entry, &lock->waiters); 283 RB_CLEAR_NODE(&waiter->tree_entry); 284 } 285 286 static void 287 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 288 { 289 struct rb_node **link = &task->pi_waiters.rb_node; 290 struct rb_node *parent = NULL; 291 struct rt_mutex_waiter *entry; 292 int leftmost = 1; 293 294 while (*link) { 295 parent = *link; 296 entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); 297 if (rt_mutex_waiter_less(waiter, entry)) { 298 link = &parent->rb_left; 299 } else { 300 link = &parent->rb_right; 301 leftmost = 0; 302 } 303 } 304 305 if (leftmost) 306 task->pi_waiters_leftmost = &waiter->pi_tree_entry; 307 308 rb_link_node(&waiter->pi_tree_entry, parent, link); 309 rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters); 310 } 311 312 static void 313 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) 314 { 315 if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) 316 return; 317 318 if (task->pi_waiters_leftmost == &waiter->pi_tree_entry) 319 task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry); 320 321 rb_erase(&waiter->pi_tree_entry, &task->pi_waiters); 322 RB_CLEAR_NODE(&waiter->pi_tree_entry); 323 } 324 325 /* 326 * Calculate task priority from the waiter tree priority 327 * 328 * Return task->normal_prio when the waiter tree is empty or when 329 * the waiter is not allowed to do priority boosting 330 */ 331 int rt_mutex_getprio(struct task_struct *task) 332 { 333 if (likely(!task_has_pi_waiters(task))) 334 return task->normal_prio; 335 336 return min(task_top_pi_waiter(task)->prio, 337 task->normal_prio); 338 } 339 340 struct task_struct *rt_mutex_get_top_task(struct task_struct *task) 341 { 342 if (likely(!task_has_pi_waiters(task))) 343 return NULL; 344 345 return task_top_pi_waiter(task)->task; 346 } 347 348 /* 349 * Called by sched_setscheduler() to get the priority which will be 350 * effective after the change. 351 */ 352 int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) 353 { 354 if (!task_has_pi_waiters(task)) 355 return newprio; 356 357 if (task_top_pi_waiter(task)->task->prio <= newprio) 358 return task_top_pi_waiter(task)->task->prio; 359 return newprio; 360 } 361 362 /* 363 * Adjust the priority of a task, after its pi_waiters got modified. 364 * 365 * This can be both boosting and unboosting. task->pi_lock must be held. 366 */ 367 static void __rt_mutex_adjust_prio(struct task_struct *task) 368 { 369 int prio = rt_mutex_getprio(task); 370 371 if (task->prio != prio || dl_prio(prio)) 372 rt_mutex_setprio(task, prio); 373 } 374 375 /* 376 * Adjust task priority (undo boosting). Called from the exit path of 377 * rt_mutex_slowunlock() and rt_mutex_slowlock(). 378 * 379 * (Note: We do this outside of the protection of lock->wait_lock to 380 * allow the lock to be taken while or before we readjust the priority 381 * of task. We do not use the spin_xx_mutex() variants here as we are 382 * outside of the debug path.) 383 */ 384 void rt_mutex_adjust_prio(struct task_struct *task) 385 { 386 unsigned long flags; 387 388 raw_spin_lock_irqsave(&task->pi_lock, flags); 389 __rt_mutex_adjust_prio(task); 390 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 391 } 392 393 /* 394 * Deadlock detection is conditional: 395 * 396 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted 397 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. 398 * 399 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always 400 * conducted independent of the detect argument. 401 * 402 * If the waiter argument is NULL this indicates the deboost path and 403 * deadlock detection is disabled independent of the detect argument 404 * and the config settings. 405 */ 406 static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, 407 enum rtmutex_chainwalk chwalk) 408 { 409 /* 410 * This is just a wrapper function for the following call, 411 * because debug_rt_mutex_detect_deadlock() smells like a magic 412 * debug feature and I wanted to keep the cond function in the 413 * main source file along with the comments instead of having 414 * two of the same in the headers. 415 */ 416 return debug_rt_mutex_detect_deadlock(waiter, chwalk); 417 } 418 419 /* 420 * Max number of times we'll walk the boosting chain: 421 */ 422 int max_lock_depth = 1024; 423 424 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) 425 { 426 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; 427 } 428 429 /* 430 * Adjust the priority chain. Also used for deadlock detection. 431 * Decreases task's usage by one - may thus free the task. 432 * 433 * @task: the task owning the mutex (owner) for which a chain walk is 434 * probably needed 435 * @chwalk: do we have to carry out deadlock detection? 436 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 437 * things for a task that has just got its priority adjusted, and 438 * is waiting on a mutex) 439 * @next_lock: the mutex on which the owner of @orig_lock was blocked before 440 * we dropped its pi_lock. Is never dereferenced, only used for 441 * comparison to detect lock chain changes. 442 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated 443 * its priority to the mutex owner (can be NULL in the case 444 * depicted above or if the top waiter is gone away and we are 445 * actually deboosting the owner) 446 * @top_task: the current top waiter 447 * 448 * Returns 0 or -EDEADLK. 449 * 450 * Chain walk basics and protection scope 451 * 452 * [R] refcount on task 453 * [P] task->pi_lock held 454 * [L] rtmutex->wait_lock held 455 * 456 * Step Description Protected by 457 * function arguments: 458 * @task [R] 459 * @orig_lock if != NULL @top_task is blocked on it 460 * @next_lock Unprotected. Cannot be 461 * dereferenced. Only used for 462 * comparison. 463 * @orig_waiter if != NULL @top_task is blocked on it 464 * @top_task current, or in case of proxy 465 * locking protected by calling 466 * code 467 * again: 468 * loop_sanity_check(); 469 * retry: 470 * [1] lock(task->pi_lock); [R] acquire [P] 471 * [2] waiter = task->pi_blocked_on; [P] 472 * [3] check_exit_conditions_1(); [P] 473 * [4] lock = waiter->lock; [P] 474 * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L] 475 * unlock(task->pi_lock); release [P] 476 * goto retry; 477 * } 478 * [6] check_exit_conditions_2(); [P] + [L] 479 * [7] requeue_lock_waiter(lock, waiter); [P] + [L] 480 * [8] unlock(task->pi_lock); release [P] 481 * put_task_struct(task); release [R] 482 * [9] check_exit_conditions_3(); [L] 483 * [10] task = owner(lock); [L] 484 * get_task_struct(task); [L] acquire [R] 485 * lock(task->pi_lock); [L] acquire [P] 486 * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L] 487 * [12] check_exit_conditions_4(); [P] + [L] 488 * [13] unlock(task->pi_lock); release [P] 489 * unlock(lock->wait_lock); release [L] 490 * goto again; 491 */ 492 static int rt_mutex_adjust_prio_chain(struct task_struct *task, 493 enum rtmutex_chainwalk chwalk, 494 struct rt_mutex *orig_lock, 495 struct rt_mutex *next_lock, 496 struct rt_mutex_waiter *orig_waiter, 497 struct task_struct *top_task) 498 { 499 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; 500 struct rt_mutex_waiter *prerequeue_top_waiter; 501 int ret = 0, depth = 0; 502 struct rt_mutex *lock; 503 bool detect_deadlock; 504 bool requeue = true; 505 506 detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); 507 508 /* 509 * The (de)boosting is a step by step approach with a lot of 510 * pitfalls. We want this to be preemptible and we want hold a 511 * maximum of two locks per step. So we have to check 512 * carefully whether things change under us. 513 */ 514 again: 515 /* 516 * We limit the lock chain length for each invocation. 517 */ 518 if (++depth > max_lock_depth) { 519 static int prev_max; 520 521 /* 522 * Print this only once. If the admin changes the limit, 523 * print a new message when reaching the limit again. 524 */ 525 if (prev_max != max_lock_depth) { 526 prev_max = max_lock_depth; 527 printk(KERN_WARNING "Maximum lock depth %d reached " 528 "task: %s (%d)\n", max_lock_depth, 529 top_task->comm, task_pid_nr(top_task)); 530 } 531 put_task_struct(task); 532 533 return -EDEADLK; 534 } 535 536 /* 537 * We are fully preemptible here and only hold the refcount on 538 * @task. So everything can have changed under us since the 539 * caller or our own code below (goto retry/again) dropped all 540 * locks. 541 */ 542 retry: 543 /* 544 * [1] Task cannot go away as we did a get_task() before ! 545 */ 546 raw_spin_lock_irq(&task->pi_lock); 547 548 /* 549 * [2] Get the waiter on which @task is blocked on. 550 */ 551 waiter = task->pi_blocked_on; 552 553 /* 554 * [3] check_exit_conditions_1() protected by task->pi_lock. 555 */ 556 557 /* 558 * Check whether the end of the boosting chain has been 559 * reached or the state of the chain has changed while we 560 * dropped the locks. 561 */ 562 if (!waiter) 563 goto out_unlock_pi; 564 565 /* 566 * Check the orig_waiter state. After we dropped the locks, 567 * the previous owner of the lock might have released the lock. 568 */ 569 if (orig_waiter && !rt_mutex_owner(orig_lock)) 570 goto out_unlock_pi; 571 572 /* 573 * We dropped all locks after taking a refcount on @task, so 574 * the task might have moved on in the lock chain or even left 575 * the chain completely and blocks now on an unrelated lock or 576 * on @orig_lock. 577 * 578 * We stored the lock on which @task was blocked in @next_lock, 579 * so we can detect the chain change. 580 */ 581 if (next_lock != waiter->lock) 582 goto out_unlock_pi; 583 584 /* 585 * Drop out, when the task has no waiters. Note, 586 * top_waiter can be NULL, when we are in the deboosting 587 * mode! 588 */ 589 if (top_waiter) { 590 if (!task_has_pi_waiters(task)) 591 goto out_unlock_pi; 592 /* 593 * If deadlock detection is off, we stop here if we 594 * are not the top pi waiter of the task. If deadlock 595 * detection is enabled we continue, but stop the 596 * requeueing in the chain walk. 597 */ 598 if (top_waiter != task_top_pi_waiter(task)) { 599 if (!detect_deadlock) 600 goto out_unlock_pi; 601 else 602 requeue = false; 603 } 604 } 605 606 /* 607 * If the waiter priority is the same as the task priority 608 * then there is no further priority adjustment necessary. If 609 * deadlock detection is off, we stop the chain walk. If its 610 * enabled we continue, but stop the requeueing in the chain 611 * walk. 612 */ 613 if (waiter->prio == task->prio) { 614 if (!detect_deadlock) 615 goto out_unlock_pi; 616 else 617 requeue = false; 618 } 619 620 /* 621 * [4] Get the next lock 622 */ 623 lock = waiter->lock; 624 /* 625 * [5] We need to trylock here as we are holding task->pi_lock, 626 * which is the reverse lock order versus the other rtmutex 627 * operations. 628 */ 629 if (!raw_spin_trylock(&lock->wait_lock)) { 630 raw_spin_unlock_irq(&task->pi_lock); 631 cpu_relax(); 632 goto retry; 633 } 634 635 /* 636 * [6] check_exit_conditions_2() protected by task->pi_lock and 637 * lock->wait_lock. 638 * 639 * Deadlock detection. If the lock is the same as the original 640 * lock which caused us to walk the lock chain or if the 641 * current lock is owned by the task which initiated the chain 642 * walk, we detected a deadlock. 643 */ 644 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 645 debug_rt_mutex_deadlock(chwalk, orig_waiter, lock); 646 raw_spin_unlock(&lock->wait_lock); 647 ret = -EDEADLK; 648 goto out_unlock_pi; 649 } 650 651 /* 652 * If we just follow the lock chain for deadlock detection, no 653 * need to do all the requeue operations. To avoid a truckload 654 * of conditionals around the various places below, just do the 655 * minimum chain walk checks. 656 */ 657 if (!requeue) { 658 /* 659 * No requeue[7] here. Just release @task [8] 660 */ 661 raw_spin_unlock(&task->pi_lock); 662 put_task_struct(task); 663 664 /* 665 * [9] check_exit_conditions_3 protected by lock->wait_lock. 666 * If there is no owner of the lock, end of chain. 667 */ 668 if (!rt_mutex_owner(lock)) { 669 raw_spin_unlock_irq(&lock->wait_lock); 670 return 0; 671 } 672 673 /* [10] Grab the next task, i.e. owner of @lock */ 674 task = rt_mutex_owner(lock); 675 get_task_struct(task); 676 raw_spin_lock(&task->pi_lock); 677 678 /* 679 * No requeue [11] here. We just do deadlock detection. 680 * 681 * [12] Store whether owner is blocked 682 * itself. Decision is made after dropping the locks 683 */ 684 next_lock = task_blocked_on_lock(task); 685 /* 686 * Get the top waiter for the next iteration 687 */ 688 top_waiter = rt_mutex_top_waiter(lock); 689 690 /* [13] Drop locks */ 691 raw_spin_unlock(&task->pi_lock); 692 raw_spin_unlock_irq(&lock->wait_lock); 693 694 /* If owner is not blocked, end of chain. */ 695 if (!next_lock) 696 goto out_put_task; 697 goto again; 698 } 699 700 /* 701 * Store the current top waiter before doing the requeue 702 * operation on @lock. We need it for the boost/deboost 703 * decision below. 704 */ 705 prerequeue_top_waiter = rt_mutex_top_waiter(lock); 706 707 /* [7] Requeue the waiter in the lock waiter tree. */ 708 rt_mutex_dequeue(lock, waiter); 709 waiter->prio = task->prio; 710 rt_mutex_enqueue(lock, waiter); 711 712 /* [8] Release the task */ 713 raw_spin_unlock(&task->pi_lock); 714 put_task_struct(task); 715 716 /* 717 * [9] check_exit_conditions_3 protected by lock->wait_lock. 718 * 719 * We must abort the chain walk if there is no lock owner even 720 * in the dead lock detection case, as we have nothing to 721 * follow here. This is the end of the chain we are walking. 722 */ 723 if (!rt_mutex_owner(lock)) { 724 /* 725 * If the requeue [7] above changed the top waiter, 726 * then we need to wake the new top waiter up to try 727 * to get the lock. 728 */ 729 if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) 730 wake_up_process(rt_mutex_top_waiter(lock)->task); 731 raw_spin_unlock_irq(&lock->wait_lock); 732 return 0; 733 } 734 735 /* [10] Grab the next task, i.e. the owner of @lock */ 736 task = rt_mutex_owner(lock); 737 get_task_struct(task); 738 raw_spin_lock(&task->pi_lock); 739 740 /* [11] requeue the pi waiters if necessary */ 741 if (waiter == rt_mutex_top_waiter(lock)) { 742 /* 743 * The waiter became the new top (highest priority) 744 * waiter on the lock. Replace the previous top waiter 745 * in the owner tasks pi waiters tree with this waiter 746 * and adjust the priority of the owner. 747 */ 748 rt_mutex_dequeue_pi(task, prerequeue_top_waiter); 749 rt_mutex_enqueue_pi(task, waiter); 750 __rt_mutex_adjust_prio(task); 751 752 } else if (prerequeue_top_waiter == waiter) { 753 /* 754 * The waiter was the top waiter on the lock, but is 755 * no longer the top prority waiter. Replace waiter in 756 * the owner tasks pi waiters tree with the new top 757 * (highest priority) waiter and adjust the priority 758 * of the owner. 759 * The new top waiter is stored in @waiter so that 760 * @waiter == @top_waiter evaluates to true below and 761 * we continue to deboost the rest of the chain. 762 */ 763 rt_mutex_dequeue_pi(task, waiter); 764 waiter = rt_mutex_top_waiter(lock); 765 rt_mutex_enqueue_pi(task, waiter); 766 __rt_mutex_adjust_prio(task); 767 } else { 768 /* 769 * Nothing changed. No need to do any priority 770 * adjustment. 771 */ 772 } 773 774 /* 775 * [12] check_exit_conditions_4() protected by task->pi_lock 776 * and lock->wait_lock. The actual decisions are made after we 777 * dropped the locks. 778 * 779 * Check whether the task which owns the current lock is pi 780 * blocked itself. If yes we store a pointer to the lock for 781 * the lock chain change detection above. After we dropped 782 * task->pi_lock next_lock cannot be dereferenced anymore. 783 */ 784 next_lock = task_blocked_on_lock(task); 785 /* 786 * Store the top waiter of @lock for the end of chain walk 787 * decision below. 788 */ 789 top_waiter = rt_mutex_top_waiter(lock); 790 791 /* [13] Drop the locks */ 792 raw_spin_unlock(&task->pi_lock); 793 raw_spin_unlock_irq(&lock->wait_lock); 794 795 /* 796 * Make the actual exit decisions [12], based on the stored 797 * values. 798 * 799 * We reached the end of the lock chain. Stop right here. No 800 * point to go back just to figure that out. 801 */ 802 if (!next_lock) 803 goto out_put_task; 804 805 /* 806 * If the current waiter is not the top waiter on the lock, 807 * then we can stop the chain walk here if we are not in full 808 * deadlock detection mode. 809 */ 810 if (!detect_deadlock && waiter != top_waiter) 811 goto out_put_task; 812 813 goto again; 814 815 out_unlock_pi: 816 raw_spin_unlock_irq(&task->pi_lock); 817 out_put_task: 818 put_task_struct(task); 819 820 return ret; 821 } 822 823 /* 824 * Try to take an rt-mutex 825 * 826 * Must be called with lock->wait_lock held and interrupts disabled 827 * 828 * @lock: The lock to be acquired. 829 * @task: The task which wants to acquire the lock 830 * @waiter: The waiter that is queued to the lock's wait tree if the 831 * callsite called task_blocked_on_lock(), otherwise NULL 832 */ 833 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, 834 struct rt_mutex_waiter *waiter) 835 { 836 /* 837 * Before testing whether we can acquire @lock, we set the 838 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all 839 * other tasks which try to modify @lock into the slow path 840 * and they serialize on @lock->wait_lock. 841 * 842 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state 843 * as explained at the top of this file if and only if: 844 * 845 * - There is a lock owner. The caller must fixup the 846 * transient state if it does a trylock or leaves the lock 847 * function due to a signal or timeout. 848 * 849 * - @task acquires the lock and there are no other 850 * waiters. This is undone in rt_mutex_set_owner(@task) at 851 * the end of this function. 852 */ 853 mark_rt_mutex_waiters(lock); 854 855 /* 856 * If @lock has an owner, give up. 857 */ 858 if (rt_mutex_owner(lock)) 859 return 0; 860 861 /* 862 * If @waiter != NULL, @task has already enqueued the waiter 863 * into @lock waiter tree. If @waiter == NULL then this is a 864 * trylock attempt. 865 */ 866 if (waiter) { 867 /* 868 * If waiter is not the highest priority waiter of 869 * @lock, give up. 870 */ 871 if (waiter != rt_mutex_top_waiter(lock)) 872 return 0; 873 874 /* 875 * We can acquire the lock. Remove the waiter from the 876 * lock waiters tree. 877 */ 878 rt_mutex_dequeue(lock, waiter); 879 880 } else { 881 /* 882 * If the lock has waiters already we check whether @task is 883 * eligible to take over the lock. 884 * 885 * If there are no other waiters, @task can acquire 886 * the lock. @task->pi_blocked_on is NULL, so it does 887 * not need to be dequeued. 888 */ 889 if (rt_mutex_has_waiters(lock)) { 890 /* 891 * If @task->prio is greater than or equal to 892 * the top waiter priority (kernel view), 893 * @task lost. 894 */ 895 if (task->prio >= rt_mutex_top_waiter(lock)->prio) 896 return 0; 897 898 /* 899 * The current top waiter stays enqueued. We 900 * don't have to change anything in the lock 901 * waiters order. 902 */ 903 } else { 904 /* 905 * No waiters. Take the lock without the 906 * pi_lock dance.@task->pi_blocked_on is NULL 907 * and we have no waiters to enqueue in @task 908 * pi waiters tree. 909 */ 910 goto takeit; 911 } 912 } 913 914 /* 915 * Clear @task->pi_blocked_on. Requires protection by 916 * @task->pi_lock. Redundant operation for the @waiter == NULL 917 * case, but conditionals are more expensive than a redundant 918 * store. 919 */ 920 raw_spin_lock(&task->pi_lock); 921 task->pi_blocked_on = NULL; 922 /* 923 * Finish the lock acquisition. @task is the new owner. If 924 * other waiters exist we have to insert the highest priority 925 * waiter into @task->pi_waiters tree. 926 */ 927 if (rt_mutex_has_waiters(lock)) 928 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); 929 raw_spin_unlock(&task->pi_lock); 930 931 takeit: 932 /* We got the lock. */ 933 debug_rt_mutex_lock(lock); 934 935 /* 936 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there 937 * are still waiters or clears it. 938 */ 939 rt_mutex_set_owner(lock, task); 940 941 rt_mutex_deadlock_account_lock(lock, task); 942 943 return 1; 944 } 945 946 /* 947 * Task blocks on lock. 948 * 949 * Prepare waiter and propagate pi chain 950 * 951 * This must be called with lock->wait_lock held and interrupts disabled 952 */ 953 static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 954 struct rt_mutex_waiter *waiter, 955 struct task_struct *task, 956 enum rtmutex_chainwalk chwalk) 957 { 958 struct task_struct *owner = rt_mutex_owner(lock); 959 struct rt_mutex_waiter *top_waiter = waiter; 960 struct rt_mutex *next_lock; 961 int chain_walk = 0, res; 962 963 /* 964 * Early deadlock detection. We really don't want the task to 965 * enqueue on itself just to untangle the mess later. It's not 966 * only an optimization. We drop the locks, so another waiter 967 * can come in before the chain walk detects the deadlock. So 968 * the other will detect the deadlock and return -EDEADLOCK, 969 * which is wrong, as the other waiter is not in a deadlock 970 * situation. 971 */ 972 if (owner == task) 973 return -EDEADLK; 974 975 raw_spin_lock(&task->pi_lock); 976 __rt_mutex_adjust_prio(task); 977 waiter->task = task; 978 waiter->lock = lock; 979 waiter->prio = task->prio; 980 981 /* Get the top priority waiter on the lock */ 982 if (rt_mutex_has_waiters(lock)) 983 top_waiter = rt_mutex_top_waiter(lock); 984 rt_mutex_enqueue(lock, waiter); 985 986 task->pi_blocked_on = waiter; 987 988 raw_spin_unlock(&task->pi_lock); 989 990 if (!owner) 991 return 0; 992 993 raw_spin_lock(&owner->pi_lock); 994 if (waiter == rt_mutex_top_waiter(lock)) { 995 rt_mutex_dequeue_pi(owner, top_waiter); 996 rt_mutex_enqueue_pi(owner, waiter); 997 998 __rt_mutex_adjust_prio(owner); 999 if (owner->pi_blocked_on) 1000 chain_walk = 1; 1001 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { 1002 chain_walk = 1; 1003 } 1004 1005 /* Store the lock on which owner is blocked or NULL */ 1006 next_lock = task_blocked_on_lock(owner); 1007 1008 raw_spin_unlock(&owner->pi_lock); 1009 /* 1010 * Even if full deadlock detection is on, if the owner is not 1011 * blocked itself, we can avoid finding this out in the chain 1012 * walk. 1013 */ 1014 if (!chain_walk || !next_lock) 1015 return 0; 1016 1017 /* 1018 * The owner can't disappear while holding a lock, 1019 * so the owner struct is protected by wait_lock. 1020 * Gets dropped in rt_mutex_adjust_prio_chain()! 1021 */ 1022 get_task_struct(owner); 1023 1024 raw_spin_unlock_irq(&lock->wait_lock); 1025 1026 res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, 1027 next_lock, waiter, task); 1028 1029 raw_spin_lock_irq(&lock->wait_lock); 1030 1031 return res; 1032 } 1033 1034 /* 1035 * Remove the top waiter from the current tasks pi waiter tree and 1036 * queue it up. 1037 * 1038 * Called with lock->wait_lock held and interrupts disabled. 1039 */ 1040 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, 1041 struct rt_mutex *lock) 1042 { 1043 struct rt_mutex_waiter *waiter; 1044 1045 raw_spin_lock(¤t->pi_lock); 1046 1047 waiter = rt_mutex_top_waiter(lock); 1048 1049 /* 1050 * Remove it from current->pi_waiters. We do not adjust a 1051 * possible priority boost right now. We execute wakeup in the 1052 * boosted mode and go back to normal after releasing 1053 * lock->wait_lock. 1054 */ 1055 rt_mutex_dequeue_pi(current, waiter); 1056 1057 /* 1058 * As we are waking up the top waiter, and the waiter stays 1059 * queued on the lock until it gets the lock, this lock 1060 * obviously has waiters. Just set the bit here and this has 1061 * the added benefit of forcing all new tasks into the 1062 * slow path making sure no task of lower priority than 1063 * the top waiter can steal this lock. 1064 */ 1065 lock->owner = (void *) RT_MUTEX_HAS_WAITERS; 1066 1067 raw_spin_unlock(¤t->pi_lock); 1068 1069 wake_q_add(wake_q, waiter->task); 1070 } 1071 1072 /* 1073 * Remove a waiter from a lock and give up 1074 * 1075 * Must be called with lock->wait_lock held and interrupts disabled. I must 1076 * have just failed to try_to_take_rt_mutex(). 1077 */ 1078 static void remove_waiter(struct rt_mutex *lock, 1079 struct rt_mutex_waiter *waiter) 1080 { 1081 bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); 1082 struct task_struct *owner = rt_mutex_owner(lock); 1083 struct rt_mutex *next_lock; 1084 1085 raw_spin_lock(¤t->pi_lock); 1086 rt_mutex_dequeue(lock, waiter); 1087 current->pi_blocked_on = NULL; 1088 raw_spin_unlock(¤t->pi_lock); 1089 1090 /* 1091 * Only update priority if the waiter was the highest priority 1092 * waiter of the lock and there is an owner to update. 1093 */ 1094 if (!owner || !is_top_waiter) 1095 return; 1096 1097 raw_spin_lock(&owner->pi_lock); 1098 1099 rt_mutex_dequeue_pi(owner, waiter); 1100 1101 if (rt_mutex_has_waiters(lock)) 1102 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); 1103 1104 __rt_mutex_adjust_prio(owner); 1105 1106 /* Store the lock on which owner is blocked or NULL */ 1107 next_lock = task_blocked_on_lock(owner); 1108 1109 raw_spin_unlock(&owner->pi_lock); 1110 1111 /* 1112 * Don't walk the chain, if the owner task is not blocked 1113 * itself. 1114 */ 1115 if (!next_lock) 1116 return; 1117 1118 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 1119 get_task_struct(owner); 1120 1121 raw_spin_unlock_irq(&lock->wait_lock); 1122 1123 rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, 1124 next_lock, NULL, current); 1125 1126 raw_spin_lock_irq(&lock->wait_lock); 1127 } 1128 1129 /* 1130 * Recheck the pi chain, in case we got a priority setting 1131 * 1132 * Called from sched_setscheduler 1133 */ 1134 void rt_mutex_adjust_pi(struct task_struct *task) 1135 { 1136 struct rt_mutex_waiter *waiter; 1137 struct rt_mutex *next_lock; 1138 unsigned long flags; 1139 1140 raw_spin_lock_irqsave(&task->pi_lock, flags); 1141 1142 waiter = task->pi_blocked_on; 1143 if (!waiter || (waiter->prio == task->prio && 1144 !dl_prio(task->prio))) { 1145 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 1146 return; 1147 } 1148 next_lock = waiter->lock; 1149 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 1150 1151 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 1152 get_task_struct(task); 1153 1154 rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, 1155 next_lock, NULL, task); 1156 } 1157 1158 /** 1159 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop 1160 * @lock: the rt_mutex to take 1161 * @state: the state the task should block in (TASK_INTERRUPTIBLE 1162 * or TASK_UNINTERRUPTIBLE) 1163 * @timeout: the pre-initialized and started timer, or NULL for none 1164 * @waiter: the pre-initialized rt_mutex_waiter 1165 * 1166 * Must be called with lock->wait_lock held and interrupts disabled 1167 */ 1168 static int __sched 1169 __rt_mutex_slowlock(struct rt_mutex *lock, int state, 1170 struct hrtimer_sleeper *timeout, 1171 struct rt_mutex_waiter *waiter) 1172 { 1173 int ret = 0; 1174 1175 for (;;) { 1176 /* Try to acquire the lock: */ 1177 if (try_to_take_rt_mutex(lock, current, waiter)) 1178 break; 1179 1180 /* 1181 * TASK_INTERRUPTIBLE checks for signals and 1182 * timeout. Ignored otherwise. 1183 */ 1184 if (likely(state == TASK_INTERRUPTIBLE)) { 1185 /* Signal pending? */ 1186 if (signal_pending(current)) 1187 ret = -EINTR; 1188 if (timeout && !timeout->task) 1189 ret = -ETIMEDOUT; 1190 if (ret) 1191 break; 1192 } 1193 1194 raw_spin_unlock_irq(&lock->wait_lock); 1195 1196 debug_rt_mutex_print_deadlock(waiter); 1197 1198 schedule(); 1199 1200 raw_spin_lock_irq(&lock->wait_lock); 1201 set_current_state(state); 1202 } 1203 1204 __set_current_state(TASK_RUNNING); 1205 return ret; 1206 } 1207 1208 static void rt_mutex_handle_deadlock(int res, int detect_deadlock, 1209 struct rt_mutex_waiter *w) 1210 { 1211 /* 1212 * If the result is not -EDEADLOCK or the caller requested 1213 * deadlock detection, nothing to do here. 1214 */ 1215 if (res != -EDEADLOCK || detect_deadlock) 1216 return; 1217 1218 /* 1219 * Yell lowdly and stop the task right here. 1220 */ 1221 rt_mutex_print_deadlock(w); 1222 while (1) { 1223 set_current_state(TASK_INTERRUPTIBLE); 1224 schedule(); 1225 } 1226 } 1227 1228 /* 1229 * Slow path lock function: 1230 */ 1231 static int __sched 1232 rt_mutex_slowlock(struct rt_mutex *lock, int state, 1233 struct hrtimer_sleeper *timeout, 1234 enum rtmutex_chainwalk chwalk) 1235 { 1236 struct rt_mutex_waiter waiter; 1237 unsigned long flags; 1238 int ret = 0; 1239 1240 debug_rt_mutex_init_waiter(&waiter); 1241 RB_CLEAR_NODE(&waiter.pi_tree_entry); 1242 RB_CLEAR_NODE(&waiter.tree_entry); 1243 1244 /* 1245 * Technically we could use raw_spin_[un]lock_irq() here, but this can 1246 * be called in early boot if the cmpxchg() fast path is disabled 1247 * (debug, no architecture support). In this case we will acquire the 1248 * rtmutex with lock->wait_lock held. But we cannot unconditionally 1249 * enable interrupts in that early boot case. So we need to use the 1250 * irqsave/restore variants. 1251 */ 1252 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1253 1254 /* Try to acquire the lock again: */ 1255 if (try_to_take_rt_mutex(lock, current, NULL)) { 1256 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1257 return 0; 1258 } 1259 1260 set_current_state(state); 1261 1262 /* Setup the timer, when timeout != NULL */ 1263 if (unlikely(timeout)) 1264 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); 1265 1266 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); 1267 1268 if (likely(!ret)) 1269 /* sleep on the mutex */ 1270 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); 1271 1272 if (unlikely(ret)) { 1273 __set_current_state(TASK_RUNNING); 1274 if (rt_mutex_has_waiters(lock)) 1275 remove_waiter(lock, &waiter); 1276 rt_mutex_handle_deadlock(ret, chwalk, &waiter); 1277 } 1278 1279 /* 1280 * try_to_take_rt_mutex() sets the waiter bit 1281 * unconditionally. We might have to fix that up. 1282 */ 1283 fixup_rt_mutex_waiters(lock); 1284 1285 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1286 1287 /* Remove pending timer: */ 1288 if (unlikely(timeout)) 1289 hrtimer_cancel(&timeout->timer); 1290 1291 debug_rt_mutex_free_waiter(&waiter); 1292 1293 return ret; 1294 } 1295 1296 /* 1297 * Slow path try-lock function: 1298 */ 1299 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) 1300 { 1301 unsigned long flags; 1302 int ret; 1303 1304 /* 1305 * If the lock already has an owner we fail to get the lock. 1306 * This can be done without taking the @lock->wait_lock as 1307 * it is only being read, and this is a trylock anyway. 1308 */ 1309 if (rt_mutex_owner(lock)) 1310 return 0; 1311 1312 /* 1313 * The mutex has currently no owner. Lock the wait lock and try to 1314 * acquire the lock. We use irqsave here to support early boot calls. 1315 */ 1316 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1317 1318 ret = try_to_take_rt_mutex(lock, current, NULL); 1319 1320 /* 1321 * try_to_take_rt_mutex() sets the lock waiters bit 1322 * unconditionally. Clean this up. 1323 */ 1324 fixup_rt_mutex_waiters(lock); 1325 1326 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1327 1328 return ret; 1329 } 1330 1331 /* 1332 * Slow path to release a rt-mutex. 1333 * Return whether the current task needs to undo a potential priority boosting. 1334 */ 1335 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, 1336 struct wake_q_head *wake_q) 1337 { 1338 unsigned long flags; 1339 1340 /* irqsave required to support early boot calls */ 1341 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1342 1343 debug_rt_mutex_unlock(lock); 1344 1345 rt_mutex_deadlock_account_unlock(current); 1346 1347 /* 1348 * We must be careful here if the fast path is enabled. If we 1349 * have no waiters queued we cannot set owner to NULL here 1350 * because of: 1351 * 1352 * foo->lock->owner = NULL; 1353 * rtmutex_lock(foo->lock); <- fast path 1354 * free = atomic_dec_and_test(foo->refcnt); 1355 * rtmutex_unlock(foo->lock); <- fast path 1356 * if (free) 1357 * kfree(foo); 1358 * raw_spin_unlock(foo->lock->wait_lock); 1359 * 1360 * So for the fastpath enabled kernel: 1361 * 1362 * Nothing can set the waiters bit as long as we hold 1363 * lock->wait_lock. So we do the following sequence: 1364 * 1365 * owner = rt_mutex_owner(lock); 1366 * clear_rt_mutex_waiters(lock); 1367 * raw_spin_unlock(&lock->wait_lock); 1368 * if (cmpxchg(&lock->owner, owner, 0) == owner) 1369 * return; 1370 * goto retry; 1371 * 1372 * The fastpath disabled variant is simple as all access to 1373 * lock->owner is serialized by lock->wait_lock: 1374 * 1375 * lock->owner = NULL; 1376 * raw_spin_unlock(&lock->wait_lock); 1377 */ 1378 while (!rt_mutex_has_waiters(lock)) { 1379 /* Drops lock->wait_lock ! */ 1380 if (unlock_rt_mutex_safe(lock, flags) == true) 1381 return false; 1382 /* Relock the rtmutex and try again */ 1383 raw_spin_lock_irqsave(&lock->wait_lock, flags); 1384 } 1385 1386 /* 1387 * The wakeup next waiter path does not suffer from the above 1388 * race. See the comments there. 1389 * 1390 * Queue the next waiter for wakeup once we release the wait_lock. 1391 */ 1392 mark_wakeup_next_waiter(wake_q, lock); 1393 1394 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); 1395 1396 /* check PI boosting */ 1397 return true; 1398 } 1399 1400 /* 1401 * debug aware fast / slowpath lock,trylock,unlock 1402 * 1403 * The atomic acquire/release ops are compiled away, when either the 1404 * architecture does not support cmpxchg or when debugging is enabled. 1405 */ 1406 static inline int 1407 rt_mutex_fastlock(struct rt_mutex *lock, int state, 1408 int (*slowfn)(struct rt_mutex *lock, int state, 1409 struct hrtimer_sleeper *timeout, 1410 enum rtmutex_chainwalk chwalk)) 1411 { 1412 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { 1413 rt_mutex_deadlock_account_lock(lock, current); 1414 return 0; 1415 } else 1416 return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); 1417 } 1418 1419 static inline int 1420 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, 1421 struct hrtimer_sleeper *timeout, 1422 enum rtmutex_chainwalk chwalk, 1423 int (*slowfn)(struct rt_mutex *lock, int state, 1424 struct hrtimer_sleeper *timeout, 1425 enum rtmutex_chainwalk chwalk)) 1426 { 1427 if (chwalk == RT_MUTEX_MIN_CHAINWALK && 1428 likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { 1429 rt_mutex_deadlock_account_lock(lock, current); 1430 return 0; 1431 } else 1432 return slowfn(lock, state, timeout, chwalk); 1433 } 1434 1435 static inline int 1436 rt_mutex_fasttrylock(struct rt_mutex *lock, 1437 int (*slowfn)(struct rt_mutex *lock)) 1438 { 1439 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { 1440 rt_mutex_deadlock_account_lock(lock, current); 1441 return 1; 1442 } 1443 return slowfn(lock); 1444 } 1445 1446 static inline void 1447 rt_mutex_fastunlock(struct rt_mutex *lock, 1448 bool (*slowfn)(struct rt_mutex *lock, 1449 struct wake_q_head *wqh)) 1450 { 1451 DEFINE_WAKE_Q(wake_q); 1452 1453 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { 1454 rt_mutex_deadlock_account_unlock(current); 1455 1456 } else { 1457 bool deboost = slowfn(lock, &wake_q); 1458 1459 wake_up_q(&wake_q); 1460 1461 /* Undo pi boosting if necessary: */ 1462 if (deboost) 1463 rt_mutex_adjust_prio(current); 1464 } 1465 } 1466 1467 /** 1468 * rt_mutex_lock - lock a rt_mutex 1469 * 1470 * @lock: the rt_mutex to be locked 1471 */ 1472 void __sched rt_mutex_lock(struct rt_mutex *lock) 1473 { 1474 might_sleep(); 1475 1476 rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); 1477 } 1478 EXPORT_SYMBOL_GPL(rt_mutex_lock); 1479 1480 /** 1481 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible 1482 * 1483 * @lock: the rt_mutex to be locked 1484 * 1485 * Returns: 1486 * 0 on success 1487 * -EINTR when interrupted by a signal 1488 */ 1489 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) 1490 { 1491 might_sleep(); 1492 1493 return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); 1494 } 1495 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); 1496 1497 /* 1498 * Futex variant with full deadlock detection. 1499 */ 1500 int rt_mutex_timed_futex_lock(struct rt_mutex *lock, 1501 struct hrtimer_sleeper *timeout) 1502 { 1503 might_sleep(); 1504 1505 return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, 1506 RT_MUTEX_FULL_CHAINWALK, 1507 rt_mutex_slowlock); 1508 } 1509 1510 /** 1511 * rt_mutex_timed_lock - lock a rt_mutex interruptible 1512 * the timeout structure is provided 1513 * by the caller 1514 * 1515 * @lock: the rt_mutex to be locked 1516 * @timeout: timeout structure or NULL (no timeout) 1517 * 1518 * Returns: 1519 * 0 on success 1520 * -EINTR when interrupted by a signal 1521 * -ETIMEDOUT when the timeout expired 1522 */ 1523 int 1524 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) 1525 { 1526 might_sleep(); 1527 1528 return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, 1529 RT_MUTEX_MIN_CHAINWALK, 1530 rt_mutex_slowlock); 1531 } 1532 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); 1533 1534 /** 1535 * rt_mutex_trylock - try to lock a rt_mutex 1536 * 1537 * @lock: the rt_mutex to be locked 1538 * 1539 * This function can only be called in thread context. It's safe to 1540 * call it from atomic regions, but not from hard interrupt or soft 1541 * interrupt context. 1542 * 1543 * Returns 1 on success and 0 on contention 1544 */ 1545 int __sched rt_mutex_trylock(struct rt_mutex *lock) 1546 { 1547 if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) 1548 return 0; 1549 1550 return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); 1551 } 1552 EXPORT_SYMBOL_GPL(rt_mutex_trylock); 1553 1554 /** 1555 * rt_mutex_unlock - unlock a rt_mutex 1556 * 1557 * @lock: the rt_mutex to be unlocked 1558 */ 1559 void __sched rt_mutex_unlock(struct rt_mutex *lock) 1560 { 1561 rt_mutex_fastunlock(lock, rt_mutex_slowunlock); 1562 } 1563 EXPORT_SYMBOL_GPL(rt_mutex_unlock); 1564 1565 /** 1566 * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock 1567 * @lock: the rt_mutex to be unlocked 1568 * 1569 * Returns: true/false indicating whether priority adjustment is 1570 * required or not. 1571 */ 1572 bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, 1573 struct wake_q_head *wqh) 1574 { 1575 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { 1576 rt_mutex_deadlock_account_unlock(current); 1577 return false; 1578 } 1579 return rt_mutex_slowunlock(lock, wqh); 1580 } 1581 1582 /** 1583 * rt_mutex_destroy - mark a mutex unusable 1584 * @lock: the mutex to be destroyed 1585 * 1586 * This function marks the mutex uninitialized, and any subsequent 1587 * use of the mutex is forbidden. The mutex must not be locked when 1588 * this function is called. 1589 */ 1590 void rt_mutex_destroy(struct rt_mutex *lock) 1591 { 1592 WARN_ON(rt_mutex_is_locked(lock)); 1593 #ifdef CONFIG_DEBUG_RT_MUTEXES 1594 lock->magic = NULL; 1595 #endif 1596 } 1597 1598 EXPORT_SYMBOL_GPL(rt_mutex_destroy); 1599 1600 /** 1601 * __rt_mutex_init - initialize the rt lock 1602 * 1603 * @lock: the rt lock to be initialized 1604 * 1605 * Initialize the rt lock to unlocked state. 1606 * 1607 * Initializing of a locked rt lock is not allowed 1608 */ 1609 void __rt_mutex_init(struct rt_mutex *lock, const char *name) 1610 { 1611 lock->owner = NULL; 1612 raw_spin_lock_init(&lock->wait_lock); 1613 lock->waiters = RB_ROOT; 1614 lock->waiters_leftmost = NULL; 1615 1616 debug_rt_mutex_init(lock, name); 1617 } 1618 EXPORT_SYMBOL_GPL(__rt_mutex_init); 1619 1620 /** 1621 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a 1622 * proxy owner 1623 * 1624 * @lock: the rt_mutex to be locked 1625 * @proxy_owner:the task to set as owner 1626 * 1627 * No locking. Caller has to do serializing itself 1628 * 1629 * Special API call for PI-futex support. This initializes the rtmutex and 1630 * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not 1631 * possible at this point because the pi_state which contains the rtmutex 1632 * is not yet visible to other tasks. 1633 */ 1634 void rt_mutex_init_proxy_locked(struct rt_mutex *lock, 1635 struct task_struct *proxy_owner) 1636 { 1637 __rt_mutex_init(lock, NULL); 1638 debug_rt_mutex_proxy_lock(lock, proxy_owner); 1639 rt_mutex_set_owner(lock, proxy_owner); 1640 rt_mutex_deadlock_account_lock(lock, proxy_owner); 1641 } 1642 1643 /** 1644 * rt_mutex_proxy_unlock - release a lock on behalf of owner 1645 * 1646 * @lock: the rt_mutex to be locked 1647 * 1648 * No locking. Caller has to do serializing itself 1649 * 1650 * Special API call for PI-futex support. This merrily cleans up the rtmutex 1651 * (debugging) state. Concurrent operations on this rt_mutex are not 1652 * possible because it belongs to the pi_state which is about to be freed 1653 * and it is not longer visible to other tasks. 1654 */ 1655 void rt_mutex_proxy_unlock(struct rt_mutex *lock, 1656 struct task_struct *proxy_owner) 1657 { 1658 debug_rt_mutex_proxy_unlock(lock); 1659 rt_mutex_set_owner(lock, NULL); 1660 rt_mutex_deadlock_account_unlock(proxy_owner); 1661 } 1662 1663 /** 1664 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task 1665 * @lock: the rt_mutex to take 1666 * @waiter: the pre-initialized rt_mutex_waiter 1667 * @task: the task to prepare 1668 * 1669 * Returns: 1670 * 0 - task blocked on lock 1671 * 1 - acquired the lock for task, caller should wake it up 1672 * <0 - error 1673 * 1674 * Special API call for FUTEX_REQUEUE_PI support. 1675 */ 1676 int rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1677 struct rt_mutex_waiter *waiter, 1678 struct task_struct *task) 1679 { 1680 int ret; 1681 1682 raw_spin_lock_irq(&lock->wait_lock); 1683 1684 if (try_to_take_rt_mutex(lock, task, NULL)) { 1685 raw_spin_unlock_irq(&lock->wait_lock); 1686 return 1; 1687 } 1688 1689 /* We enforce deadlock detection for futexes */ 1690 ret = task_blocks_on_rt_mutex(lock, waiter, task, 1691 RT_MUTEX_FULL_CHAINWALK); 1692 1693 if (ret && !rt_mutex_owner(lock)) { 1694 /* 1695 * Reset the return value. We might have 1696 * returned with -EDEADLK and the owner 1697 * released the lock while we were walking the 1698 * pi chain. Let the waiter sort it out. 1699 */ 1700 ret = 0; 1701 } 1702 1703 if (unlikely(ret)) 1704 remove_waiter(lock, waiter); 1705 1706 raw_spin_unlock_irq(&lock->wait_lock); 1707 1708 debug_rt_mutex_print_deadlock(waiter); 1709 1710 return ret; 1711 } 1712 1713 /** 1714 * rt_mutex_next_owner - return the next owner of the lock 1715 * 1716 * @lock: the rt lock query 1717 * 1718 * Returns the next owner of the lock or NULL 1719 * 1720 * Caller has to serialize against other accessors to the lock 1721 * itself. 1722 * 1723 * Special API call for PI-futex support 1724 */ 1725 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) 1726 { 1727 if (!rt_mutex_has_waiters(lock)) 1728 return NULL; 1729 1730 return rt_mutex_top_waiter(lock)->task; 1731 } 1732 1733 /** 1734 * rt_mutex_finish_proxy_lock() - Complete lock acquisition 1735 * @lock: the rt_mutex we were woken on 1736 * @to: the timeout, null if none. hrtimer should already have 1737 * been started. 1738 * @waiter: the pre-initialized rt_mutex_waiter 1739 * 1740 * Complete the lock acquisition started our behalf by another thread. 1741 * 1742 * Returns: 1743 * 0 - success 1744 * <0 - error, one of -EINTR, -ETIMEDOUT 1745 * 1746 * Special API call for PI-futex requeue support 1747 */ 1748 int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, 1749 struct hrtimer_sleeper *to, 1750 struct rt_mutex_waiter *waiter) 1751 { 1752 int ret; 1753 1754 raw_spin_lock_irq(&lock->wait_lock); 1755 1756 set_current_state(TASK_INTERRUPTIBLE); 1757 1758 /* sleep on the mutex */ 1759 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); 1760 1761 if (unlikely(ret)) 1762 remove_waiter(lock, waiter); 1763 1764 /* 1765 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might 1766 * have to fix that up. 1767 */ 1768 fixup_rt_mutex_waiters(lock); 1769 1770 raw_spin_unlock_irq(&lock->wait_lock); 1771 1772 return ret; 1773 } 1774