// SPDX-License-Identifier: GPL-2.0-only
/*
 * rtmutex API
 */
#include <linux/spinlock.h>
#include <linux/export.h>

#define RT_MUTEX_BUILD_MUTEX
#include "rtmutex.c"

/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;

/*
 * Sysctl table with a single "max_lock_depth" entry backed by the
 * max_lock_depth variable above: int sized, mode 0644, handled by
 * proc_dointvec.
 */
static const struct ctl_table rtmutex_sysctl_table[] = {
	{
		.procname	= "max_lock_depth",
		.data		= &max_lock_depth,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};

/*
 * Register rtmutex_sysctl_table with the "kernel" path argument. Hooked up
 * via subsys_initcall() below. Always returns 0.
 */
static int __init init_rtmutex_sysctl(void)
{
	register_sysctl_init("kernel", rtmutex_sysctl_table);
	return 0;
}

subsys_initcall(init_rtmutex_sysctl);

/*
 * Debug aware fast / slowpath lock,trylock,unlock
 *
 * The atomic acquire/release ops are compiled away, when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */

/*
 * Common lock path shared by the rt_mutex_lock*() variants below.
 *
 * @lock:	the rt_mutex to be locked
 * @state:	the task state passed on to __rt_mutex_lock()
 * @nest_lock:	lockdep map handed to mutex_acquire_nest(), may be NULL
 * @subclass:	the lockdep subclass
 *
 * Annotates the acquisition for lockdep before calling __rt_mutex_lock()
 * and drops that annotation again when __rt_mutex_lock() returns non-zero.
 *
 * Returns the return value of __rt_mutex_lock().
 */
static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
						  unsigned int state,
						  struct lockdep_map *nest_lock,
						  unsigned int subclass)
{
	int ret;

	might_sleep();
	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_);
	ret = __rt_mutex_lock(&lock->rtmutex, state);
	/* Non-zero means the lock was not taken: undo the lockdep annotation */
	if (ret)
		mutex_release(&lock->dep_map, _RET_IP_);
	return ret;
}

/*
 * Exported out-of-line wrapper around __rt_mutex_base_init().
 */
void rt_mutex_base_init(struct rt_mutex_base *rtb)
{
	__rt_mutex_base_init(rtb);
}
EXPORT_SYMBOL(rt_mutex_base_init);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
 * rt_mutex_lock_nested - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 * @subclass: the lockdep subclass
 */
void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
{
	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);

/*
 * Same as rt_mutex_lock_nested(), but passes @nest_lock to the lockdep
 * annotation and uses subclass 0.
 */
void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock)
{
	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0);
}
EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);

#else /* !CONFIG_DEBUG_LOCK_ALLOC */

/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
#endif /* CONFIG_DEBUG_LOCK_ALLOC */

/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock:		the rt_mutex to be locked
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
	return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

/**
 * rt_mutex_lock_killable - lock a rt_mutex killable
 *
 * @lock:		the rt_mutex to be locked
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 */
int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
{
	return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);

/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock:	the rt_mutex to be locked
 *
 * This function can only be called in thread context. It's safe to call it
 * from atomic regions, but not from hard or soft interrupt context.
 *
 * Returns:
 *  1 on success
 *  0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	int ret;

	/* With CONFIG_DEBUG_RT_MUTEXES, warn once and fail outside task context */
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
		return 0;

	ret = __rt_mutex_trylock(&lock->rtmutex);
	/* Only tell lockdep about the lock once it has actually been taken */
	if (ret)
		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);

/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	mutex_release(&lock->dep_map, _RET_IP_);
	__rt_mutex_unlock(&lock->rtmutex);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);

/*
 * Futex variants, must not use fastpath.
 */

/* Futex trylock: forwards to rt_mutex_slowtrylock() and returns its result. */
int __sched rt_mutex_futex_trylock(struct rt_mutex_base *lock)
{
	return rt_mutex_slowtrylock(lock);
}

/* Futex trylock: forwards to __rt_mutex_slowtrylock() and returns its result. */
int __sched __rt_mutex_futex_trylock(struct rt_mutex_base *lock)
{
	return __rt_mutex_slowtrylock(lock);
}

/**
 * __rt_mutex_futex_unlock - Futex variant, that since futex variants
 * do not use the fast-path, can be simple and will not need to retry.
 *
 * @lock:	The rt_mutex to be unlocked
 * @wqh:	The wake queue head from which to get the next lock waiter
 *
 * Must be called with @lock->wait_lock held.
 *
 * Returns:
 *  false - there were no waiters, the owner was cleared, nothing more to do
 *  true  - a waiter was marked for wakeup, caller must call
 *	    rt_mutex_postunlock()
 */
bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock,
				     struct rt_wake_q_head *wqh)
{
	lockdep_assert_held(&lock->wait_lock);

	debug_rt_mutex_unlock(lock);

	if (!rt_mutex_has_waiters(lock)) {
		lock->owner = NULL;
		return false; /* done */
	}

	/*
	 * mark_wakeup_next_waiter() deboosts and retains preemption
	 * disabled when dropping the wait_lock, to avoid inversion prior
	 * to the wakeup.  preempt_disable() therein pairs with the
	 * preempt_enable() in rt_mutex_postunlock().
	 */
	mark_wakeup_next_waiter(wqh, lock);

	return true; /* call postunlock() */
}

/*
 * Futex unlock: takes @lock->wait_lock (interrupt state saved and
 * restored), runs __rt_mutex_futex_unlock() under it and, when that
 * returns true, calls rt_mutex_postunlock() after the wait_lock has been
 * dropped.
 */
void __sched rt_mutex_futex_unlock(struct rt_mutex_base *lock)
{
	DEFINE_RT_WAKE_Q(wqh);
	unsigned long flags;
	bool postunlock;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	postunlock = __rt_mutex_futex_unlock(lock, &wqh);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	if (postunlock)
		rt_mutex_postunlock(&wqh);
}

/**
 * __rt_mutex_init - initialize the rt_mutex
 *
 * @lock:	The rt_mutex to be initialized
 * @name:	The lock name used for debugging
 * @key:	The lock class key used for debugging
 *
 * Initialize the rt_mutex to unlocked state.
 *
 * Initializing of a locked rt_mutex is not allowed
 */
void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name,
			     struct lock_class_key *key)
{
	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	__rt_mutex_base_init(&lock->rtmutex);
	lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock:	the rt_mutex to be locked
 * @proxy_owner:the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 *
 * Special API call for PI-futex support. This initializes the rtmutex and
 * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
 * possible at this point because the pi_state which contains the rtmutex
 * is not yet visible to other tasks.
 */
void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
					struct task_struct *proxy_owner)
{
	static struct lock_class_key pi_futex_key;

	__rt_mutex_base_init(lock);
	/*
	 * On PREEMPT_RT the futex hashbucket spinlock becomes 'sleeping'
	 * and rtmutex based. That causes a lockdep false positive, because
	 * some of the futex functions invoke spin_unlock(&hb->lock) with
	 * the wait_lock of the rtmutex associated to the pi_futex held.
	 * spin_unlock() in turn takes wait_lock of the rtmutex on which
	 * the spinlock is based, which makes lockdep notice a lock
	 * recursion. Give the futex/rtmutex wait_lock a separate key.
	 */
	lockdep_set_class(&lock->wait_lock, &pi_futex_key);
	rt_mutex_set_owner(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock:	the rt_mutex to be unlocked
 *
 * No locking. Caller has to do serializing itself
 *
 * Special API call for PI-futex support. This just cleans up the rtmutex
 * (debugging) state. Concurrent operations on this rt_mutex are not
 * possible because it belongs to the pi_state which is about to be freed
 * and it is no longer visible to other tasks.
 */
void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_clear_owner(lock);
}

/**
 * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 * @wake_q:		the wake_q to wake tasks after we release the wait_lock
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
 *
 * Must be called with @lock->wait_lock held.
 *
 * NOTE: does _NOT_ remove the @waiter on failure; must either call
 * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for PI-futex support.
 */
int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
					struct rt_mutex_waiter *waiter,
					struct task_struct *task,
					struct wake_q_head *wake_q)
{
	int ret;

	lockdep_assert_held(&lock->wait_lock);

	/* Lock could be taken for @task right away, no need to enqueue */
	if (try_to_take_rt_mutex(lock, task, NULL))
		return 1;

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL,
				      RT_MUTEX_FULL_CHAINWALK, wake_q);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain.  Let the waiter sort it out.
		 */
		ret = 0;
	}

	return ret;
}

/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
 *
 * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
 * on failure.
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for PI-futex support.
 */
int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
				      struct rt_mutex_waiter *waiter,
				      struct task_struct *task)
{
	int ret;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irq(&lock->wait_lock);
	ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
	/* Any non-zero result (including 1) takes the waiter off the lock again */
	if (unlikely(ret))
		remove_waiter(lock, waiter);
	/*
	 * Preemption is disabled before the wait_lock is dropped and only
	 * re-enabled after the queued wakeups have been issued.
	 * NOTE(review): presumably so this task cannot be preempted between
	 * the unlock and wake_up_q() - confirm against rtmutex.c.
	 */
	preempt_disable();
	raw_spin_unlock_irq(&lock->wait_lock);
	wake_up_q(&wake_q);
	preempt_enable();

	return ret;
}

/**
 * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, null if none. hrtimer should already have
 *			been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Wait for the lock acquisition started on our behalf by
 * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
 * rt_mutex_cleanup_proxy_lock().
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT
 *
 * Special API call for PI-futex support
 */
int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
				     struct hrtimer_sleeper *to,
				     struct rt_mutex_waiter *waiter)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);
	/* sleep on the mutex */
	set_current_state(TASK_INTERRUPTIBLE);
	ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter, NULL);
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock, true);
	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}

/**
 * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
 * rt_mutex_wait_proxy_lock().
 *
 * Unless we acquired the lock; we're still enqueued on the wait-list and can
 * in fact still be granted ownership until we're removed. Therefore we can
 * find we are in fact the owner and must disregard the
 * rt_mutex_wait_proxy_lock() failure.
 *
 * Returns:
 *  true  - did the cleanup, we are done.
 *  false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
 *          caller should disregard its return value.
 *
 * Special API call for PI-futex support
 */
bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
					 struct rt_mutex_waiter *waiter)
{
	bool cleanup = false;

	raw_spin_lock_irq(&lock->wait_lock);
	/*
	 * Do an unconditional try-lock, this deals with the lock stealing
	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
	 * sets a NULL owner.
	 *
	 * We're not interested in the return value, because the subsequent
	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
	 * we will own the lock and it will have removed the waiter. If we
	 * failed the trylock, we're still not owner and we need to remove
	 * ourselves.
	 */
	try_to_take_rt_mutex(lock, current, waiter);
	/*
	 * Unless we're the owner; we're still enqueued on the wait_list.
	 * So check if we became owner, if not, take us off the wait_list.
	 */
	if (rt_mutex_owner(lock) != current) {
		remove_waiter(lock, waiter);
		cleanup = true;
	}
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock, false);

	raw_spin_unlock_irq(&lock->wait_lock);

	return cleanup;
}

/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void __sched rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex_base *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	/*
	 * Nothing to do when the task is not blocked on a lock, or when the
	 * waiter's tree node compares equal to the node derived from the task.
	 * NOTE(review): presumably "equal" means the queued priority is
	 * already up to date - confirm against rt_waiter_node_equal().
	 */
	if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	/* Sample the blocking lock while pi_lock is still held */
	next_lock = waiter->lock;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
				   next_lock, NULL, task);
}

/*
 * Performs the wakeup of the top-waiter and re-enables preemption.
 */
void __sched rt_mutex_postunlock(struct rt_wake_q_head *wqh)
{
	rt_mutex_wake_up_q(wqh);
}

#ifdef CONFIG_DEBUG_RT_MUTEXES
/*
 * Debug check: warn via DEBUG_LOCKS_WARN_ON() if @task still has entries
 * in its pi_waiters tree or still has pi_blocked_on set.
 */
void rt_mutex_debug_task_free(struct task_struct *task)
{
	DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root));
	DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
}
#endif

#ifdef CONFIG_PREEMPT_RT
/* Mutexes */

/*
 * Debug/lockdep part of mutex initialization on PREEMPT_RT.
 *
 * @mutex:	the mutex to be initialized
 * @name:	the lock name used for debugging
 * @key:	the lock class key used for debugging
 *
 * Only the debug check and the lockdep map are set up here; this function
 * does not touch mutex->rtmutex.
 */
void __mutex_rt_init(struct mutex *mutex, const char *name,
		     struct lock_class_key *key)
{
	debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
	lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP);
}
EXPORT_SYMBOL(__mutex_rt_init);

/*
 * Common lock path shared by the mutex_lock*() variants below.
 *
 * @lock:	the mutex to be locked
 * @state:	the task state passed on to __rt_mutex_lock()
 * @subclass:	the lockdep subclass
 * @nest_lock:	lockdep map handed to mutex_acquire_nest(), may be NULL
 * @ip:		the caller address used for the lockdep annotations
 *
 * Annotates the acquisition for lockdep before calling __rt_mutex_lock().
 * On a non-zero return the annotation is dropped again, otherwise
 * lock_acquired() is reported.
 *
 * Returns the return value of __rt_mutex_lock().
 */
static __always_inline int __mutex_lock_common(struct mutex *lock,
					       unsigned int state,
					       unsigned int subclass,
					       struct lockdep_map *nest_lock,
					       unsigned long ip)
{
	int ret;

	might_sleep();
	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
	ret = __rt_mutex_lock(&lock->rtmutex, state);
	if (ret)
		mutex_release(&lock->dep_map, ip);
	else
		lock_acquired(&lock->dep_map, ip);
	return ret;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Lock @lock in TASK_UNINTERRUPTIBLE state with lockdep subclass @subclass. */
void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);

/* Lock @lock in TASK_UNINTERRUPTIBLE state, annotated as nested in @nest_lock. */
void __sched _mutex_lock_nest_lock(struct mutex *lock,
				   struct lockdep_map *nest_lock)
{
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest_lock, _RET_IP_);
}
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);

/*
 * Lock @lock in TASK_INTERRUPTIBLE state with lockdep subclass @subclass.
 * Returns the result of __mutex_lock_common().
 */
int __sched mutex_lock_interruptible_nested(struct mutex *lock,
					    unsigned int subclass)
{
	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);

/*
 * Lock @lock in TASK_KILLABLE state with lockdep subclass @subclass and
 * nest lock @nest_lock. Returns the result of __mutex_lock_common().
 */
int __sched _mutex_lock_killable(struct mutex *lock, unsigned int subclass,
				 struct lockdep_map *nest_lock)
{
	return __mutex_lock_common(lock, TASK_KILLABLE, subclass, nest_lock, _RET_IP_);
}
EXPORT_SYMBOL_GPL(_mutex_lock_killable);

/*
 * Like mutex_lock_nested(), but with the lock call bracketed by
 * io_schedule_prepare() / io_schedule_finish().
 */
void __sched mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
{
	int token;

	might_sleep();

	token = io_schedule_prepare();
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
	io_schedule_finish(token);
}
EXPORT_SYMBOL_GPL(mutex_lock_io_nested);

/*
 * Try to lock @lock without blocking; on success the acquisition is
 * annotated for lockdep as nested in @nest_lock.
 *
 * Returns the result of __rt_mutex_trylock(), or 0 when called outside
 * task context with CONFIG_DEBUG_RT_MUTEXES enabled.
 */
int __sched _mutex_trylock_nest_lock(struct mutex *lock,
				     struct lockdep_map *nest_lock)
{
	int ret;

	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
		return 0;

	ret = __rt_mutex_trylock(&lock->rtmutex);
	if (ret)
		mutex_acquire_nest(&lock->dep_map, 0, 1, nest_lock, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL_GPL(_mutex_trylock_nest_lock);
#else /* !CONFIG_DEBUG_LOCK_ALLOC */

/* Lock @lock in TASK_UNINTERRUPTIBLE state. */
void __sched mutex_lock(struct mutex *lock)
{
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
}
EXPORT_SYMBOL(mutex_lock);

/*
 * Lock @lock in TASK_INTERRUPTIBLE state. Returns the result of
 * __mutex_lock_common().
 */
int __sched mutex_lock_interruptible(struct mutex *lock)
{
	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
}
EXPORT_SYMBOL(mutex_lock_interruptible);

/*
 * Lock @lock in TASK_KILLABLE state. Returns the result of
 * __mutex_lock_common().
 */
int __sched mutex_lock_killable(struct mutex *lock)
{
	return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
}
EXPORT_SYMBOL(mutex_lock_killable);

/*
 * Like mutex_lock(), but with the lock call bracketed by
 * io_schedule_prepare() / io_schedule_finish().
 */
void __sched mutex_lock_io(struct mutex *lock)
{
	int token = io_schedule_prepare();

	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
	io_schedule_finish(token);
}
EXPORT_SYMBOL(mutex_lock_io);

/*
 * Try to lock @lock without blocking. No lockdep annotation is made in
 * this !CONFIG_DEBUG_LOCK_ALLOC variant.
 *
 * Returns the result of __rt_mutex_trylock(), or 0 when called outside
 * task context with CONFIG_DEBUG_RT_MUTEXES enabled.
 */
int __sched mutex_trylock(struct mutex *lock)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
		return 0;

	return __rt_mutex_trylock(&lock->rtmutex);
}
EXPORT_SYMBOL(mutex_trylock);
#endif /* !CONFIG_DEBUG_LOCK_ALLOC */

/* Unlock @lock: drop the lockdep annotation, then release the rtmutex. */
void __sched mutex_unlock(struct mutex *lock)
{
	mutex_release(&lock->dep_map, _RET_IP_);
	__rt_mutex_unlock(&lock->rtmutex);
}
EXPORT_SYMBOL(mutex_unlock);

#endif /* CONFIG_PREEMPT_RT */