/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>

#include <machine/cpu.h>

#ifdef COMPAT_IA32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5

/* Key to represent a unique userland synchronization object */
struct umtx_key {
        int     hash;
        int     type;
        int     shared;
        union {
                struct {
                        vm_object_t     object;
                        uintptr_t       offset;
                } shared;
                struct {
                        struct vmspace  *vs;
                        uintptr_t       addr;
                } private;
                /* Alias of the members above, used for hashing and
                   comparison. */
                struct {
                        void            *a;
                        uintptr_t       b;
                } both;
        } info;
};

/* Priority-inheritance mutex info. */
struct umtx_pi {
        /* Owner thread */
        struct thread           *pi_owner;

        /* Reference count */
        int                     pi_refcount;

        /* List entry to link umtx held by owner thread */
        TAILQ_ENTRY(umtx_pi)    pi_link;

        /* List entry in hash */
        TAILQ_ENTRY(umtx_pi)    pi_hashlink;

        /* List for waiters */
        TAILQ_HEAD(,umtx_q)     pi_blocked;

        /* Identify a userland lock object */
        struct umtx_key         pi_key;
};

/* A thread waiting on a userland synchronization object. */
struct umtx_q {
        /* Linked list for the hash. */
        TAILQ_ENTRY(umtx_q)     uq_link;

        /* Umtx key. */
        struct umtx_key         uq_key;

        /* Umtx flags. */
        int                     uq_flags;
#define UQF_UMTXQ       0x0001

        /* The thread that waits on this entry. */
        struct thread           *uq_thread;
        /*
         * Blocked on a PI mutex.  Reads can use either the chain lock
         * or sched_lock; writes must hold both the chain lock and
         * sched_lock.
         */
        struct umtx_pi          *uq_pi_blocked;

        /* On blocked list */
        TAILQ_ENTRY(umtx_q)     uq_lockq;

        /* Threads contending with us */
        TAILQ_HEAD(,umtx_pi)    uq_pi_contested;

        /* Inherited priority from PP mutex */
        u_char                  uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
        /* Lock for this chain. */
        struct mtx              uc_lock;

        /* List of sleep queues. */
        struct umtxq_head       uc_queue;

        /* Busy flag */
        char                    uc_busy;

        /* Chain lock waiters */
        int                     uc_waiters;

        /* All PIs in the list */
        TAILQ_HEAD(,umtx_pi)    uc_pi_list;
};

#define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would in turn boost A's priority through
 * priority propagation.  A's priority would then never be lowered even
 * if it were consuming 100% CPU, which is unfair to other processes.
 */

#define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&     \
                          (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?     \
                         PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define GOLDEN_RATIO_PRIME      2654404609U
#define UMTX_CHAINS             128
#define UMTX_SHIFTS             (__WORD_BIT - 7)

#define THREAD_SHARE            0
#define PROCESS_SHARE           1
#define AUTO_SHARE              2

#define GET_SHARE(flags)        \
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
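/*
 * A note on the constants above: umtxq_hash() below mixes the two key
 * words with a multiplicative (Fibonacci-style) hash.  With
 * UMTX_CHAINS = 128 only 7 bits are needed, so the product is shifted
 * right by UMTX_SHIFTS to keep the most significant -- best mixed --
 * bits.  Illustrative form of the computation:
 *
 *	n    = (uintptr_t)key->info.both.a + key->info.both.b;
 *	hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
 */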
static uma_zone_t       umtx_pi_zone;
static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int      umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
SYSCTL_DECL(_kern_threads);
static int      umtx_dflt_spins = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, umtx_dflt_spins, CTLFLAG_RW,
    &umtx_dflt_spins, 0, "default umtx spin count");
static int      umtx_max_spins = 3000;
SYSCTL_INT(_kern_threads, OID_AUTO, umtx_max_spins, CTLFLAG_RW,
    &umtx_max_spins, 0, "max umtx spin count");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
        struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
        struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

static void
umtxq_sysinit(void *arg __unused)
{
        int i;

        umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        for (i = 0; i < UMTX_CHAINS; ++i) {
                mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
                    MTX_DEF | MTX_DUPOK);
                TAILQ_INIT(&umtxq_chains[i].uc_queue);
                TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
                umtxq_chains[i].uc_busy = 0;
                umtxq_chains[i].uc_waiters = 0;
        }
        EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
            EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
        struct umtx_q *uq;

        uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
        TAILQ_INIT(&uq->uq_pi_contested);
        uq->uq_inherited_pri = PRI_MAX;
        return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
        free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
        unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;

        key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
        return (k1->type == k2->type &&
                k1->info.both.a == k2->info.both.a &&
                k1->info.both.b == k2->info.both.b);
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
        return (&umtxq_chains[key->hash]);
}

/*
 * Set a chain to the busy state when the following operation
 * may block (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(key);
        mtx_assert(&uc->uc_lock, MA_OWNED);
        while (uc->uc_busy != 0) {
                uc->uc_waiters++;
                msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
                uc->uc_waiters--;
        }
        uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(key);
        mtx_assert(&uc->uc_lock, MA_OWNED);
        KASSERT(uc->uc_busy != 0, ("not busy"));
        uc->uc_busy = 0;
        if (uc->uc_waiters)
                wakeup_one(uc);
}
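/*
 * Sketch of the convention used throughout this file for the busy flag
 * (illustrative, not extra locking): operations that may fault or
 * sleep while manipulating a queue cannot hold uc_lock across the
 * faulting access, so they mark the chain busy first:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);		// may sleep waiting for the chain
 *	umtxq_unlock(&key);
 *	... fault-prone userland access (casuword() etc.) ...
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);		// wakes one chain waiter
 *	umtxq_unlock(&key);
 */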
/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(key);
        mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(key);
        mtx_unlock(&uc->uc_lock);
}

/*
 * Insert a thread onto the umtx queue.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
        uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove a thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        if (uq->uq_flags & UQF_UMTXQ) {
                TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
                uq->uq_flags &= ~UQF_UMTXQ;
        }
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
        struct umtxq_chain *uc;
        struct umtx_q *uq;
        int count = 0;

        uc = umtxq_getchain(key);
        UMTXQ_LOCKED_ASSERT(uc);
        TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
                if (umtx_key_match(&uq->uq_key, key)) {
                        if (++count > 1)
                                break;
                }
        }
        return (count);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
        struct umtxq_chain *uc;
        struct umtx_q *uq;
        int count = 0;

        *first = NULL;
        uc = umtxq_getchain(key);
        UMTXQ_LOCKED_ASSERT(uc);
        TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
                if (umtx_key_match(&uq->uq_key, key)) {
                        if (++count > 1)
                                break;
                        *first = uq;
                }
        }
        return (count);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
        struct umtxq_chain *uc;
        struct umtx_q *uq, *next;
        int ret;

        ret = 0;
        uc = umtxq_getchain(key);
        UMTXQ_LOCKED_ASSERT(uc);
        TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
                if (umtx_key_match(&uq->uq_key, key)) {
                        umtxq_remove(uq);
                        wakeup(uq);
                        if (++ret >= n_wake)
                                break;
                }
        }
        return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        umtxq_remove(uq);
        wakeup(uq);
}

/*
 * Put the thread into sleep state; before sleeping, check whether the
 * thread was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
        struct umtxq_chain *uc;
        int error;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        if (!(uq->uq_flags & UQF_UMTXQ))
                return (0);
        error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
        if (error == EWOULDBLOCK)
                error = ETIMEDOUT;
        return (error);
}
/*
 * Convert a userspace address into a unique logical key.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
        struct thread *td = curthread;
        vm_map_t map;
        vm_map_entry_t entry;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired;

        key->type = type;
        if (share == THREAD_SHARE) {
                key->shared = 0;
                key->info.private.vs = td->td_proc->p_vmspace;
                key->info.private.addr = (uintptr_t)addr;
        } else {
                MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
                map = &td->td_proc->p_vmspace->vm_map;
                if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
                    &entry, &key->info.shared.object, &pindex, &prot,
                    &wired) != KERN_SUCCESS) {
                        return EFAULT;
                }

                if ((share == PROCESS_SHARE) ||
                    (share == AUTO_SHARE &&
                     VM_INHERIT_SHARE == entry->inheritance)) {
                        key->shared = 1;
                        key->info.shared.offset = entry->offset + entry->start -
                            (vm_offset_t)addr;
                        vm_object_reference(key->info.shared.object);
                } else {
                        key->shared = 0;
                        key->info.private.vs = td->td_proc->p_vmspace;
                        key->info.private.addr = (uintptr_t)addr;
                }
                vm_map_lookup_done(map, entry);
        }

        umtxq_hash(key);
        return (0);
}

/*
 * Release a key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
        if (key->shared)
                vm_object_deallocate(key->info.shared.object);
}
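/*
 * Example of the mapping performed by umtx_key_get() above
 * (illustrative values): for a THREAD_SHARE (process-private) object
 * at address A, the key is the pair (current vmspace, A).  For a
 * PROCESS_SHARE object the same memory must hash identically in every
 * process, so the key is instead derived from the backing (vm_object,
 * offset) pair.  AUTO_SHARE picks between the two based on the map
 * entry's inheritance attribute.
 */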
/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
        struct umtx_q *uq;
        u_long owner;
        u_long old;
        int error = 0;

        uq = td->td_umtxq;

        /*
         * Care must be exercised when dealing with the umtx structure.
         * It can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMTX_UNOWNED)
                        return (0);

                /* The address was invalid. */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested, try to acquire it. */
                if (owner == UMTX_CONTESTED) {
                        owner = casuword(&umtx->u_owner,
                            UMTX_CONTESTED, id | UMTX_CONTESTED);

                        if (owner == UMTX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* If this failed, the lock has changed; restart. */
                        continue;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
                    AUTO_SHARE, &uq->uq_key)) != 0)
                        return (error);

                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either someone else has acquired the lock or it has been
                 * released.
                 */
                old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * If we set the contested bit, sleep.  Otherwise the lock
                 * changed and we need to retry, or we lost a race to the
                 * thread unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtx", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);
        }

        return (0);
}

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
        struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        if (timeout == NULL) {
                error = _do_lock_umtx(td, umtx, id, 0);
                /* Mutex locking is restarted if it is interrupted. */
                if (error == EINTR)
                        error = ERESTART;
        } else {
                getnanouptime(&ts);
                timespecadd(&ts, timeout);
                TIMESPEC_TO_TIMEVAL(&tv, timeout);
                for (;;) {
                        error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
                        if (error != ETIMEDOUT)
                                break;
                        getnanouptime(&ts2);
                        if (timespeccmp(&ts2, &ts, >=)) {
                                error = ETIMEDOUT;
                                break;
                        }
                        ts3 = ts;
                        timespecsub(&ts3, &ts2);
                        TIMESPEC_TO_TIMEVAL(&tv, &ts3);
                }
                /* Timed locking is not restarted. */
                if (error == ERESTART)
                        error = EINTR;
        }
        return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
        struct umtx_key key;
        u_long owner;
        u_long old;
        int error;
        int count;

        /*
         * Make sure we own this mtx.
         */
        owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMTX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMTX_CONTESTED) == 0) {
                old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
            &key)) != 0)
                return (error);

        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is at most one thread waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword(&umtx->u_owner, owner,
            count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
        umtxq_lock(&key);
        umtxq_signal(&key, 1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        if (old != owner)
                return (EINVAL);
        return (0);
}
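/*
 * Illustrative sketch (not part of this file): how a userland lock
 * implementation is expected to use the fast path before falling back
 * to the _umtx_lock()/_umtx_unlock() system calls above.  The helper
 * name my_umtx_lock() is hypothetical.
 *
 *	void
 *	my_umtx_lock(struct umtx *mtx, u_long tid)
 *	{
 *		// Uncontested acquire: a single CAS, no kernel entry.
 *		if (atomic_cmpset_acq_long(&mtx->u_owner, UMTX_UNOWNED, tid))
 *			return;
 *		// Contested: the kernel queues us and may sleep.
 *		_umtx_lock(mtx);
 *	}
 */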
#ifdef COMPAT_IA32

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
        struct umtx_q *uq;
        uint32_t owner;
        uint32_t old;
        int error = 0;

        uq = td->td_umtxq;

        /*
         * Care must be exercised when dealing with the umtx structure.
         * It can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword32(m, UMUTEX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMUTEX_UNOWNED)
                        return (0);

                /* The address was invalid. */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested, try to acquire it. */
                if (owner == UMUTEX_CONTESTED) {
                        owner = casuword32(m,
                            UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
                        if (owner == UMUTEX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* If this failed, the lock has changed; restart. */
                        continue;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
                    AUTO_SHARE, &uq->uq_key)) != 0)
                        return (error);

                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either someone else has acquired the lock or it has been
                 * released.
                 */
                old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * If we set the contested bit, sleep.  Otherwise the lock
                 * changed and we need to retry, or we lost a race to the
                 * thread unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtx", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);
        }

        return (0);
}

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
        struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        if (timeout == NULL) {
                error = _do_lock_umtx32(td, m, id, 0);
                /* Mutex locking is restarted if it is interrupted. */
                if (error == EINTR)
                        error = ERESTART;
        } else {
                getnanouptime(&ts);
                timespecadd(&ts, timeout);
                TIMESPEC_TO_TIMEVAL(&tv, timeout);
                for (;;) {
                        error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
                        if (error != ETIMEDOUT)
                                break;
                        getnanouptime(&ts2);
                        if (timespeccmp(&ts2, &ts, >=)) {
                                error = ETIMEDOUT;
                                break;
                        }
                        ts3 = ts;
                        timespecsub(&ts3, &ts2);
                        TIMESPEC_TO_TIMEVAL(&tv, &ts3);
                }
                /* Timed locking is not restarted. */
                if (error == ERESTART)
                        error = EINTR;
        }
        return (error);
}
/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
        struct umtx_key key;
        uint32_t owner;
        uint32_t old;
        int error;
        int count;

        /*
         * Make sure we own this mtx.
         */
        owner = fuword32(m);
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMUTEX_CONTESTED) == 0) {
                old = casuword32(m, owner, UMUTEX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
            &key)) != 0)
                return (error);

        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is at most one thread waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword32(m, owner,
            count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
        umtxq_lock(&key);
        umtxq_signal(&key, 1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        if (old != owner)
                return (EINVAL);
        return (0);
}
#endif

/*
 * Fetch and compare a value; sleep on the address if the value has not
 * changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
        struct timespec *timeout, int compat32)
{
        struct umtx_q *uq;
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        u_long tmp;
        int error = 0;

        uq = td->td_umtxq;
        if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
            &uq->uq_key)) != 0)
                return (error);

        umtxq_lock(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unlock(&uq->uq_key);
        if (compat32 == 0)
                tmp = fuword(addr);
        else
                tmp = fuword32(addr);
        if (tmp != id) {
                umtxq_lock(&uq->uq_key);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
        } else if (timeout == NULL) {
                umtxq_lock(&uq->uq_key);
                error = umtxq_sleep(uq, "uwait", 0);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
        } else {
                getnanouptime(&ts);
                timespecadd(&ts, timeout);
                TIMESPEC_TO_TIMEVAL(&tv, timeout);
                umtxq_lock(&uq->uq_key);
                for (;;) {
                        error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
                        if (!(uq->uq_flags & UQF_UMTXQ))
                                break;
                        if (error != ETIMEDOUT)
                                break;
                        umtxq_unlock(&uq->uq_key);
                        getnanouptime(&ts2);
                        if (timespeccmp(&ts2, &ts, >=)) {
                                error = ETIMEDOUT;
                                umtxq_lock(&uq->uq_key);
                                break;
                        }
                        ts3 = ts;
                        timespecsub(&ts3, &ts2);
                        TIMESPEC_TO_TIMEVAL(&tv, &ts3);
                        umtxq_lock(&uq->uq_key);
                }
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
        }
        umtx_key_release(&uq->uq_key);
        if (error == ERESTART)
                error = EINTR;
        return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
        struct umtx_key key;
        int ret;

        if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
            &key)) != 0)
                return (ret);
        umtxq_lock(&key);
        ret = umtxq_signal(&key, n_wake);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        return (0);
}
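/*
 * Illustrative sketch (not part of this file): do_wait() and
 * kern_umtx_wake() implement the usual "futex" protocol, reachable
 * from userland through the _umtx_op(2) UMTX_OP_WAIT / UMTX_OP_WAKE
 * operations.  A hypothetical userland event wait looks like:
 *
 *	// waiter: sleep only while *addr still holds the expected value
 *	while (atomic_load_acq_long(addr) == expected)
 *		_umtx_op(addr, UMTX_OP_WAIT, expected, NULL, NULL);
 *
 *	// waker: change the value, then wake any sleepers
 *	atomic_store_rel_long(addr, newval);
 *	_umtx_op(addr, UMTX_OP_WAKE, nwake, NULL, NULL);
 *
 * The re-check inside do_wait() (fuword() against `id') after queuing
 * is what makes the wait free of lost-wakeup races.
 */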
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
        int try)
{
        struct umtx_q *uq;
        uint32_t owner, old, id;
#ifdef SMP
        int spincount;
#endif
        int error = 0;

        id = td->td_tid;
        uq = td->td_umtxq;

#ifdef SMP
        if (smp_cpus > 1) {
                spincount = fuword32(&m->m_spincount);
                if (spincount == 0)
                        spincount = umtx_dflt_spins;
                if (spincount > umtx_max_spins)
                        spincount = umtx_max_spins;
        } else
                spincount = 0;
#endif

        /*
         * Care must be exercised when dealing with the umtx structure.
         * It can fault on any access.
         */
        for (;;) {
#ifdef SMP
try_unowned:
#endif
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMUTEX_UNOWNED)
                        return (0);

                /* The address was invalid. */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested, try to acquire it. */
                if (owner == UMUTEX_CONTESTED) {
#ifdef SMP
try_contested:
#endif
                        owner = casuword32(&m->m_owner,
                            UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

                        if (owner == UMUTEX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* If this failed, the lock has changed; restart. */
                        continue;
                }

                if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
                    (owner & ~UMUTEX_CONTESTED) == id)
                        return (EDEADLK);

                if (try != 0)
                        return (EBUSY);

#ifdef SMP
                if (spincount > 0 && (owner & ~UMUTEX_CONTESTED) != id) {
                        int i, found = 0;
                        struct pcpu *pcpu = NULL;

                        /* Look for a cpu the owner is running on */
                        for (i = 0; i < MAXCPU; i++) {
                                if (CPU_ABSENT(i))
                                        continue;
                                pcpu = pcpu_find(i);
                                if ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid) {
                                        found = 1;
                                        break;
                                }
                        }

                        if (__predict_false(!found))
                                goto end_spin;

                        while ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid &&
                               (owner & ~UMUTEX_CONTESTED) != id) {
                                if (--spincount <= 0)
                                        break;
                                if ((td->td_flags &
                                    (TDF_NEEDRESCHED|TDF_ASTPENDING|TDF_NEEDSIGCHK)) ||
                                    P_SHOULDSTOP(td->td_proc))
                                        break;
                                owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
                                if (owner == UMUTEX_UNOWNED)
                                        goto try_unowned;
                                if (owner == UMUTEX_CONTESTED)
                                        goto try_contested;
                                cpu_spinwait();
                        }
                }
end_spin:
                spincount = 0;

#endif

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
                    GET_SHARE(flags), &uq->uq_key)) != 0)
                        return (error);

                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either someone else has acquired the lock or it has been
                 * released.
                 */
                old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * If we set the contested bit, sleep.  Otherwise the lock
                 * changed and we need to retry, or we lost a race to the
                 * thread unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtxn", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);
        }

        return (0);
}
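/*
 * The adaptive spin above is only attempted while the lock owner is
 * observed running on another CPU (pc_curtid match); otherwise sleeping
 * immediately is cheaper.  The bounds are runtime tunables exported by
 * the sysctls defined earlier in this file, e.g. (illustrative values):
 *
 *	sysctl kern.threads.umtx_dflt_spins=500	 # default per-lock spin
 *	sysctl kern.threads.umtx_max_spins=3000	 # upper bound
 *
 * A nonzero per-mutex m_spincount overrides the default.
 */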
/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
        struct umtx_key key;
        uint32_t owner, old, id;
        int error;
        int count;

        id = td->td_tid;
        /*
         * Make sure we own this mtx.
         */
        owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMUTEX_CONTESTED) == 0) {
                old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
            &key)) != 0)
                return (error);

        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is at most one thread waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword32(&m->m_owner, owner,
            count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
        umtxq_lock(&key);
        umtxq_signal(&key, 1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        if (old != owner)
                return (EINVAL);
        return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
        struct umtx_pi *pi;

        pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
        TAILQ_INIT(&pi->pi_blocked);
        atomic_add_int(&umtx_pi_allocated, 1);
        return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
        uma_zfree(umtx_pi_zone, pi);
        atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has
 * been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
        struct umtx_q *uq, *uq1, *uq2;
        struct thread *td1;

        mtx_assert(&sched_lock, MA_OWNED);
        if (pi == NULL)
                return (0);

        uq = td->td_umtxq;

        /*
         * Check if the thread needs to be moved on the blocked chain.
         * It needs to be moved if either its priority is lower than
         * the previous thread or higher than the next thread.
         */
        uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
        uq2 = TAILQ_NEXT(uq, uq_lockq);
        if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
            (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
                /*
                 * Remove thread from blocked chain and determine where
                 * it should be moved to.
                 */
                TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
                TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
                        td1 = uq1->uq_thread;
                        MPASS(td1->td_proc->p_magic == P_MAGIC);
                        if (UPRI(td1) > UPRI(td))
                                break;
                }

                if (uq1 == NULL)
                        TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
                else
                        TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
        }
        return (1);
}
1311 */ 1312 static void 1313 umtx_propagate_priority(struct thread *td) 1314 { 1315 struct umtx_q *uq; 1316 struct umtx_pi *pi; 1317 int pri; 1318 1319 mtx_assert(&sched_lock, MA_OWNED); 1320 pri = UPRI(td); 1321 uq = td->td_umtxq; 1322 pi = uq->uq_pi_blocked; 1323 if (pi == NULL) 1324 return; 1325 1326 for (;;) { 1327 td = pi->pi_owner; 1328 if (td == NULL) 1329 return; 1330 1331 MPASS(td->td_proc != NULL); 1332 MPASS(td->td_proc->p_magic == P_MAGIC); 1333 1334 if (UPRI(td) <= pri) 1335 return; 1336 1337 sched_lend_user_prio(td, pri); 1338 1339 /* 1340 * Pick up the lock that td is blocked on. 1341 */ 1342 uq = td->td_umtxq; 1343 pi = uq->uq_pi_blocked; 1344 /* Resort td on the list if needed. */ 1345 if (!umtx_pi_adjust_thread(pi, td)) 1346 break; 1347 } 1348 } 1349 1350 /* 1351 * Unpropagate priority for a PI mutex when a thread blocked on 1352 * it is interrupted by signal or resumed by others. 1353 */ 1354 static void 1355 umtx_unpropagate_priority(struct umtx_pi *pi) 1356 { 1357 struct umtx_q *uq, *uq_owner; 1358 struct umtx_pi *pi2; 1359 int pri; 1360 1361 mtx_assert(&sched_lock, MA_OWNED); 1362 1363 while (pi != NULL && pi->pi_owner != NULL) { 1364 pri = PRI_MAX; 1365 uq_owner = pi->pi_owner->td_umtxq; 1366 1367 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1368 uq = TAILQ_FIRST(&pi2->pi_blocked); 1369 if (uq != NULL) { 1370 if (pri > UPRI(uq->uq_thread)) 1371 pri = UPRI(uq->uq_thread); 1372 } 1373 } 1374 1375 if (pri > uq_owner->uq_inherited_pri) 1376 pri = uq_owner->uq_inherited_pri; 1377 sched_unlend_user_prio(pi->pi_owner, pri); 1378 pi = uq_owner->uq_pi_blocked; 1379 } 1380 } 1381 1382 /* 1383 * Insert a PI mutex into owned list. 1384 */ 1385 static void 1386 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1387 { 1388 struct umtx_q *uq_owner; 1389 1390 uq_owner = owner->td_umtxq; 1391 mtx_assert(&sched_lock, MA_OWNED); 1392 if (pi->pi_owner != NULL) 1393 panic("pi_ower != NULL"); 1394 pi->pi_owner = owner; 1395 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1396 } 1397 1398 /* 1399 * Claim ownership of a PI mutex. 1400 */ 1401 static int 1402 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1403 { 1404 struct umtx_q *uq, *uq_owner; 1405 1406 uq_owner = owner->td_umtxq; 1407 mtx_lock_spin(&sched_lock); 1408 if (pi->pi_owner == owner) { 1409 mtx_unlock_spin(&sched_lock); 1410 return (0); 1411 } 1412 1413 if (pi->pi_owner != NULL) { 1414 /* 1415 * userland may have already messed the mutex, sigh. 1416 */ 1417 mtx_unlock_spin(&sched_lock); 1418 return (EPERM); 1419 } 1420 umtx_pi_setowner(pi, owner); 1421 uq = TAILQ_FIRST(&pi->pi_blocked); 1422 if (uq != NULL) { 1423 int pri; 1424 1425 pri = UPRI(uq->uq_thread); 1426 if (pri < UPRI(owner)) 1427 sched_lend_user_prio(owner, pri); 1428 } 1429 mtx_unlock_spin(&sched_lock); 1430 return (0); 1431 } 1432 1433 /* 1434 * Adjust a thread's order position in its blocked PI mutex, 1435 * this may result new priority propagating process. 1436 */ 1437 void 1438 umtx_pi_adjust(struct thread *td, u_char oldpri) 1439 { 1440 struct umtx_q *uq; 1441 struct umtx_pi *pi; 1442 1443 uq = td->td_umtxq; 1444 1445 mtx_assert(&sched_lock, MA_OWNED); 1446 MPASS(TD_ON_UPILOCK(td)); 1447 1448 /* 1449 * Pick up the lock that td is blocked on. 1450 */ 1451 pi = uq->uq_pi_blocked; 1452 MPASS(pi != NULL); 1453 1454 /* Resort the turnstile on the list. 
/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
        struct umtx_q *uq_owner;

        uq_owner = owner->td_umtxq;
        mtx_assert(&sched_lock, MA_OWNED);
        if (pi->pi_owner != NULL)
                panic("pi_owner != NULL");
        pi->pi_owner = owner;
        TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
        struct umtx_q *uq, *uq_owner;

        uq_owner = owner->td_umtxq;
        mtx_lock_spin(&sched_lock);
        if (pi->pi_owner == owner) {
                mtx_unlock_spin(&sched_lock);
                return (0);
        }

        if (pi->pi_owner != NULL) {
                /*
                 * Userland may have already messed with the mutex, sigh.
                 */
                mtx_unlock_spin(&sched_lock);
                return (EPERM);
        }
        umtx_pi_setowner(pi, owner);
        uq = TAILQ_FIRST(&pi->pi_blocked);
        if (uq != NULL) {
                int pri;

                pri = UPRI(uq->uq_thread);
                if (pri < UPRI(owner))
                        sched_lend_user_prio(owner, pri);
        }
        mtx_unlock_spin(&sched_lock);
        return (0);
}

/*
 * Adjust a thread's position in the queue of the PI mutex it is
 * blocked on; this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
        struct umtx_q *uq;
        struct umtx_pi *pi;

        uq = td->td_umtxq;

        mtx_assert(&sched_lock, MA_OWNED);
        MPASS(TD_ON_UPILOCK(td));

        /*
         * Pick up the lock that td is blocked on.
         */
        pi = uq->uq_pi_blocked;
        MPASS(pi != NULL);

        /* Resort the turnstile on the list. */
        if (!umtx_pi_adjust_thread(pi, td))
                return;

        /*
         * If our priority was lowered and we are at the head of the
         * turnstile, then propagate our new priority up the chain.
         */
        if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
                umtx_propagate_priority(td);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
        uint32_t owner, const char *wmesg, int timo)
{
        struct umtxq_chain *uc;
        struct thread *td, *td1;
        struct umtx_q *uq1;
        int pri;
        int error = 0;

        td = uq->uq_thread;
        KASSERT(td == curthread, ("inconsistent uq_thread"));
        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        umtxq_insert(uq);
        if (pi->pi_owner == NULL) {
                /* XXX
                 * Currently, we only support process-private PI mutexes;
                 * non-contended PI mutexes are locked in userland.
                 * A process-shared PI mutex should always be initialized
                 * and registered by the kernel, and locking should always
                 * be done by the kernel, to avoid security problems.
                 * For a process-private PI mutex, we can find the owner
                 * thread and boost its priority safely.
                 */
                PROC_LOCK(curproc);
                td1 = thread_find(curproc, owner);
                mtx_lock_spin(&sched_lock);
                if (td1 != NULL && pi->pi_owner == NULL) {
                        uq1 = td1->td_umtxq;
                        umtx_pi_setowner(pi, td1);
                }
                PROC_UNLOCK(curproc);
        } else {
                mtx_lock_spin(&sched_lock);
        }

        TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
                pri = UPRI(uq1->uq_thread);
                if (pri > UPRI(td))
                        break;
        }

        if (uq1 != NULL)
                TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
        else
                TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

        uq->uq_pi_blocked = pi;
        td->td_flags |= TDF_UPIBLOCKED;
        mtx_unlock_spin(&sched_lock);
        umtxq_unlock(&uq->uq_key);

        mtx_lock_spin(&sched_lock);
        umtx_propagate_priority(td);
        mtx_unlock_spin(&sched_lock);

        umtxq_lock(&uq->uq_key);
        if (uq->uq_flags & UQF_UMTXQ) {
                error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
                if (error == EWOULDBLOCK)
                        error = ETIMEDOUT;
                if (uq->uq_flags & UQF_UMTXQ) {
                        umtxq_busy(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unbusy(&uq->uq_key);
                }
        }
        umtxq_unlock(&uq->uq_key);

        mtx_lock_spin(&sched_lock);
        uq->uq_pi_blocked = NULL;
        td->td_flags &= ~TDF_UPIBLOCKED;
        TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
        umtx_unpropagate_priority(pi);
        mtx_unlock_spin(&sched_lock);

        umtxq_lock(&uq->uq_key);

        return (error);
}

/*
 * Add a reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&pi->pi_key);
        UMTXQ_LOCKED_ASSERT(uc);
        pi->pi_refcount++;
}
1567 */ 1568 static void 1569 umtx_pi_unref(struct umtx_pi *pi) 1570 { 1571 struct umtxq_chain *uc; 1572 int free = 0; 1573 1574 uc = umtxq_getchain(&pi->pi_key); 1575 UMTXQ_LOCKED_ASSERT(uc); 1576 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1577 if (--pi->pi_refcount == 0) { 1578 mtx_lock_spin(&sched_lock); 1579 if (pi->pi_owner != NULL) { 1580 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, 1581 pi, pi_link); 1582 pi->pi_owner = NULL; 1583 } 1584 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1585 ("blocked queue not empty")); 1586 mtx_unlock_spin(&sched_lock); 1587 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1588 free = 1; 1589 } 1590 if (free) 1591 umtx_pi_free(pi); 1592 } 1593 1594 /* 1595 * Find a PI mutex in hash table. 1596 */ 1597 static struct umtx_pi * 1598 umtx_pi_lookup(struct umtx_key *key) 1599 { 1600 struct umtxq_chain *uc; 1601 struct umtx_pi *pi; 1602 1603 uc = umtxq_getchain(key); 1604 UMTXQ_LOCKED_ASSERT(uc); 1605 1606 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1607 if (umtx_key_match(&pi->pi_key, key)) { 1608 return (pi); 1609 } 1610 } 1611 return (NULL); 1612 } 1613 1614 /* 1615 * Insert a PI mutex into hash table. 1616 */ 1617 static inline void 1618 umtx_pi_insert(struct umtx_pi *pi) 1619 { 1620 struct umtxq_chain *uc; 1621 1622 uc = umtxq_getchain(&pi->pi_key); 1623 UMTXQ_LOCKED_ASSERT(uc); 1624 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1625 } 1626 1627 /* 1628 * Lock a PI mutex. 1629 */ 1630 static int 1631 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1632 int try) 1633 { 1634 struct umtx_q *uq; 1635 struct umtx_pi *pi, *new_pi; 1636 uint32_t id, owner, old; 1637 int error; 1638 1639 id = td->td_tid; 1640 uq = td->td_umtxq; 1641 1642 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1643 &uq->uq_key)) != 0) 1644 return (error); 1645 umtxq_lock(&uq->uq_key); 1646 pi = umtx_pi_lookup(&uq->uq_key); 1647 if (pi == NULL) { 1648 new_pi = umtx_pi_alloc(M_NOWAIT); 1649 if (new_pi == NULL) { 1650 umtxq_unlock(&uq->uq_key); 1651 new_pi = umtx_pi_alloc(M_WAITOK); 1652 new_pi->pi_key = uq->uq_key; 1653 umtxq_lock(&uq->uq_key); 1654 pi = umtx_pi_lookup(&uq->uq_key); 1655 if (pi != NULL) { 1656 umtx_pi_free(new_pi); 1657 new_pi = NULL; 1658 } 1659 } 1660 if (new_pi != NULL) { 1661 new_pi->pi_key = uq->uq_key; 1662 umtx_pi_insert(new_pi); 1663 pi = new_pi; 1664 } 1665 } 1666 umtx_pi_ref(pi); 1667 umtxq_unlock(&uq->uq_key); 1668 1669 /* 1670 * Care must be exercised when dealing with umtx structure. It 1671 * can fault on any access. 1672 */ 1673 for (;;) { 1674 /* 1675 * Try the uncontested case. This should be done in userland. 1676 */ 1677 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1678 1679 /* The acquire succeeded. */ 1680 if (owner == UMUTEX_UNOWNED) { 1681 error = 0; 1682 break; 1683 } 1684 1685 /* The address was invalid. */ 1686 if (owner == -1) { 1687 error = EFAULT; 1688 break; 1689 } 1690 1691 /* If no one owns it but it is contested try to acquire it. */ 1692 if (owner == UMUTEX_CONTESTED) { 1693 owner = casuword32(&m->m_owner, 1694 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1695 1696 if (owner == UMUTEX_CONTESTED) { 1697 umtxq_lock(&uq->uq_key); 1698 error = umtx_pi_claim(pi, td); 1699 umtxq_unlock(&uq->uq_key); 1700 break; 1701 } 1702 1703 /* The address was invalid. */ 1704 if (owner == -1) { 1705 error = EFAULT; 1706 break; 1707 } 1708 1709 /* If this failed the lock has changed, restart. 
/*
 * Lock a PI mutex.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
        int try)
{
        struct umtx_q *uq;
        struct umtx_pi *pi, *new_pi;
        uint32_t id, owner, old;
        int error;

        id = td->td_tid;
        uq = td->td_umtxq;

        if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
            &uq->uq_key)) != 0)
                return (error);
        umtxq_lock(&uq->uq_key);
        pi = umtx_pi_lookup(&uq->uq_key);
        if (pi == NULL) {
                new_pi = umtx_pi_alloc(M_NOWAIT);
                if (new_pi == NULL) {
                        umtxq_unlock(&uq->uq_key);
                        new_pi = umtx_pi_alloc(M_WAITOK);
                        new_pi->pi_key = uq->uq_key;
                        umtxq_lock(&uq->uq_key);
                        pi = umtx_pi_lookup(&uq->uq_key);
                        if (pi != NULL) {
                                umtx_pi_free(new_pi);
                                new_pi = NULL;
                        }
                }
                if (new_pi != NULL) {
                        new_pi->pi_key = uq->uq_key;
                        umtx_pi_insert(new_pi);
                        pi = new_pi;
                }
        }
        umtx_pi_ref(pi);
        umtxq_unlock(&uq->uq_key);

        /*
         * Care must be exercised when dealing with the umtx structure.
         * It can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMUTEX_UNOWNED) {
                        error = 0;
                        break;
                }

                /* The address was invalid. */
                if (owner == -1) {
                        error = EFAULT;
                        break;
                }

                /* If no one owns it but it is contested, try to acquire it. */
                if (owner == UMUTEX_CONTESTED) {
                        owner = casuword32(&m->m_owner,
                            UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

                        if (owner == UMUTEX_CONTESTED) {
                                umtxq_lock(&uq->uq_key);
                                error = umtx_pi_claim(pi, td);
                                umtxq_unlock(&uq->uq_key);
                                break;
                        }

                        /* The address was invalid. */
                        if (owner == -1) {
                                error = EFAULT;
                                break;
                        }

                        /* If this failed, the lock has changed; restart. */
                        continue;
                }

                if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
                    (owner & ~UMUTEX_CONTESTED) == id) {
                        error = EDEADLK;
                        break;
                }

                if (try != 0) {
                        error = EBUSY;
                        break;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        break;

                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either someone else has acquired the lock or it has been
                 * released.
                 */
                old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        error = EFAULT;
                        break;
                }

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                /*
                 * If we set the contested bit, sleep.  Otherwise the lock
                 * changed and we need to retry, or we lost a race to the
                 * thread unlocking the umtx.
                 */
                if (old == owner)
                        error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
                            "umtxpi", timo);
                umtxq_unlock(&uq->uq_key);
        }

        umtxq_lock(&uq->uq_key);
        umtx_pi_unref(pi);
        umtxq_unlock(&uq->uq_key);

        umtx_key_release(&uq->uq_key);
        return (error);
}
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
        struct umtx_key key;
        struct umtx_q *uq_first, *uq_first2, *uq_me;
        struct umtx_pi *pi, *pi2;
        uint32_t owner, old, id;
        int error;
        int count;
        int pri;

        id = td->td_tid;
        /*
         * Make sure we own this mtx.
         */
        owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMUTEX_CONTESTED) == 0) {
                old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
            &key)) != 0)
                return (error);

        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count_pi(&key, &uq_first);
        if (uq_first != NULL) {
                pi = uq_first->uq_pi_blocked;
                if (pi->pi_owner != curthread) {
                        umtxq_unbusy(&key);
                        umtxq_unlock(&key);
                        /* userland messed with the mutex */
                        return (EPERM);
                }
                uq_me = curthread->td_umtxq;
                mtx_lock_spin(&sched_lock);
                pi->pi_owner = NULL;
                TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
                uq_first = TAILQ_FIRST(&pi->pi_blocked);
                pri = PRI_MAX;
                TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
                        uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
                        if (uq_first2 != NULL) {
                                if (pri > UPRI(uq_first2->uq_thread))
                                        pri = UPRI(uq_first2->uq_thread);
                        }
                }
                sched_unlend_user_prio(curthread, pri);
                mtx_unlock_spin(&sched_lock);
        }
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is at most one thread waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword32(&m->m_owner, owner,
            count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

        umtxq_lock(&key);
        if (uq_first != NULL)
                umtxq_signal_thread(uq_first);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        if (old != owner)
                return (EINVAL);
        return (0);
}
1935 */ 1936 if (error != 0) 1937 break; 1938 1939 umtxq_lock(&uq->uq_key); 1940 umtxq_insert(uq); 1941 umtxq_unbusy(&uq->uq_key); 1942 error = umtxq_sleep(uq, "umtxpp", timo); 1943 umtxq_remove(uq); 1944 umtxq_unlock(&uq->uq_key); 1945 1946 mtx_lock_spin(&sched_lock); 1947 uq->uq_inherited_pri = old_inherited_pri; 1948 pri = PRI_MAX; 1949 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1950 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1951 if (uq2 != NULL) { 1952 if (pri > UPRI(uq2->uq_thread)) 1953 pri = UPRI(uq2->uq_thread); 1954 } 1955 } 1956 if (pri > uq->uq_inherited_pri) 1957 pri = uq->uq_inherited_pri; 1958 sched_unlend_user_prio(td, pri); 1959 mtx_unlock_spin(&sched_lock); 1960 } 1961 1962 if (error != 0) { 1963 mtx_lock_spin(&sched_lock); 1964 uq->uq_inherited_pri = old_inherited_pri; 1965 pri = PRI_MAX; 1966 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1967 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1968 if (uq2 != NULL) { 1969 if (pri > UPRI(uq2->uq_thread)) 1970 pri = UPRI(uq2->uq_thread); 1971 } 1972 } 1973 if (pri > uq->uq_inherited_pri) 1974 pri = uq->uq_inherited_pri; 1975 sched_unlend_user_prio(td, pri); 1976 mtx_unlock_spin(&sched_lock); 1977 } 1978 1979 out: 1980 umtxq_lock(&uq->uq_key); 1981 umtxq_unbusy(&uq->uq_key); 1982 umtxq_unlock(&uq->uq_key); 1983 umtx_key_release(&uq->uq_key); 1984 return (error); 1985 } 1986 1987 /* 1988 * Unlock a PP mutex. 1989 */ 1990 static int 1991 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 1992 { 1993 struct umtx_key key; 1994 struct umtx_q *uq, *uq2; 1995 struct umtx_pi *pi; 1996 uint32_t owner, id; 1997 uint32_t rceiling; 1998 int error, pri, new_inherited_pri, su; 1999 2000 id = td->td_tid; 2001 uq = td->td_umtxq; 2002 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2003 2004 /* 2005 * Make sure we own this mtx. 2006 */ 2007 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 2008 if (owner == -1) 2009 return (EFAULT); 2010 2011 if ((owner & ~UMUTEX_CONTESTED) != id) 2012 return (EPERM); 2013 2014 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2015 if (error != 0) 2016 return (error); 2017 2018 if (rceiling == -1) 2019 new_inherited_pri = PRI_MAX; 2020 else { 2021 rceiling = RTP_PRIO_MAX - rceiling; 2022 if (rceiling > RTP_PRIO_MAX) 2023 return (EINVAL); 2024 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2025 } 2026 2027 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2028 &key)) != 0) 2029 return (error); 2030 umtxq_lock(&key); 2031 umtxq_busy(&key); 2032 umtxq_unlock(&key); 2033 /* 2034 * For priority protected mutex, always set unlocked state 2035 * to UMUTEX_CONTESTED, so that userland always enters kernel 2036 * to lock the mutex, it is necessary because thread priority 2037 * has to be adjusted for such mutex. 
2038 */ 2039 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2040 UMUTEX_CONTESTED); 2041 2042 umtxq_lock(&key); 2043 if (error == 0) 2044 umtxq_signal(&key, 1); 2045 umtxq_unbusy(&key); 2046 umtxq_unlock(&key); 2047 2048 if (error == -1) 2049 error = EFAULT; 2050 else { 2051 mtx_lock_spin(&sched_lock); 2052 if (su != 0) 2053 uq->uq_inherited_pri = new_inherited_pri; 2054 pri = PRI_MAX; 2055 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2056 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2057 if (uq2 != NULL) { 2058 if (pri > UPRI(uq2->uq_thread)) 2059 pri = UPRI(uq2->uq_thread); 2060 } 2061 } 2062 if (pri > uq->uq_inherited_pri) 2063 pri = uq->uq_inherited_pri; 2064 sched_unlend_user_prio(td, pri); 2065 mtx_unlock_spin(&sched_lock); 2066 } 2067 umtx_key_release(&key); 2068 return (error); 2069 } 2070 2071 static int 2072 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2073 uint32_t *old_ceiling) 2074 { 2075 struct umtx_q *uq; 2076 uint32_t save_ceiling; 2077 uint32_t owner, id; 2078 uint32_t flags; 2079 int error; 2080 2081 flags = fuword32(&m->m_flags); 2082 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2083 return (EINVAL); 2084 if (ceiling > RTP_PRIO_MAX) 2085 return (EINVAL); 2086 id = td->td_tid; 2087 uq = td->td_umtxq; 2088 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2089 &uq->uq_key)) != 0) 2090 return (error); 2091 for (;;) { 2092 umtxq_lock(&uq->uq_key); 2093 umtxq_busy(&uq->uq_key); 2094 umtxq_unlock(&uq->uq_key); 2095 2096 save_ceiling = fuword32(&m->m_ceilings[0]); 2097 2098 owner = casuword32(&m->m_owner, 2099 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2100 2101 if (owner == UMUTEX_CONTESTED) { 2102 suword32(&m->m_ceilings[0], ceiling); 2103 suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2104 UMUTEX_CONTESTED); 2105 error = 0; 2106 break; 2107 } 2108 2109 /* The address was invalid. */ 2110 if (owner == -1) { 2111 error = EFAULT; 2112 break; 2113 } 2114 2115 if ((owner & ~UMUTEX_CONTESTED) == id) { 2116 suword32(&m->m_ceilings[0], ceiling); 2117 error = 0; 2118 break; 2119 } 2120 2121 /* 2122 * If we caught a signal, we have retried and now 2123 * exit immediately. 2124 */ 2125 if (error != 0) 2126 break; 2127 2128 /* 2129 * We set the contested bit, sleep. Otherwise the lock changed 2130 * and we need to retry or we lost a race to the thread 2131 * unlocking the umtx. 2132 */ 2133 umtxq_lock(&uq->uq_key); 2134 umtxq_insert(uq); 2135 umtxq_unbusy(&uq->uq_key); 2136 error = umtxq_sleep(uq, "umtxpp", 0); 2137 umtxq_remove(uq); 2138 umtxq_unlock(&uq->uq_key); 2139 } 2140 umtxq_lock(&uq->uq_key); 2141 if (error == 0) 2142 umtxq_signal(&uq->uq_key, INT_MAX); 2143 umtxq_unbusy(&uq->uq_key); 2144 umtxq_unlock(&uq->uq_key); 2145 umtx_key_release(&uq->uq_key); 2146 if (error == 0 && old_ceiling != NULL) 2147 suword32(old_ceiling, save_ceiling); 2148 return (error); 2149 } 2150 2151 static int 2152 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo, 2153 int try) 2154 { 2155 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2156 case 0: 2157 return (_do_lock_normal(td, m, flags, timo, try)); 2158 case UMUTEX_PRIO_INHERIT: 2159 return (_do_lock_pi(td, m, flags, timo, try)); 2160 case UMUTEX_PRIO_PROTECT: 2161 return (_do_lock_pp(td, m, flags, timo, try)); 2162 } 2163 return (EINVAL); 2164 } 2165 2166 /* 2167 * Lock a userland POSIX mutex. 
2168 */ 2169 static int 2170 do_lock_umutex(struct thread *td, struct umutex *m, 2171 struct timespec *timeout, int try) 2172 { 2173 struct timespec ts, ts2, ts3; 2174 struct timeval tv; 2175 uint32_t flags; 2176 int error; 2177 2178 flags = fuword32(&m->m_flags); 2179 if (flags == -1) 2180 return (EFAULT); 2181 2182 if (timeout == NULL) { 2183 error = _do_lock_umutex(td, m, flags, 0, try); 2184 /* Mutex locking is restarted if it is interrupted. */ 2185 if (error == EINTR) 2186 error = ERESTART; 2187 } else { 2188 getnanouptime(&ts); 2189 timespecadd(&ts, timeout); 2190 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2191 for (;;) { 2192 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try); 2193 if (error != ETIMEDOUT) 2194 break; 2195 getnanouptime(&ts2); 2196 if (timespeccmp(&ts2, &ts, >=)) { 2197 error = ETIMEDOUT; 2198 break; 2199 } 2200 ts3 = ts; 2201 timespecsub(&ts3, &ts2); 2202 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2203 } 2204 /* Timed-locking is not restarted. */ 2205 if (error == ERESTART) 2206 error = EINTR; 2207 } 2208 return (error); 2209 } 2210 2211 /* 2212 * Unlock a userland POSIX mutex. 2213 */ 2214 static int 2215 do_unlock_umutex(struct thread *td, struct umutex *m) 2216 { 2217 uint32_t flags; 2218 2219 flags = fuword32(&m->m_flags); 2220 if (flags == -1) 2221 return (EFAULT); 2222 2223 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2224 case 0: 2225 return (do_unlock_normal(td, m, flags)); 2226 case UMUTEX_PRIO_INHERIT: 2227 return (do_unlock_pi(td, m, flags)); 2228 case UMUTEX_PRIO_PROTECT: 2229 return (do_unlock_pp(td, m, flags)); 2230 } 2231 2232 return (EINVAL); 2233 } 2234 2235 static int 2236 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2237 struct timespec *timeout, u_long wflags) 2238 { 2239 struct umtx_q *uq; 2240 struct timeval tv; 2241 struct timespec cts, ets, tts; 2242 uint32_t flags; 2243 int error; 2244 2245 uq = td->td_umtxq; 2246 flags = fuword32(&cv->c_flags); 2247 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2248 if (error != 0) 2249 return (error); 2250 umtxq_lock(&uq->uq_key); 2251 umtxq_busy(&uq->uq_key); 2252 umtxq_insert(uq); 2253 umtxq_unlock(&uq->uq_key); 2254 2255 /* 2256 * The magic thing is we should set c_has_waiters to 1 before 2257 * releasing user mutex. 2258 */ 2259 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1); 2260 2261 umtxq_lock(&uq->uq_key); 2262 umtxq_unbusy(&uq->uq_key); 2263 umtxq_unlock(&uq->uq_key); 2264 2265 error = do_unlock_umutex(td, m); 2266 2267 umtxq_lock(&uq->uq_key); 2268 if (error == 0) { 2269 if ((wflags & UMTX_CHECK_UNPARKING) && 2270 (td->td_pflags & TDP_WAKEUP)) { 2271 td->td_pflags &= ~TDP_WAKEUP; 2272 error = EINTR; 2273 } else if (timeout == NULL) { 2274 error = umtxq_sleep(uq, "ucond", 0); 2275 } else { 2276 getnanouptime(&ets); 2277 timespecadd(&ets, timeout); 2278 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2279 for (;;) { 2280 error = umtxq_sleep(uq, "ucond", tvtohz(&tv)); 2281 if (error != ETIMEDOUT) 2282 break; 2283 getnanouptime(&cts); 2284 if (timespeccmp(&cts, &ets, >=)) { 2285 error = ETIMEDOUT; 2286 break; 2287 } 2288 tts = ets; 2289 timespecsub(&tts, &cts); 2290 TIMESPEC_TO_TIMEVAL(&tv, &tts); 2291 } 2292 } 2293 } 2294 2295 if (error != 0) { 2296 if ((uq->uq_flags & UQF_UMTXQ) == 0) { 2297 /* 2298 * If we concurrently got do_cv_signal()d 2299 * and we got an error or UNIX signals or a timeout, 2300 * then, perform another umtxq_signal to avoid 2301 * consuming the wakeup. 
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * We must set c_has_waiters to 1 before releasing the user
	 * mutex, so that a signalling thread cannot miss this waiter.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we were concurrently woken by do_cv_signal()
			 * but are returning with an error (a UNIX signal
			 * or a timeout), perform another umtxq_signal to
			 * avoid consuming the wakeup.  This may cause a
			 * spurious wakeup for another thread which was
			 * just queued, but SUSv3 explicitly allows
			 * spurious wakeups to occur, and indeed a
			 * kernel-based implementation cannot avoid them.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Broadcast a userland condition variable.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}

int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (_do_lock_umtx(td, uap->umtx, td->td_tid, 0));
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx(td, uap->umtx, td->td_tid));
}
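/*
 * Sketch, not part of the original code: because do_cv_signal() and
 * do_cv_broadcast() above clear c_has_waiters once no sleeper remains,
 * a userland wrapper (the name cond_signal() is hypothetical) can skip
 * the system call entirely when the flag is clear.
 *
 *	static int
 *	cond_signal(struct ucond *cv)
 *	{
 *		if (cv->c_has_waiters == 0)
 *			return (0);	(fast path, no syscall needed)
 *		return (_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL));
 *	}
 */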
static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, ts, 0));
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val));
}

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, ts, 0));
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_lock_umutex(td, uap->obj, NULL, 1));
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umutex(td, uap->obj));
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}
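/*
 * The copyin-and-validate pattern above repeats in every shim that takes
 * a timeout.  A shared helper could factor it out; the sketch below is
 * illustrative only and not part of the original code.
 *
 *	static int
 *	umtx_copyin_timeout(const void *addr, struct timespec *tsp)
 *	{
 *		int error;
 *
 *		error = copyin(addr, tsp, sizeof(*tsp));
 *		if (error == 0 &&
 *		    (tsp->tv_nsec >= 1000000000 || tsp->tv_nsec < 0))
 *			error = EINVAL;
 *		return (error);
 *	}
 */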
static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_cv_signal(td, uap->obj));
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_cv_broadcast(td, uap->obj));
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast		/* UMTX_OP_CV_BROADCAST */
};

int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return ((*op_table[uap->op])(td, uap));
	return (EINVAL);
}

#ifdef COMPAT_IA32

int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}

struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};

static inline int
copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		tsp->tv_sec = ts32.tv_sec;
		tsp->tv_nsec = ts32.tv_nsec;
	}
	return (error);
}
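/*
 * Explanatory note, not in the original source: struct umutex and
 * struct ucond use only fixed-width fields, so 32-bit processes can
 * share the native operations above unchanged; only struct timespec
 * differs in layout, which is why the compat32 shims below route
 * timeouts through copyin_timeout32() instead of copyin().
 *
 *	A 32-bit process passes { uint32_t tv_sec; uint32_t tv_nsec; },
 *	which copyin_timeout32() widens into the kernel's struct timespec
 *	before the usual tv_nsec range check is applied.
 */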
static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, ts, 1));
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, ts, 0));
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast		/* UMTX_OP_CV_BROADCAST */
};

int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return ((*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap));
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
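/*
 * Summary (explanatory sketch, not in the original source): each struct
 * thread owns one umtx_q for its entire lifetime, and the hooks in this
 * section tie into the thread lifecycle roughly as follows:
 *
 *	umtx_thread_init()	allocate td_umtxq when the thread
 *				structure is first constructed
 *	umtx_thread_alloc()	reset per-incarnation state when the
 *				thread is created, e.g. by fork()
 *	umtx_thread_exit()	drop inherited priorities at exit
 *	umtx_thread_fini()	free td_umtxq when the thread structure
 *				is destroyed
 */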
/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&sched_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	td->td_flags &= ~TDF_UBORROWING;
	mtx_unlock_spin(&sched_lock);
}
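/*
 * Explanatory note, not in the original: umtx_thread_cleanup() above
 * disowns every PI mutex still recorded as contested by the exiting
 * thread (pi_owner is cleared and the entry is unlinked from
 * uq_pi_contested), so no umtx_pi is left referencing a destroyed
 * thread, and any user priority lent to the thread is dropped by
 * clearing TDF_UBORROWING and restoring uq_inherited_pri to PRI_MAX.
 */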