/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>

#include <machine/cpu.h>

#ifdef COMPAT_IA32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5

/* Key to represent a unique userland synchronization object. */
struct umtx_key {
    int hash;
    int type;
    int shared;
    union {
        struct {
            vm_object_t object;
            uintptr_t offset;
        } shared;
        struct {
            struct vmspace *vs;
            uintptr_t addr;
        } private;
        struct {
            void *a;
            uintptr_t b;
        } both;
    } info;
};
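
/*
 * Example (an illustrative sketch, not part of the original file): key
 * identity is what lets synchronization work across processes.  For a
 * process-shared object the key is (vm_object, offset), so two processes
 * mapping the same object at different virtual addresses still build
 * equal keys; a private key is (vmspace, address) and can only match
 * within one process:
 *
 *	key A (proc 1): shared, info.shared = { obj, off }
 *	key B (proc 2): shared, info.shared = { obj, off }
 *	umtx_key_match(&A, &B) is true, although the user addresses differ.
 */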

/* Priority inheritance mutex info. */
struct umtx_pi {
    /* Owner thread */
    struct thread *pi_owner;

    /* Reference count */
    int pi_refcount;

    /* List entry to link umtx mutexes held by a thread */
    TAILQ_ENTRY(umtx_pi) pi_link;

    /* List entry in hash */
    TAILQ_ENTRY(umtx_pi) pi_hashlink;

    /* List for waiters */
    TAILQ_HEAD(,umtx_q) pi_blocked;

    /* Identify a userland lock object */
    struct umtx_key pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
    /* Linked list for the hash. */
    TAILQ_ENTRY(umtx_q) uq_link;

    /* Umtx key. */
    struct umtx_key uq_key;

    /* Umtx flags. */
    int uq_flags;
#define UQF_UMTXQ	0x0001

    /* The thread that waits on this entry. */
    struct thread *uq_thread;

    /*
     * The PI mutex this thread is blocked on.  Reads may hold either
     * the chain lock or umtx_lock; writes must hold both.
     */
    struct umtx_pi *uq_pi_blocked;

    /* On blocked list */
    TAILQ_ENTRY(umtx_q) uq_lockq;

    /* Threads contending with us */
    TAILQ_HEAD(,umtx_pi) uq_pi_contested;

    /* Inherited priority from PP mutex */
    u_char uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
    /* Lock for this chain. */
    struct mtx uc_lock;

    /* List of sleep queues. */
    struct umtxq_head uc_queue;

    /* Busy flag */
    char uc_busy;

    /* Chain lock waiters */
    int uc_waiters;

    /* All PI mutexes in the list */
    TAILQ_HEAD(,umtx_pi) uc_pi_list;
};

#define UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason for
 * this: a user could simply create a PI mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would boost A's priority through priority
 * propagation as well, and A's priority would then never be lowered even
 * while it used 100% of the CPU.  That would be unfair to other processes.
 */
#define UPRI(td) \
    (((td)->td_user_pri >= PRI_MIN_TIMESHARE && \
      (td)->td_user_pri <= PRI_MAX_TIMESHARE) ? \
     PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define GOLDEN_RATIO_PRIME	2654404609U
#define UMTX_CHAINS		128
#define UMTX_SHIFTS		(__WORD_BIT - 7)

#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define GET_SHARE(flags) \
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

static uma_zone_t umtx_pi_zone;
static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
    struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

static struct mtx umtx_lock;

static void
umtxq_sysinit(void *arg __unused)
{
    int i;

    umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
        NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
    for (i = 0; i < UMTX_CHAINS; ++i) {
        mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
            MTX_DEF | MTX_DUPOK);
        TAILQ_INIT(&umtxq_chains[i].uc_queue);
        TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
        umtxq_chains[i].uc_busy = 0;
        umtxq_chains[i].uc_waiters = 0;
    }
    mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
    EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
        EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
    struct umtx_q *uq;

    uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
    TAILQ_INIT(&uq->uq_pi_contested);
    uq->uq_inherited_pri = PRI_MAX;
    return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
    free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
    unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;

    key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}
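
/*
 * The hash above is Fibonacci (multiplicative) hashing: multiplying by
 * GOLDEN_RATIO_PRIME scrambles the key bits, and ">> UMTX_SHIFTS" with
 * UMTX_SHIFTS == __WORD_BIT - 7 keeps the top 7 bits of the 32-bit
 * product, i.e. a value in [0, 127]; "% UMTX_CHAINS" is then only a
 * safety net.  A minimal userland sketch of the same computation
 * (illustrative only; the names are not part of this file):
 *
 *	unsigned
 *	chain_index(void *a, uintptr_t b)
 *	{
 *		unsigned n = (uintptr_t)a + b;
 *		return ((n * 2654404609U) >> (32 - 7)) % 128;
 *	}
 */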

static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
    return (k1->type == k2->type &&
        k1->info.both.a == k2->info.both.a &&
        k1->info.both.b == k2->info.both.b);
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
    return (&umtxq_chains[key->hash]);
}

/*
 * Mark a chain busy before an operation that may block (a kernel
 * mutex cannot be held across such an operation).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(key);
    mtx_assert(&uc->uc_lock, MA_OWNED);
    while (uc->uc_busy != 0) {
        uc->uc_waiters++;
        msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
        uc->uc_waiters--;
    }
    uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(key);
    mtx_assert(&uc->uc_lock, MA_OWNED);
    KASSERT(uc->uc_busy != 0, ("not busy"));
    uc->uc_busy = 0;
    if (uc->uc_waiters)
        wakeup_one(uc);
}
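
/*
 * Typical use of the busy flag (a sketch of the pattern followed by the
 * lock/unlock paths below): the chain is marked busy so that the chain
 * mutex can be dropped around a user-memory access that may fault or
 * sleep, while wakeups on the chain stay serialized against that access:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);	keep others out of this chain
 *	umtxq_unlock(&key);	drop the mutex before touching user memory
 *	... casuword32()/suword32() on the user word ...
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);
 *	umtxq_unlock(&key);
 */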

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(key);
    mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(key);
    mtx_unlock(&uc->uc_lock);
}

/*
 * Insert a thread onto the umtx queue.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(&uq->uq_key);
    UMTXQ_LOCKED_ASSERT(uc);
    TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
    uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove a thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(&uq->uq_key);
    UMTXQ_LOCKED_ASSERT(uc);
    if (uq->uq_flags & UQF_UMTXQ) {
        TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
        uq->uq_flags &= ~UQF_UMTXQ;
    }
}

/*
 * Check whether there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
    struct umtxq_chain *uc;
    struct umtx_q *uq;
    int count = 0;

    uc = umtxq_getchain(key);
    UMTXQ_LOCKED_ASSERT(uc);
    TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
        if (umtx_key_match(&uq->uq_key, key)) {
            if (++count > 1)
                break;
        }
    }
    return (count);
}

/*
 * Check whether there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
    struct umtxq_chain *uc;
    struct umtx_q *uq;
    int count = 0;

    *first = NULL;
    uc = umtxq_getchain(key);
    UMTXQ_LOCKED_ASSERT(uc);
    TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
        if (umtx_key_match(&uq->uq_key, key)) {
            if (++count > 1)
                break;
            *first = uq;
        }
    }
    return (count);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
    struct umtxq_chain *uc;
    struct umtx_q *uq, *next;
    int ret;

    ret = 0;
    uc = umtxq_getchain(key);
    UMTXQ_LOCKED_ASSERT(uc);
    TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
        if (umtx_key_match(&uq->uq_key, key)) {
            umtxq_remove(uq);
            wakeup(uq);
            if (++ret >= n_wake)
                break;
        }
    }
    return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(&uq->uq_key);
    UMTXQ_LOCKED_ASSERT(uc);
    umtxq_remove(uq);
    wakeup(uq);
}

/*
 * Put the thread into sleep state; before sleeping, check whether the
 * thread was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
    struct umtxq_chain *uc;
    int error;

    uc = umtxq_getchain(&uq->uq_key);
    UMTXQ_LOCKED_ASSERT(uc);
    if (!(uq->uq_flags & UQF_UMTXQ))
        return (0);
    error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
    if (error == EWOULDBLOCK)
        error = ETIMEDOUT;
    return (error);
}
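
/*
 * How the pieces above pair up (a sketch of the pattern the lock paths
 * below follow): a waiter inserts itself on the chain before testing the
 * user word, and umtxq_sleep() rechecks UQF_UMTXQ, so a wakeup that has
 * already removed the entry is never lost:
 *
 *	waiter				waker
 *	------				-----
 *	umtxq_lock(&key);
 *	umtxq_insert(uq);
 *	umtxq_unlock(&key);
 *	... test user word ...		umtxq_lock(&key);
 *	umtxq_lock(&key);		umtxq_signal(&key, 1);
 *	umtxq_sleep(uq, ...);		umtxq_unlock(&key);
 *	umtxq_remove(uq);
 *	umtxq_unlock(&key);
 *
 * If the wakeup arrives between the insert and the sleep, umtxq_signal()
 * has already cleared UQF_UMTXQ and umtxq_sleep() returns immediately.
 */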

/*
 * Convert a userspace address into a unique logical key.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
    struct thread *td = curthread;
    vm_map_t map;
    vm_map_entry_t entry;
    vm_pindex_t pindex;
    vm_prot_t prot;
    boolean_t wired;

    key->type = type;
    if (share == THREAD_SHARE) {
        key->shared = 0;
        key->info.private.vs = td->td_proc->p_vmspace;
        key->info.private.addr = (uintptr_t)addr;
    } else {
        MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
        map = &td->td_proc->p_vmspace->vm_map;
        if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
            &entry, &key->info.shared.object, &pindex, &prot,
            &wired) != KERN_SUCCESS) {
            return (EFAULT);
        }

        if ((share == PROCESS_SHARE) ||
            (share == AUTO_SHARE &&
             VM_INHERIT_SHARE == entry->inheritance)) {
            key->shared = 1;
            key->info.shared.offset = entry->offset + entry->start -
                (vm_offset_t)addr;
            vm_object_reference(key->info.shared.object);
        } else {
            key->shared = 0;
            key->info.private.vs = td->td_proc->p_vmspace;
            key->info.private.addr = (uintptr_t)addr;
        }
        vm_map_lookup_done(map, entry);
    }

    umtxq_hash(key);
    return (0);
}

/*
 * Release a key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
    if (key->shared)
        vm_object_deallocate(key->info.shared.object);
}
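
/*
 * What AUTO_SHARE means above (illustrative): the old umtx/wait
 * interfaces do not say whether the word is process-shared, so the
 * kernel decides from the backing map entry.  Memory whose entry is
 * inherited as VM_INHERIT_SHARE (e.g. a MAP_SHARED mapping visible to
 * two processes) yields a shared key (vm_object, offset); ordinary
 * private anonymous memory yields a private key (vmspace, address),
 * which is cheaper since it takes no vm_object reference.
 */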
614 */ 615 umtxq_lock(&uq->uq_key); 616 if (old == owner) 617 error = umtxq_sleep(uq, "umtx", timo); 618 umtxq_remove(uq); 619 umtxq_unlock(&uq->uq_key); 620 umtx_key_release(&uq->uq_key); 621 } 622 623 return (0); 624 } 625 626 /* 627 * Lock a umtx object. 628 */ 629 static int 630 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 631 struct timespec *timeout) 632 { 633 struct timespec ts, ts2, ts3; 634 struct timeval tv; 635 int error; 636 637 if (timeout == NULL) { 638 error = _do_lock_umtx(td, umtx, id, 0); 639 /* Mutex locking is restarted if it is interrupted. */ 640 if (error == EINTR) 641 error = ERESTART; 642 } else { 643 getnanouptime(&ts); 644 timespecadd(&ts, timeout); 645 TIMESPEC_TO_TIMEVAL(&tv, timeout); 646 for (;;) { 647 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv)); 648 if (error != ETIMEDOUT) 649 break; 650 getnanouptime(&ts2); 651 if (timespeccmp(&ts2, &ts, >=)) { 652 error = ETIMEDOUT; 653 break; 654 } 655 ts3 = ts; 656 timespecsub(&ts3, &ts2); 657 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 658 } 659 /* Timed-locking is not restarted. */ 660 if (error == ERESTART) 661 error = EINTR; 662 } 663 return (error); 664 } 665 666 /* 667 * Unlock a umtx object. 668 */ 669 static int 670 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 671 { 672 struct umtx_key key; 673 u_long owner; 674 u_long old; 675 int error; 676 int count; 677 678 /* 679 * Make sure we own this mtx. 680 */ 681 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 682 if (owner == -1) 683 return (EFAULT); 684 685 if ((owner & ~UMTX_CONTESTED) != id) 686 return (EPERM); 687 688 /* This should be done in userland */ 689 if ((owner & UMTX_CONTESTED) == 0) { 690 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 691 if (old == -1) 692 return (EFAULT); 693 if (old == owner) 694 return (0); 695 owner = old; 696 } 697 698 /* We should only ever be in here for contested locks */ 699 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 700 &key)) != 0) 701 return (error); 702 703 umtxq_lock(&key); 704 umtxq_busy(&key); 705 count = umtxq_count(&key); 706 umtxq_unlock(&key); 707 708 /* 709 * When unlocking the umtx, it must be marked as unowned if 710 * there is zero or one thread only waiting for it. 711 * Otherwise, it must be marked as contested. 712 */ 713 old = casuword(&umtx->u_owner, owner, 714 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 715 umtxq_lock(&key); 716 umtxq_signal(&key,1); 717 umtxq_unbusy(&key); 718 umtxq_unlock(&key); 719 umtx_key_release(&key); 720 if (old == -1) 721 return (EFAULT); 722 if (old != owner) 723 return (EINVAL); 724 return (0); 725 } 726 727 #ifdef COMPAT_IA32 728 729 /* 730 * Lock a umtx object. 731 */ 732 static int 733 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo) 734 { 735 struct umtx_q *uq; 736 uint32_t owner; 737 uint32_t old; 738 int error = 0; 739 740 uq = td->td_umtxq; 741 742 /* 743 * Care must be exercised when dealing with umtx structure. It 744 * can fault on any access. 745 */ 746 for (;;) { 747 /* 748 * Try the uncontested case. This should be done in userland. 749 */ 750 owner = casuword32(m, UMUTEX_UNOWNED, id); 751 752 /* The acquire succeeded. */ 753 if (owner == UMUTEX_UNOWNED) 754 return (0); 755 756 /* The address was invalid. */ 757 if (owner == -1) 758 return (EFAULT); 759 760 /* If no one owns it but it is contested try to acquire it. 

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
    struct umtx_key key;
    u_long owner;
    u_long old;
    int error;
    int count;

    /*
     * Make sure we own this mtx.
     */
    owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
    if (owner == -1)
        return (EFAULT);

    if ((owner & ~UMTX_CONTESTED) != id)
        return (EPERM);

    /* This should be done in userland. */
    if ((owner & UMTX_CONTESTED) == 0) {
        old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
        if (old == -1)
            return (EFAULT);
        if (old == owner)
            return (0);
        owner = old;
    }

    /* We should only ever be in here for contested locks. */
    if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
        &key)) != 0)
        return (error);

    umtxq_lock(&key);
    umtxq_busy(&key);
    count = umtxq_count(&key);
    umtxq_unlock(&key);

    /*
     * When unlocking the umtx, it must be marked as unowned if zero
     * or one thread is waiting for it.  Otherwise, it must be marked
     * as contested.
     */
    old = casuword(&umtx->u_owner, owner,
        count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
    umtxq_lock(&key);
    umtxq_signal(&key, 1);
    umtxq_unbusy(&key);
    umtxq_unlock(&key);
    umtx_key_release(&key);
    if (old == -1)
        return (EFAULT);
    if (old != owner)
        return (EINVAL);
    return (0);
}

#ifdef COMPAT_IA32

/*
 * Lock a 32-bit umtx object.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
    struct umtx_q *uq;
    uint32_t owner;
    uint32_t old;
    int error = 0;

    uq = td->td_umtxq;

    /*
     * Care must be exercised when dealing with the umtx structure.  It
     * can fault on any access.
     */
    for (;;) {
        /*
         * Try the uncontested case.  This should be done in userland.
         */
        owner = casuword32(m, UMUTEX_UNOWNED, id);

        /* The acquire succeeded. */
        if (owner == UMUTEX_UNOWNED)
            return (0);

        /* The address was invalid. */
        if (owner == -1)
            return (EFAULT);

        /* If no one owns it but it is contested, try to acquire it. */
        if (owner == UMUTEX_CONTESTED) {
            owner = casuword32(m,
                UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
            if (owner == UMUTEX_CONTESTED)
                return (0);

            /* The address was invalid. */
            if (owner == -1)
                return (EFAULT);

            /* If this failed, the lock has changed; restart. */
            continue;
        }

        /*
         * If we caught a signal, we have retried and now
         * exit immediately.
         */
        if (error != 0)
            return (error);

        if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
            AUTO_SHARE, &uq->uq_key)) != 0)
            return (error);

        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);

        /*
         * Set the contested bit so that a release in user space knows
         * to use the system call for unlock.  If this fails, either
         * someone else has acquired the lock or it has been released.
         */
        old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

        /* The address was invalid. */
        if (old == -1) {
            umtxq_lock(&uq->uq_key);
            umtxq_remove(uq);
            umtxq_unlock(&uq->uq_key);
            umtx_key_release(&uq->uq_key);
            return (EFAULT);
        }

        /*
         * If we set the contested bit, sleep.  Otherwise the lock
         * changed and we need to retry, or we lost a race with the
         * thread unlocking the umtx.
         */
        umtxq_lock(&uq->uq_key);
        if (old == owner)
            error = umtxq_sleep(uq, "umtx", timo);
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
    }

    return (0);
}

/*
 * Lock a 32-bit umtx object, with an optional timeout.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
    struct timespec *timeout)
{
    struct timespec ts, ts2, ts3;
    struct timeval tv;
    int error;

    if (timeout == NULL) {
        error = _do_lock_umtx32(td, m, id, 0);
        /* Mutex locking is restarted if it is interrupted. */
        if (error == EINTR)
            error = ERESTART;
    } else {
        getnanouptime(&ts);
        timespecadd(&ts, timeout);
        TIMESPEC_TO_TIMEVAL(&tv, timeout);
        for (;;) {
            error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
            if (error != ETIMEDOUT)
                break;
            getnanouptime(&ts2);
            if (timespeccmp(&ts2, &ts, >=)) {
                error = ETIMEDOUT;
                break;
            }
            ts3 = ts;
            timespecsub(&ts3, &ts2);
            TIMESPEC_TO_TIMEVAL(&tv, &ts3);
        }
        /* Timed locking is not restarted. */
        if (error == ERESTART)
            error = EINTR;
    }
    return (error);
}
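
/*
 * The timeout handling above (and in the other do_lock_* wrappers)
 * follows one recipe: convert the relative timeout into an absolute
 * deadline on the uptime clock, sleep in tick-sized chunks, and on each
 * ETIMEDOUT recompute the remaining time, since msleep() only takes a
 * tick count.  Schematically:
 *
 *	deadline = now + timeout;
 *	for (;;) {
 *		error = try_lock(sleep up to tvtohz(remaining) ticks);
 *		if (error != ETIMEDOUT)
 *			break;
 *		if (now >= deadline)
 *			break;		give up: ETIMEDOUT
 *		remaining = deadline - now;
 *	}
 *
 * Using getnanouptime() keeps the deadline immune to wall-clock steps.
 */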

/*
 * Unlock a 32-bit umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
    struct umtx_key key;
    uint32_t owner;
    uint32_t old;
    int error;
    int count;

    /*
     * Make sure we own this mtx.
     */
    owner = fuword32(m);
    if (owner == -1)
        return (EFAULT);

    if ((owner & ~UMUTEX_CONTESTED) != id)
        return (EPERM);

    /* This should be done in userland. */
    if ((owner & UMUTEX_CONTESTED) == 0) {
        old = casuword32(m, owner, UMUTEX_UNOWNED);
        if (old == -1)
            return (EFAULT);
        if (old == owner)
            return (0);
        owner = old;
    }

    /* We should only ever be in here for contested locks. */
    if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
        &key)) != 0)
        return (error);

    umtxq_lock(&key);
    umtxq_busy(&key);
    count = umtxq_count(&key);
    umtxq_unlock(&key);

    /*
     * When unlocking the umtx, it must be marked as unowned if zero
     * or one thread is waiting for it.  Otherwise, it must be marked
     * as contested.
     */
    old = casuword32(m, owner,
        count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
    umtxq_lock(&key);
    umtxq_signal(&key, 1);
    umtxq_unbusy(&key);
    umtxq_unlock(&key);
    umtx_key_release(&key);
    if (old == -1)
        return (EFAULT);
    if (old != owner)
        return (EINVAL);
    return (0);
}
#endif

/*
 * Fetch the value and compare; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct timespec *timeout, int compat32)
{
    struct umtx_q *uq;
    struct timespec ts, ts2, ts3;
    struct timeval tv;
    u_long tmp;
    int error = 0;

    uq = td->td_umtxq;
    if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
        &uq->uq_key)) != 0)
        return (error);

    umtxq_lock(&uq->uq_key);
    umtxq_insert(uq);
    umtxq_unlock(&uq->uq_key);
    if (compat32 == 0)
        tmp = fuword(addr);
    else
        tmp = fuword32(addr);
    if (tmp != id) {
        umtxq_lock(&uq->uq_key);
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
    } else if (timeout == NULL) {
        umtxq_lock(&uq->uq_key);
        error = umtxq_sleep(uq, "uwait", 0);
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
    } else {
        getnanouptime(&ts);
        timespecadd(&ts, timeout);
        TIMESPEC_TO_TIMEVAL(&tv, timeout);
        umtxq_lock(&uq->uq_key);
        for (;;) {
            error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
            if (!(uq->uq_flags & UQF_UMTXQ))
                break;
            if (error != ETIMEDOUT)
                break;
            umtxq_unlock(&uq->uq_key);
            getnanouptime(&ts2);
            if (timespeccmp(&ts2, &ts, >=)) {
                error = ETIMEDOUT;
                umtxq_lock(&uq->uq_key);
                break;
            }
            ts3 = ts;
            timespecsub(&ts3, &ts2);
            TIMESPEC_TO_TIMEVAL(&tv, &ts3);
            umtxq_lock(&uq->uq_key);
        }
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
    }
    umtx_key_release(&uq->uq_key);
    if (error == ERESTART)
        error = EINTR;
    return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
    struct umtx_key key;
    int ret;

    if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
        &key)) != 0)
        return (ret);
    umtxq_lock(&key);
    ret = umtxq_signal(&key, n_wake);
    umtxq_unlock(&key);
    umtx_key_release(&key);
    return (0);
}
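
/*
 * do_wait() and kern_umtx_wake() together give a futex-style
 * compare-and-sleep / wake primitive.  A userland sketch (illustrative;
 * see _umtx_op(2) for the actual interface):
 *
 *	// waiter: sleep only if *addr still holds val
 *	_umtx_op(addr, UMTX_OP_WAIT, val, NULL, NULL);
 *
 *	// waker: wake up to INT_MAX threads sleeping on addr
 *	_umtx_op(addr, UMTX_OP_WAKE, INT_MAX, NULL, NULL);
 *
 * The value check in do_wait() happens after the thread is already on
 * the queue, so a wakeup between the check and the sleep is not lost.
 */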

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
    int try)
{
    struct umtx_q *uq;
    uint32_t owner, old, id;
    int error = 0;

    id = td->td_tid;
    uq = td->td_umtxq;

    /*
     * Care must be exercised when dealing with the umtx structure.  It
     * can fault on any access.
     */
    for (;;) {
        /*
         * Try the uncontested case.  This should be done in userland.
         */
        owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

        /* The acquire succeeded. */
        if (owner == UMUTEX_UNOWNED)
            return (0);

        /* The address was invalid. */
        if (owner == -1)
            return (EFAULT);

        /* If no one owns it but it is contested, try to acquire it. */
        if (owner == UMUTEX_CONTESTED) {
            owner = casuword32(&m->m_owner,
                UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

            if (owner == UMUTEX_CONTESTED)
                return (0);

            /* The address was invalid. */
            if (owner == -1)
                return (EFAULT);

            /* If this failed, the lock has changed; restart. */
            continue;
        }

        if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
            (owner & ~UMUTEX_CONTESTED) == id)
            return (EDEADLK);

        if (try != 0)
            return (EBUSY);

        /*
         * If we caught a signal, we have retried and now
         * exit immediately.
         */
        if (error != 0)
            return (error);

        if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
            GET_SHARE(flags), &uq->uq_key)) != 0)
            return (error);

        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);

        /*
         * Set the contested bit so that a release in user space knows
         * to use the system call for unlock.  If this fails, either
         * someone else has acquired the lock or it has been released.
         */
        old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

        /* The address was invalid. */
        if (old == -1) {
            umtxq_lock(&uq->uq_key);
            umtxq_remove(uq);
            umtxq_unlock(&uq->uq_key);
            umtx_key_release(&uq->uq_key);
            return (EFAULT);
        }

        /*
         * If we set the contested bit, sleep.  Otherwise the lock
         * changed and we need to retry, or we lost a race with the
         * thread unlocking the umtx.
         */
        umtxq_lock(&uq->uq_key);
        if (old == owner)
            error = umtxq_sleep(uq, "umtxn", timo);
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
    }

    return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
    struct umtx_key key;
    uint32_t owner, old, id;
    int error;
    int count;

    id = td->td_tid;
    /*
     * Make sure we own this mtx.
     */
    owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
    if (owner == -1)
        return (EFAULT);

    if ((owner & ~UMUTEX_CONTESTED) != id)
        return (EPERM);

    /* This should be done in userland. */
    if ((owner & UMUTEX_CONTESTED) == 0) {
        old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
        if (old == -1)
            return (EFAULT);
        if (old == owner)
            return (0);
        owner = old;
    }

    /* We should only ever be in here for contested locks. */
    if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
        &key)) != 0)
        return (error);

    umtxq_lock(&key);
    umtxq_busy(&key);
    count = umtxq_count(&key);
    umtxq_unlock(&key);

    /*
     * When unlocking the umtx, it must be marked as unowned if zero
     * or one thread is waiting for it.  Otherwise, it must be marked
     * as contested.
     */
    old = casuword32(&m->m_owner, owner,
        count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
    umtxq_lock(&key);
    umtxq_signal(&key, 1);
    umtxq_unbusy(&key);
    umtxq_unlock(&key);
    umtx_key_release(&key);
    if (old == -1)
        return (EFAULT);
    if (old != owner)
        return (EINVAL);
    return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
    struct umtx_pi *pi;

    pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
    TAILQ_INIT(&pi->pi_blocked);
    atomic_add_int(&umtx_pi_allocated, 1);
    return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
    uma_zfree(umtx_pi_zone, pi);
    atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a PI mutex's blocked list after its
 * priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
    struct umtx_q *uq, *uq1, *uq2;
    struct thread *td1;

    mtx_assert(&umtx_lock, MA_OWNED);
    if (pi == NULL)
        return (0);

    uq = td->td_umtxq;

    /*
     * Check if the thread needs to be moved on the blocked chain.
     * It needs to be moved if either its priority is lower than
     * the previous thread or higher than the next thread.
     */
    uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
    uq2 = TAILQ_NEXT(uq, uq_lockq);
    if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
        (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
        /*
         * Remove thread from blocked chain and determine where
         * it should be moved to.
         */
        TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
        TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
            td1 = uq1->uq_thread;
            MPASS(td1->td_proc->p_magic == P_MAGIC);
            if (UPRI(td1) > UPRI(td))
                break;
        }

        if (uq1 == NULL)
            TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
        else
            TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
    }
    return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
    struct umtx_q *uq;
    struct umtx_pi *pi;
    int pri;

    mtx_assert(&umtx_lock, MA_OWNED);
    pri = UPRI(td);
    uq = td->td_umtxq;
    pi = uq->uq_pi_blocked;
    if (pi == NULL)
        return;

    for (;;) {
        td = pi->pi_owner;
        if (td == NULL)
            return;

        MPASS(td->td_proc != NULL);
        MPASS(td->td_proc->p_magic == P_MAGIC);

        if (UPRI(td) <= pri)
            return;

        thread_lock(td);
        sched_lend_user_prio(td, pri);
        thread_unlock(td);

        /*
         * Pick up the lock that td is blocked on.
         */
        uq = td->td_umtxq;
        pi = uq->uq_pi_blocked;
        /* Resort td on the list if needed. */
        if (!umtx_pi_adjust_thread(pi, td))
            break;
    }
}
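
/*
 * Worked example of the propagation loop above (illustrative values;
 * smaller numbers mean higher priority): thread A (priority 140) owns
 * PI mutex M1 and is itself blocked on M2, owned by thread B (160).
 * When thread C (120) blocks on M1:
 *
 *	1. C is sorted into M1->pi_blocked; UPRI(C) == 120.
 *	2. A's priority is lent up: sched_lend_user_prio(A, 120).
 *	3. The loop follows A->uq_pi_blocked to M2 and lends B 120 too.
 *
 * The walk stops at a mutex with no owner, or when the owner already
 * runs at the waiter's priority or better.
 */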

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by another thread.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
    struct umtx_q *uq, *uq_owner;
    struct umtx_pi *pi2;
    int pri;

    mtx_assert(&umtx_lock, MA_OWNED);

    while (pi != NULL && pi->pi_owner != NULL) {
        pri = PRI_MAX;
        uq_owner = pi->pi_owner->td_umtxq;

        TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
            uq = TAILQ_FIRST(&pi2->pi_blocked);
            if (uq != NULL) {
                if (pri > UPRI(uq->uq_thread))
                    pri = UPRI(uq->uq_thread);
            }
        }

        if (pri > uq_owner->uq_inherited_pri)
            pri = uq_owner->uq_inherited_pri;
        thread_lock(pi->pi_owner);
        sched_unlend_user_prio(pi->pi_owner, pri);
        thread_unlock(pi->pi_owner);
        pi = uq_owner->uq_pi_blocked;
    }
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
    struct umtx_q *uq_owner;

    uq_owner = owner->td_umtxq;
    mtx_assert(&umtx_lock, MA_OWNED);
    if (pi->pi_owner != NULL)
        panic("pi_owner != NULL");
    pi->pi_owner = owner;
    TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
    struct umtx_q *uq, *uq_owner;

    uq_owner = owner->td_umtxq;
    mtx_lock_spin(&umtx_lock);
    if (pi->pi_owner == owner) {
        mtx_unlock_spin(&umtx_lock);
        return (0);
    }

    if (pi->pi_owner != NULL) {
        /*
         * Userland may have already messed with the mutex; sigh.
         */
        mtx_unlock_spin(&umtx_lock);
        return (EPERM);
    }
    umtx_pi_setowner(pi, owner);
    uq = TAILQ_FIRST(&pi->pi_blocked);
    if (uq != NULL) {
        int pri;

        pri = UPRI(uq->uq_thread);
        thread_lock(owner);
        if (pri < UPRI(owner))
            sched_lend_user_prio(owner, pri);
        thread_unlock(owner);
    }
    mtx_unlock_spin(&umtx_lock);
    return (0);
}

/*
 * Adjust a thread's position in the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
    struct umtx_q *uq;
    struct umtx_pi *pi;

    uq = td->td_umtxq;

    mtx_assert(&umtx_lock, MA_OWNED);
    MPASS(TD_ON_UPILOCK(td));

    /*
     * Pick up the lock that td is blocked on.
     */
    pi = uq->uq_pi_blocked;
    MPASS(pi != NULL);

    /* Resort the turnstile on the list. */
    if (!umtx_pi_adjust_thread(pi, td))
        return;

    /*
     * If our priority was lowered and we are at the head of the
     * turnstile, then propagate our new priority up the chain.
     */
    if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
        umtx_propagate_priority(td);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
    uint32_t owner, const char *wmesg, int timo)
{
    struct umtxq_chain *uc;
    struct thread *td, *td1;
    struct umtx_q *uq1;
    int pri;
    int error = 0;

    td = uq->uq_thread;
    KASSERT(td == curthread, ("inconsistent uq_thread"));
    uc = umtxq_getchain(&uq->uq_key);
    UMTXQ_LOCKED_ASSERT(uc);
    umtxq_insert(uq);
    if (pi->pi_owner == NULL) {
        /*
         * XXX Currently, we only support process-private PI mutexes;
         * non-contended PI mutexes are locked in userland.
         * Process-shared PI mutexes should always be initialized by
         * the kernel and registered with the kernel, and locking
         * should always be done by the kernel to avoid security
         * problems.  For a process-private PI mutex, we can find the
         * owner thread and boost its priority safely.
         */
        PROC_LOCK(curproc);
        td1 = thread_find(curproc, owner);
        mtx_lock_spin(&umtx_lock);
        if (td1 != NULL && pi->pi_owner == NULL) {
            uq1 = td1->td_umtxq;
            umtx_pi_setowner(pi, td1);
        }
        PROC_UNLOCK(curproc);
    } else {
        mtx_lock_spin(&umtx_lock);
    }

    TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
        pri = UPRI(uq1->uq_thread);
        if (pri > UPRI(td))
            break;
    }

    if (uq1 != NULL)
        TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
    else
        TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

    uq->uq_pi_blocked = pi;
    td->td_flags |= TDF_UPIBLOCKED;
    mtx_unlock_spin(&umtx_lock);
    umtxq_unlock(&uq->uq_key);

    mtx_lock_spin(&umtx_lock);
    umtx_propagate_priority(td);
    mtx_unlock_spin(&umtx_lock);

    umtxq_lock(&uq->uq_key);
    if (uq->uq_flags & UQF_UMTXQ) {
        error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
        if (error == EWOULDBLOCK)
            error = ETIMEDOUT;
        if (uq->uq_flags & UQF_UMTXQ) {
            umtxq_busy(&uq->uq_key);
            umtxq_remove(uq);
            umtxq_unbusy(&uq->uq_key);
        }
    }
    umtxq_unlock(&uq->uq_key);

    mtx_lock_spin(&umtx_lock);
    uq->uq_pi_blocked = NULL;
    td->td_flags &= ~TDF_UPIBLOCKED;
    TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
    umtx_unpropagate_priority(pi);
    mtx_unlock_spin(&umtx_lock);

    umtxq_lock(&uq->uq_key);

    return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(&pi->pi_key);
    UMTXQ_LOCKED_ASSERT(uc);
    pi->pi_refcount++;
}

/*
 * Drop a reference to a PI mutex; when the count reaches zero, its
 * memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
    struct umtxq_chain *uc;
    int free = 0;

    uc = umtxq_getchain(&pi->pi_key);
    UMTXQ_LOCKED_ASSERT(uc);
    KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
    if (--pi->pi_refcount == 0) {
        mtx_lock_spin(&umtx_lock);
        if (pi->pi_owner != NULL) {
            TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
                pi, pi_link);
            pi->pi_owner = NULL;
        }
        KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
            ("blocked queue not empty"));
        mtx_unlock_spin(&umtx_lock);
        TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
        free = 1;
    }
    if (free)
        umtx_pi_free(pi);
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
    struct umtxq_chain *uc;
    struct umtx_pi *pi;

    uc = umtxq_getchain(key);
    UMTXQ_LOCKED_ASSERT(uc);

    TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
        if (umtx_key_match(&pi->pi_key, key)) {
            return (pi);
        }
    }
    return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
    struct umtxq_chain *uc;

    uc = umtxq_getchain(&pi->pi_key);
    UMTXQ_LOCKED_ASSERT(uc);
    TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
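
/*
 * Note on allocation in _do_lock_pi() below: a umtx_pi is first tried
 * with M_NOWAIT while the chain lock is held; if that fails, the lock
 * is dropped, the allocation is retried with M_WAITOK, and the lookup
 * is repeated, because another thread may have inserted the PI mutex
 * while the lock was dropped.  Schematically:
 *
 *	pi = lookup(key);
 *	if (pi == NULL) {
 *		new_pi = alloc(M_NOWAIT);		under chain lock
 *		if (new_pi == NULL) {
 *			unlock chain;
 *			new_pi = alloc(M_WAITOK);	may sleep
 *			lock chain;
 *			if ((pi = lookup(key)) != NULL) {
 *				free(new_pi);		lost the race
 *				new_pi = NULL;
 *			}
 *		}
 *		if (new_pi != NULL)
 *			pi = insert(new_pi);
 *	}
 */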
1558 */ 1559 static inline void 1560 umtx_pi_insert(struct umtx_pi *pi) 1561 { 1562 struct umtxq_chain *uc; 1563 1564 uc = umtxq_getchain(&pi->pi_key); 1565 UMTXQ_LOCKED_ASSERT(uc); 1566 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1567 } 1568 1569 /* 1570 * Lock a PI mutex. 1571 */ 1572 static int 1573 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1574 int try) 1575 { 1576 struct umtx_q *uq; 1577 struct umtx_pi *pi, *new_pi; 1578 uint32_t id, owner, old; 1579 int error; 1580 1581 id = td->td_tid; 1582 uq = td->td_umtxq; 1583 1584 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1585 &uq->uq_key)) != 0) 1586 return (error); 1587 umtxq_lock(&uq->uq_key); 1588 pi = umtx_pi_lookup(&uq->uq_key); 1589 if (pi == NULL) { 1590 new_pi = umtx_pi_alloc(M_NOWAIT); 1591 if (new_pi == NULL) { 1592 umtxq_unlock(&uq->uq_key); 1593 new_pi = umtx_pi_alloc(M_WAITOK); 1594 new_pi->pi_key = uq->uq_key; 1595 umtxq_lock(&uq->uq_key); 1596 pi = umtx_pi_lookup(&uq->uq_key); 1597 if (pi != NULL) { 1598 umtx_pi_free(new_pi); 1599 new_pi = NULL; 1600 } 1601 } 1602 if (new_pi != NULL) { 1603 new_pi->pi_key = uq->uq_key; 1604 umtx_pi_insert(new_pi); 1605 pi = new_pi; 1606 } 1607 } 1608 umtx_pi_ref(pi); 1609 umtxq_unlock(&uq->uq_key); 1610 1611 /* 1612 * Care must be exercised when dealing with umtx structure. It 1613 * can fault on any access. 1614 */ 1615 for (;;) { 1616 /* 1617 * Try the uncontested case. This should be done in userland. 1618 */ 1619 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1620 1621 /* The acquire succeeded. */ 1622 if (owner == UMUTEX_UNOWNED) { 1623 error = 0; 1624 break; 1625 } 1626 1627 /* The address was invalid. */ 1628 if (owner == -1) { 1629 error = EFAULT; 1630 break; 1631 } 1632 1633 /* If no one owns it but it is contested try to acquire it. */ 1634 if (owner == UMUTEX_CONTESTED) { 1635 owner = casuword32(&m->m_owner, 1636 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1637 1638 if (owner == UMUTEX_CONTESTED) { 1639 umtxq_lock(&uq->uq_key); 1640 error = umtx_pi_claim(pi, td); 1641 umtxq_unlock(&uq->uq_key); 1642 break; 1643 } 1644 1645 /* The address was invalid. */ 1646 if (owner == -1) { 1647 error = EFAULT; 1648 break; 1649 } 1650 1651 /* If this failed the lock has changed, restart. */ 1652 continue; 1653 } 1654 1655 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1656 (owner & ~UMUTEX_CONTESTED) == id) { 1657 error = EDEADLK; 1658 break; 1659 } 1660 1661 if (try != 0) { 1662 error = EBUSY; 1663 break; 1664 } 1665 1666 /* 1667 * If we caught a signal, we have retried and now 1668 * exit immediately. 1669 */ 1670 if (error != 0) 1671 break; 1672 1673 umtxq_lock(&uq->uq_key); 1674 umtxq_busy(&uq->uq_key); 1675 umtxq_unlock(&uq->uq_key); 1676 1677 /* 1678 * Set the contested bit so that a release in user space 1679 * knows to use the system call for unlock. If this fails 1680 * either some one else has acquired the lock or it has been 1681 * released. 1682 */ 1683 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1684 1685 /* The address was invalid. */ 1686 if (old == -1) { 1687 umtxq_lock(&uq->uq_key); 1688 umtxq_unbusy(&uq->uq_key); 1689 umtxq_unlock(&uq->uq_key); 1690 error = EFAULT; 1691 break; 1692 } 1693 1694 umtxq_lock(&uq->uq_key); 1695 umtxq_unbusy(&uq->uq_key); 1696 /* 1697 * We set the contested bit, sleep. Otherwise the lock changed 1698 * and we need to retry or we lost a race to the thread 1699 * unlocking the umtx. 
1700 */ 1701 if (old == owner) 1702 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 1703 "umtxpi", timo); 1704 umtxq_unlock(&uq->uq_key); 1705 } 1706 1707 umtxq_lock(&uq->uq_key); 1708 umtx_pi_unref(pi); 1709 umtxq_unlock(&uq->uq_key); 1710 1711 umtx_key_release(&uq->uq_key); 1712 return (error); 1713 } 1714 1715 /* 1716 * Unlock a PI mutex. 1717 */ 1718 static int 1719 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) 1720 { 1721 struct umtx_key key; 1722 struct umtx_q *uq_first, *uq_first2, *uq_me; 1723 struct umtx_pi *pi, *pi2; 1724 uint32_t owner, old, id; 1725 int error; 1726 int count; 1727 int pri; 1728 1729 id = td->td_tid; 1730 /* 1731 * Make sure we own this mtx. 1732 */ 1733 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1734 if (owner == -1) 1735 return (EFAULT); 1736 1737 if ((owner & ~UMUTEX_CONTESTED) != id) 1738 return (EPERM); 1739 1740 /* This should be done in userland */ 1741 if ((owner & UMUTEX_CONTESTED) == 0) { 1742 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1743 if (old == -1) 1744 return (EFAULT); 1745 if (old == owner) 1746 return (0); 1747 owner = old; 1748 } 1749 1750 /* We should only ever be in here for contested locks */ 1751 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1752 &key)) != 0) 1753 return (error); 1754 1755 umtxq_lock(&key); 1756 umtxq_busy(&key); 1757 count = umtxq_count_pi(&key, &uq_first); 1758 if (uq_first != NULL) { 1759 pi = uq_first->uq_pi_blocked; 1760 if (pi->pi_owner != curthread) { 1761 umtxq_unbusy(&key); 1762 umtxq_unlock(&key); 1763 /* userland messed the mutex */ 1764 return (EPERM); 1765 } 1766 uq_me = curthread->td_umtxq; 1767 mtx_lock_spin(&umtx_lock); 1768 pi->pi_owner = NULL; 1769 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); 1770 uq_first = TAILQ_FIRST(&pi->pi_blocked); 1771 pri = PRI_MAX; 1772 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 1773 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 1774 if (uq_first2 != NULL) { 1775 if (pri > UPRI(uq_first2->uq_thread)) 1776 pri = UPRI(uq_first2->uq_thread); 1777 } 1778 } 1779 thread_lock(curthread); 1780 sched_unlend_user_prio(curthread, pri); 1781 thread_unlock(curthread); 1782 mtx_unlock_spin(&umtx_lock); 1783 } 1784 umtxq_unlock(&key); 1785 1786 /* 1787 * When unlocking the umtx, it must be marked as unowned if 1788 * there is zero or one thread only waiting for it. 1789 * Otherwise, it must be marked as contested. 1790 */ 1791 old = casuword32(&m->m_owner, owner, 1792 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1793 1794 umtxq_lock(&key); 1795 if (uq_first != NULL) 1796 umtxq_signal_thread(uq_first); 1797 umtxq_unbusy(&key); 1798 umtxq_unlock(&key); 1799 umtx_key_release(&key); 1800 if (old == -1) 1801 return (EFAULT); 1802 if (old != owner) 1803 return (EINVAL); 1804 return (0); 1805 } 1806 1807 /* 1808 * Lock a PP mutex. 
1809 */ 1810 static int 1811 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1812 int try) 1813 { 1814 struct umtx_q *uq, *uq2; 1815 struct umtx_pi *pi; 1816 uint32_t ceiling; 1817 uint32_t owner, id; 1818 int error, pri, old_inherited_pri, su; 1819 1820 id = td->td_tid; 1821 uq = td->td_umtxq; 1822 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 1823 &uq->uq_key)) != 0) 1824 return (error); 1825 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1826 for (;;) { 1827 old_inherited_pri = uq->uq_inherited_pri; 1828 umtxq_lock(&uq->uq_key); 1829 umtxq_busy(&uq->uq_key); 1830 umtxq_unlock(&uq->uq_key); 1831 1832 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); 1833 if (ceiling > RTP_PRIO_MAX) { 1834 error = EINVAL; 1835 goto out; 1836 } 1837 1838 mtx_lock_spin(&umtx_lock); 1839 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 1840 mtx_unlock_spin(&umtx_lock); 1841 error = EINVAL; 1842 goto out; 1843 } 1844 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 1845 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 1846 thread_lock(td); 1847 if (uq->uq_inherited_pri < UPRI(td)) 1848 sched_lend_user_prio(td, uq->uq_inherited_pri); 1849 thread_unlock(td); 1850 } 1851 mtx_unlock_spin(&umtx_lock); 1852 1853 owner = casuword32(&m->m_owner, 1854 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1855 1856 if (owner == UMUTEX_CONTESTED) { 1857 error = 0; 1858 break; 1859 } 1860 1861 /* The address was invalid. */ 1862 if (owner == -1) { 1863 error = EFAULT; 1864 break; 1865 } 1866 1867 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1868 (owner & ~UMUTEX_CONTESTED) == id) { 1869 error = EDEADLK; 1870 break; 1871 } 1872 1873 if (try != 0) { 1874 error = EBUSY; 1875 break; 1876 } 1877 1878 /* 1879 * If we caught a signal, we have retried and now 1880 * exit immediately. 1881 */ 1882 if (error != 0) 1883 break; 1884 1885 umtxq_lock(&uq->uq_key); 1886 umtxq_insert(uq); 1887 umtxq_unbusy(&uq->uq_key); 1888 error = umtxq_sleep(uq, "umtxpp", timo); 1889 umtxq_remove(uq); 1890 umtxq_unlock(&uq->uq_key); 1891 1892 mtx_lock_spin(&umtx_lock); 1893 uq->uq_inherited_pri = old_inherited_pri; 1894 pri = PRI_MAX; 1895 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1896 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1897 if (uq2 != NULL) { 1898 if (pri > UPRI(uq2->uq_thread)) 1899 pri = UPRI(uq2->uq_thread); 1900 } 1901 } 1902 if (pri > uq->uq_inherited_pri) 1903 pri = uq->uq_inherited_pri; 1904 thread_lock(td); 1905 sched_unlend_user_prio(td, pri); 1906 thread_unlock(td); 1907 mtx_unlock_spin(&umtx_lock); 1908 } 1909 1910 if (error != 0) { 1911 mtx_lock_spin(&umtx_lock); 1912 uq->uq_inherited_pri = old_inherited_pri; 1913 pri = PRI_MAX; 1914 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1915 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1916 if (uq2 != NULL) { 1917 if (pri > UPRI(uq2->uq_thread)) 1918 pri = UPRI(uq2->uq_thread); 1919 } 1920 } 1921 if (pri > uq->uq_inherited_pri) 1922 pri = uq->uq_inherited_pri; 1923 thread_lock(td); 1924 sched_unlend_user_prio(td, pri); 1925 thread_unlock(td); 1926 mtx_unlock_spin(&umtx_lock); 1927 } 1928 1929 out: 1930 umtxq_lock(&uq->uq_key); 1931 umtxq_unbusy(&uq->uq_key); 1932 umtxq_unlock(&uq->uq_key); 1933 umtx_key_release(&uq->uq_key); 1934 return (error); 1935 } 1936 1937 /* 1938 * Unlock a PP mutex. 
1939 */ 1940 static int 1941 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 1942 { 1943 struct umtx_key key; 1944 struct umtx_q *uq, *uq2; 1945 struct umtx_pi *pi; 1946 uint32_t owner, id; 1947 uint32_t rceiling; 1948 int error, pri, new_inherited_pri, su; 1949 1950 id = td->td_tid; 1951 uq = td->td_umtxq; 1952 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1953 1954 /* 1955 * Make sure we own this mtx. 1956 */ 1957 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1958 if (owner == -1) 1959 return (EFAULT); 1960 1961 if ((owner & ~UMUTEX_CONTESTED) != id) 1962 return (EPERM); 1963 1964 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 1965 if (error != 0) 1966 return (error); 1967 1968 if (rceiling == -1) 1969 new_inherited_pri = PRI_MAX; 1970 else { 1971 rceiling = RTP_PRIO_MAX - rceiling; 1972 if (rceiling > RTP_PRIO_MAX) 1973 return (EINVAL); 1974 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 1975 } 1976 1977 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 1978 &key)) != 0) 1979 return (error); 1980 umtxq_lock(&key); 1981 umtxq_busy(&key); 1982 umtxq_unlock(&key); 1983 /* 1984 * For priority protected mutex, always set unlocked state 1985 * to UMUTEX_CONTESTED, so that userland always enters kernel 1986 * to lock the mutex, it is necessary because thread priority 1987 * has to be adjusted for such mutex. 1988 */ 1989 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 1990 UMUTEX_CONTESTED); 1991 1992 umtxq_lock(&key); 1993 if (error == 0) 1994 umtxq_signal(&key, 1); 1995 umtxq_unbusy(&key); 1996 umtxq_unlock(&key); 1997 1998 if (error == -1) 1999 error = EFAULT; 2000 else { 2001 mtx_lock_spin(&umtx_lock); 2002 if (su != 0) 2003 uq->uq_inherited_pri = new_inherited_pri; 2004 pri = PRI_MAX; 2005 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2006 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2007 if (uq2 != NULL) { 2008 if (pri > UPRI(uq2->uq_thread)) 2009 pri = UPRI(uq2->uq_thread); 2010 } 2011 } 2012 if (pri > uq->uq_inherited_pri) 2013 pri = uq->uq_inherited_pri; 2014 thread_lock(td); 2015 sched_unlend_user_prio(td, pri); 2016 thread_unlock(td); 2017 mtx_unlock_spin(&umtx_lock); 2018 } 2019 umtx_key_release(&key); 2020 return (error); 2021 } 2022 2023 static int 2024 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2025 uint32_t *old_ceiling) 2026 { 2027 struct umtx_q *uq; 2028 uint32_t save_ceiling; 2029 uint32_t owner, id; 2030 uint32_t flags; 2031 int error; 2032 2033 flags = fuword32(&m->m_flags); 2034 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2035 return (EINVAL); 2036 if (ceiling > RTP_PRIO_MAX) 2037 return (EINVAL); 2038 id = td->td_tid; 2039 uq = td->td_umtxq; 2040 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2041 &uq->uq_key)) != 0) 2042 return (error); 2043 for (;;) { 2044 umtxq_lock(&uq->uq_key); 2045 umtxq_busy(&uq->uq_key); 2046 umtxq_unlock(&uq->uq_key); 2047 2048 save_ceiling = fuword32(&m->m_ceilings[0]); 2049 2050 owner = casuword32(&m->m_owner, 2051 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2052 2053 if (owner == UMUTEX_CONTESTED) { 2054 suword32(&m->m_ceilings[0], ceiling); 2055 suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2056 UMUTEX_CONTESTED); 2057 error = 0; 2058 break; 2059 } 2060 2061 /* The address was invalid. 

static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
    struct umtx_q *uq;
    uint32_t save_ceiling;
    uint32_t owner, id;
    uint32_t flags;
    int error;

    flags = fuword32(&m->m_flags);
    if ((flags & UMUTEX_PRIO_PROTECT) == 0)
        return (EINVAL);
    if (ceiling > RTP_PRIO_MAX)
        return (EINVAL);
    id = td->td_tid;
    uq = td->td_umtxq;
    if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
        &uq->uq_key)) != 0)
        return (error);
    for (;;) {
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);

        save_ceiling = fuword32(&m->m_ceilings[0]);

        owner = casuword32(&m->m_owner,
            UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

        if (owner == UMUTEX_CONTESTED) {
            suword32(&m->m_ceilings[0], ceiling);
            suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
                UMUTEX_CONTESTED);
            error = 0;
            break;
        }

        /* The address was invalid. */
        if (owner == -1) {
            error = EFAULT;
            break;
        }

        if ((owner & ~UMUTEX_CONTESTED) == id) {
            suword32(&m->m_ceilings[0], ceiling);
            error = 0;
            break;
        }

        /*
         * If we caught a signal, we have retried and now
         * exit immediately.
         */
        if (error != 0)
            break;

        /*
         * Someone else holds the mutex; sleep until it is released
         * and then retry.
         */
        umtxq_lock(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unbusy(&uq->uq_key);
        error = umtxq_sleep(uq, "umtxpp", 0);
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
    }
    umtxq_lock(&uq->uq_key);
    if (error == 0)
        umtxq_signal(&uq->uq_key, INT_MAX);
    umtxq_unbusy(&uq->uq_key);
    umtxq_unlock(&uq->uq_key);
    umtx_key_release(&uq->uq_key);
    if (error == 0 && old_ceiling != NULL)
        suword32(old_ceiling, save_ceiling);
    return (error);
}
2165 */ 2166 static int 2167 do_unlock_umutex(struct thread *td, struct umutex *m) 2168 { 2169 uint32_t flags; 2170 2171 flags = fuword32(&m->m_flags); 2172 if (flags == -1) 2173 return (EFAULT); 2174 2175 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2176 case 0: 2177 return (do_unlock_normal(td, m, flags)); 2178 case UMUTEX_PRIO_INHERIT: 2179 return (do_unlock_pi(td, m, flags)); 2180 case UMUTEX_PRIO_PROTECT: 2181 return (do_unlock_pp(td, m, flags)); 2182 } 2183 2184 return (EINVAL); 2185 } 2186 2187 static int 2188 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2189 struct timespec *timeout, u_long wflags) 2190 { 2191 struct umtx_q *uq; 2192 struct timeval tv; 2193 struct timespec cts, ets, tts; 2194 uint32_t flags; 2195 int error; 2196 2197 uq = td->td_umtxq; 2198 flags = fuword32(&cv->c_flags); 2199 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2200 if (error != 0) 2201 return (error); 2202 umtxq_lock(&uq->uq_key); 2203 umtxq_busy(&uq->uq_key); 2204 umtxq_insert(uq); 2205 umtxq_unlock(&uq->uq_key); 2206 2207 /* 2208 * The magic thing is we should set c_has_waiters to 1 before 2209 * releasing user mutex. 2210 */ 2211 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1); 2212 2213 umtxq_lock(&uq->uq_key); 2214 umtxq_unbusy(&uq->uq_key); 2215 umtxq_unlock(&uq->uq_key); 2216 2217 error = do_unlock_umutex(td, m); 2218 2219 umtxq_lock(&uq->uq_key); 2220 if (error == 0) { 2221 if ((wflags & UMTX_CHECK_UNPARKING) && 2222 (td->td_pflags & TDP_WAKEUP)) { 2223 td->td_pflags &= ~TDP_WAKEUP; 2224 error = EINTR; 2225 } else if (timeout == NULL) { 2226 error = umtxq_sleep(uq, "ucond", 0); 2227 } else { 2228 getnanouptime(&ets); 2229 timespecadd(&ets, timeout); 2230 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2231 for (;;) { 2232 error = umtxq_sleep(uq, "ucond", tvtohz(&tv)); 2233 if (error != ETIMEDOUT) 2234 break; 2235 getnanouptime(&cts); 2236 if (timespeccmp(&cts, &ets, >=)) { 2237 error = ETIMEDOUT; 2238 break; 2239 } 2240 tts = ets; 2241 timespecsub(&tts, &cts); 2242 TIMESPEC_TO_TIMEVAL(&tv, &tts); 2243 } 2244 } 2245 } 2246 2247 if (error != 0) { 2248 if ((uq->uq_flags & UQF_UMTXQ) == 0) { 2249 /* 2250 * If we concurrently got do_cv_signal()d 2251 * and we got an error or UNIX signals or a timeout, 2252 * then, perform another umtxq_signal to avoid 2253 * consuming the wakeup. This may cause supurious 2254 * wakeup for another thread which was just queued, 2255 * but SUSV3 explicitly allows supurious wakeup to 2256 * occur, and indeed a kernel based implementation 2257 * can not avoid it. 2258 */ 2259 if (!umtxq_signal(&uq->uq_key, 1)) 2260 error = 0; 2261 } 2262 if (error == ERESTART) 2263 error = EINTR; 2264 } 2265 umtxq_remove(uq); 2266 umtxq_unlock(&uq->uq_key); 2267 umtx_key_release(&uq->uq_key); 2268 return (error); 2269 } 2270 2271 /* 2272 * Signal a userland condition variable. 
/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}

int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (_do_lock_umtx(td, uap->umtx, td->td_tid, 0));
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx(td, uap->umtx, td->td_tid));
}
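/*
 * The wrappers below unpack _umtx_op(2) arguments for the operation
 * table.  Illustrative userland sketch (made-up names): a futex-style
 * wait/wake on a plain 32-bit word maps to UMTX_OP_WAIT_UINT, which
 * sleeps only while the word still holds the expected value, paired
 * with UMTX_OP_WAKE.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <errno.h>

static int
example_futex_wait(u_int *addr, u_int expected)
{
	if (_umtx_op(addr, UMTX_OP_WAIT_UINT, expected, NULL, NULL) == -1)
		return (errno);
	return (0);
}

static void
example_futex_wake(u_int *addr, int nwake)
{
	(void)_umtx_op(addr, UMTX_OP_WAKE, nwake, NULL, NULL);
}
#endif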
static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, ts, 0));
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, ts, 1));
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val));
}

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, ts, 0));
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_lock_umutex(td, uap->obj, NULL, 1));
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umutex(td, uap->obj));
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}
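/*
 * The wrappers above and below repeat the same copyin-and-validate
 * step for the optional timeout.  A minimal sketch of factoring it
 * out; umtx_copyin_timeout is a hypothetical helper, not part of
 * this file:
 */
#if 0
static int
umtx_copyin_timeout(const void *addr, struct timespec *tsp)
{
	int error;

	error = copyin(addr, tsp, sizeof(*tsp));
	if (error == 0 &&
	    (tsp->tv_nsec >= 1000000000 || tsp->tv_nsec < 0))
		error = EINVAL;
	return (error);
}
#endif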
static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_cv_signal(td, uap->obj));
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_cv_broadcast(td, uap->obj));
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint		/* UMTX_OP_WAIT_UINT */
};

int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return ((*op_table[uap->op])(td, uap));
	return (EINVAL);
}

#ifdef COMPAT_IA32
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}

struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};

static inline int
copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		tsp->tv_sec = ts32.tv_sec;
		tsp->tv_nsec = ts32.tv_nsec;
	}
	return (error);
}
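/*
 * Only the operations that copy in a timespec, or that act on the
 * long-sized umtx word, need 32-bit variants below; word-sized
 * operations such as wake, signal, and broadcast are shared with the
 * native table.  As an illustration, a 32-bit caller's
 * struct timespec32 of { 1, 500000000 } widens to a native timespec
 * describing the same 1.5 s interval.
 */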
static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, ts, 1));
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, ts, 0));
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32		/* UMTX_OP_WAIT_UINT */
};

int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return ((*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap));
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
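/*
 * Thread lifecycle glue; the call sites live outside this file.
 * umtx_thread_init()/umtx_thread_fini() pair with thread object
 * creation and destruction, umtx_thread_alloc() resets the per-thread
 * state when a thread structure is (re)used, and umtx_thread_exit()
 * and the exec hook below both funnel into umtx_thread_cleanup().
 */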
2679 */ 2680 void 2681 umtx_thread_alloc(struct thread *td) 2682 { 2683 struct umtx_q *uq; 2684 2685 uq = td->td_umtxq; 2686 uq->uq_inherited_pri = PRI_MAX; 2687 2688 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 2689 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 2690 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 2691 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 2692 } 2693 2694 /* 2695 * exec() hook. 2696 */ 2697 static void 2698 umtx_exec_hook(void *arg __unused, struct proc *p __unused, 2699 struct image_params *imgp __unused) 2700 { 2701 umtx_thread_cleanup(curthread); 2702 } 2703 2704 /* 2705 * thread_exit() hook. 2706 */ 2707 void 2708 umtx_thread_exit(struct thread *td) 2709 { 2710 umtx_thread_cleanup(td); 2711 } 2712 2713 /* 2714 * clean up umtx data. 2715 */ 2716 static void 2717 umtx_thread_cleanup(struct thread *td) 2718 { 2719 struct umtx_q *uq; 2720 struct umtx_pi *pi; 2721 2722 if ((uq = td->td_umtxq) == NULL) 2723 return; 2724 2725 mtx_lock_spin(&umtx_lock); 2726 uq->uq_inherited_pri = PRI_MAX; 2727 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 2728 pi->pi_owner = NULL; 2729 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 2730 } 2731 td->td_flags &= ~TDF_UBORROWING; 2732 mtx_unlock_spin(&umtx_lock); 2733 } 2734