1 /*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_compat.h" 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/limits.h> 35 #include <sys/lock.h> 36 #include <sys/malloc.h> 37 #include <sys/mutex.h> 38 #include <sys/priv.h> 39 #include <sys/proc.h> 40 #include <sys/sched.h> 41 #include <sys/smp.h> 42 #include <sys/sysctl.h> 43 #include <sys/sysent.h> 44 #include <sys/systm.h> 45 #include <sys/sysproto.h> 46 #include <sys/eventhandler.h> 47 #include <sys/umtx.h> 48 49 #include <vm/vm.h> 50 #include <vm/vm_param.h> 51 #include <vm/pmap.h> 52 #include <vm/vm_map.h> 53 #include <vm/vm_object.h> 54 55 #include <machine/cpu.h> 56 57 #ifdef COMPAT_IA32 58 #include <compat/freebsd32/freebsd32_proto.h> 59 #endif 60 61 #define TYPE_SIMPLE_WAIT 0 62 #define TYPE_CV 1 63 #define TYPE_SIMPLE_LOCK 2 64 #define TYPE_NORMAL_UMUTEX 3 65 #define TYPE_PI_UMUTEX 4 66 #define TYPE_PP_UMUTEX 5 67 #define TYPE_RWLOCK 6 68 69 /* Key to represent a unique userland synchronous object */ 70 struct umtx_key { 71 int hash; 72 int type; 73 int shared; 74 union { 75 struct { 76 vm_object_t object; 77 uintptr_t offset; 78 } shared; 79 struct { 80 struct vmspace *vs; 81 uintptr_t addr; 82 } private; 83 struct { 84 void *a; 85 uintptr_t b; 86 } both; 87 } info; 88 }; 89 90 /* Priority inheritance mutex info. */ 91 struct umtx_pi { 92 /* Owner thread */ 93 struct thread *pi_owner; 94 95 /* Reference count */ 96 int pi_refcount; 97 98 /* List entry to link umtx holding by thread */ 99 TAILQ_ENTRY(umtx_pi) pi_link; 100 101 /* List entry in hash */ 102 TAILQ_ENTRY(umtx_pi) pi_hashlink; 103 104 /* List for waiters */ 105 TAILQ_HEAD(,umtx_q) pi_blocked; 106 107 /* Identify a userland lock object */ 108 struct umtx_key pi_key; 109 }; 110 111 /* A userland synchronous object user. */ 112 struct umtx_q { 113 /* Linked list for the hash. */ 114 TAILQ_ENTRY(umtx_q) uq_link; 115 116 /* Umtx key. */ 117 struct umtx_key uq_key; 118 119 /* Umtx flags. */ 120 int uq_flags; 121 #define UQF_UMTXQ 0x0001 122 123 /* The thread waits on. */ 124 struct thread *uq_thread; 125 126 /* 127 * Blocked on PI mutex. read can use chain lock 128 * or umtx_lock, write must have both chain lock and 129 * umtx_lock being hold. 130 */ 131 struct umtx_pi *uq_pi_blocked; 132 133 /* On blocked list */ 134 TAILQ_ENTRY(umtx_q) uq_lockq; 135 136 /* Thread contending with us */ 137 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 138 139 /* Inherited priority from PP mutex */ 140 u_char uq_inherited_pri; 141 }; 142 143 TAILQ_HEAD(umtxq_head, umtx_q); 144 145 /* Userland lock object's wait-queue chain */ 146 struct umtxq_chain { 147 /* Lock for this chain. */ 148 struct mtx uc_lock; 149 150 /* List of sleep queues. */ 151 struct umtxq_head uc_queue[2]; 152 #define UMTX_SHARED_QUEUE 0 153 #define UMTX_EXCLUSIVE_QUEUE 1 154 155 /* Busy flag */ 156 char uc_busy; 157 158 /* Chain lock waiters */ 159 int uc_waiters; 160 161 /* All PI in the list */ 162 TAILQ_HEAD(,umtx_pi) uc_pi_list; 163 }; 164 165 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 166 167 /* 168 * Don't propagate time-sharing priority, there is a security reason, 169 * a user can simply introduce PI-mutex, let thread A lock the mutex, 170 * and let another thread B block on the mutex, because B is 171 * sleeping, its priority will be boosted, this causes A's priority to 172 * be boosted via priority propagating too and will never be lowered even 173 * if it is using 100%CPU, this is unfair to other processes. 174 */ 175 176 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 177 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 178 PRI_MAX_TIMESHARE : (td)->td_user_pri) 179 180 #define GOLDEN_RATIO_PRIME 2654404609U 181 #define UMTX_CHAINS 128 182 #define UMTX_SHIFTS (__WORD_BIT - 7) 183 184 #define THREAD_SHARE 0 185 #define PROCESS_SHARE 1 186 #define AUTO_SHARE 2 187 188 #define GET_SHARE(flags) \ 189 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 190 191 #define BUSY_SPINS 200 192 193 static uma_zone_t umtx_pi_zone; 194 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 195 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 196 static int umtx_pi_allocated; 197 198 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 199 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 200 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 201 202 static void umtxq_sysinit(void *); 203 static void umtxq_hash(struct umtx_key *key); 204 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 205 static void umtxq_lock(struct umtx_key *key); 206 static void umtxq_unlock(struct umtx_key *key); 207 static void umtxq_busy(struct umtx_key *key); 208 static void umtxq_unbusy(struct umtx_key *key); 209 static void umtxq_insert_queue(struct umtx_q *uq, int q); 210 static void umtxq_remove_queue(struct umtx_q *uq, int q); 211 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo); 212 static int umtxq_count(struct umtx_key *key); 213 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2); 214 static int umtx_key_get(void *addr, int type, int share, 215 struct umtx_key *key); 216 static void umtx_key_release(struct umtx_key *key); 217 static struct umtx_pi *umtx_pi_alloc(int); 218 static void umtx_pi_free(struct umtx_pi *pi); 219 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri); 220 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); 221 static void umtx_thread_cleanup(struct thread *td); 222 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 223 struct image_params *imgp __unused); 224 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 225 226 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 227 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 228 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 229 230 static struct mtx umtx_lock; 231 232 static void 233 umtxq_sysinit(void *arg __unused) 234 { 235 int i, j; 236 237 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 238 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 239 for (i = 0; i < 2; ++i) { 240 for (j = 0; j < UMTX_CHAINS; ++j) { 241 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 242 MTX_DEF | MTX_DUPOK); 243 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]); 244 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]); 245 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 246 umtxq_chains[i][j].uc_busy = 0; 247 umtxq_chains[i][j].uc_waiters = 0; 248 } 249 } 250 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); 251 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 252 EVENTHANDLER_PRI_ANY); 253 } 254 255 struct umtx_q * 256 umtxq_alloc(void) 257 { 258 struct umtx_q *uq; 259 260 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 261 TAILQ_INIT(&uq->uq_pi_contested); 262 uq->uq_inherited_pri = PRI_MAX; 263 return (uq); 264 } 265 266 void 267 umtxq_free(struct umtx_q *uq) 268 { 269 free(uq, M_UMTX); 270 } 271 272 static inline void 273 umtxq_hash(struct umtx_key *key) 274 { 275 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; 276 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 277 } 278 279 static inline int 280 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2) 281 { 282 return (k1->type == k2->type && 283 k1->info.both.a == k2->info.both.a && 284 k1->info.both.b == k2->info.both.b); 285 } 286 287 static inline struct umtxq_chain * 288 umtxq_getchain(struct umtx_key *key) 289 { 290 if (key->type <= TYPE_CV) 291 return (&umtxq_chains[1][key->hash]); 292 return (&umtxq_chains[0][key->hash]); 293 } 294 295 /* 296 * Lock a chain. 297 */ 298 static inline void 299 umtxq_lock(struct umtx_key *key) 300 { 301 struct umtxq_chain *uc; 302 303 uc = umtxq_getchain(key); 304 mtx_lock(&uc->uc_lock); 305 } 306 307 /* 308 * Unlock a chain. 309 */ 310 static inline void 311 umtxq_unlock(struct umtx_key *key) 312 { 313 struct umtxq_chain *uc; 314 315 uc = umtxq_getchain(key); 316 mtx_unlock(&uc->uc_lock); 317 } 318 319 /* 320 * Set chain to busy state when following operation 321 * may be blocked (kernel mutex can not be used). 322 */ 323 static inline void 324 umtxq_busy(struct umtx_key *key) 325 { 326 struct umtxq_chain *uc; 327 328 uc = umtxq_getchain(key); 329 mtx_assert(&uc->uc_lock, MA_OWNED); 330 if (uc->uc_busy) { 331 #ifdef SMP 332 if (smp_cpus > 1) { 333 int count = BUSY_SPINS; 334 if (count > 0) { 335 umtxq_unlock(key); 336 while (uc->uc_busy && --count > 0) 337 cpu_spinwait(); 338 umtxq_lock(key); 339 } 340 } 341 #endif 342 while (uc->uc_busy) { 343 uc->uc_waiters++; 344 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 345 uc->uc_waiters--; 346 } 347 } 348 uc->uc_busy = 1; 349 } 350 351 /* 352 * Unbusy a chain. 353 */ 354 static inline void 355 umtxq_unbusy(struct umtx_key *key) 356 { 357 struct umtxq_chain *uc; 358 359 uc = umtxq_getchain(key); 360 mtx_assert(&uc->uc_lock, MA_OWNED); 361 KASSERT(uc->uc_busy != 0, ("not busy")); 362 uc->uc_busy = 0; 363 if (uc->uc_waiters) 364 wakeup_one(uc); 365 } 366 367 static inline void 368 umtxq_insert_queue(struct umtx_q *uq, int q) 369 { 370 struct umtxq_chain *uc; 371 372 uc = umtxq_getchain(&uq->uq_key); 373 UMTXQ_LOCKED_ASSERT(uc); 374 TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link); 375 uq->uq_flags |= UQF_UMTXQ; 376 } 377 378 static inline void 379 umtxq_remove_queue(struct umtx_q *uq, int q) 380 { 381 struct umtxq_chain *uc; 382 383 uc = umtxq_getchain(&uq->uq_key); 384 UMTXQ_LOCKED_ASSERT(uc); 385 if (uq->uq_flags & UQF_UMTXQ) { 386 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link); 387 uq->uq_flags &= ~UQF_UMTXQ; 388 } 389 } 390 391 /* 392 * Check if there are multiple waiters 393 */ 394 static int 395 umtxq_count(struct umtx_key *key) 396 { 397 struct umtxq_chain *uc; 398 struct umtx_q *uq; 399 int count = 0; 400 401 uc = umtxq_getchain(key); 402 UMTXQ_LOCKED_ASSERT(uc); 403 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) { 404 if (umtx_key_match(&uq->uq_key, key)) { 405 if (++count > 1) 406 break; 407 } 408 } 409 return (count); 410 } 411 412 /* 413 * Check if there are multiple PI waiters and returns first 414 * waiter. 415 */ 416 static int 417 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 418 { 419 struct umtxq_chain *uc; 420 struct umtx_q *uq; 421 int count = 0; 422 423 *first = NULL; 424 uc = umtxq_getchain(key); 425 UMTXQ_LOCKED_ASSERT(uc); 426 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) { 427 if (umtx_key_match(&uq->uq_key, key)) { 428 if (++count > 1) 429 break; 430 *first = uq; 431 } 432 } 433 return (count); 434 } 435 436 /* 437 * Wake up threads waiting on an userland object. 438 */ 439 440 static int 441 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 442 { 443 struct umtxq_chain *uc; 444 struct umtx_q *uq, *next; 445 int ret; 446 447 ret = 0; 448 uc = umtxq_getchain(key); 449 UMTXQ_LOCKED_ASSERT(uc); 450 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) { 451 if (umtx_key_match(&uq->uq_key, key)) { 452 umtxq_remove_queue(uq, q); 453 wakeup(uq); 454 if (++ret >= n_wake) 455 break; 456 } 457 } 458 return (ret); 459 } 460 461 462 /* 463 * Wake up specified thread. 464 */ 465 static inline void 466 umtxq_signal_thread(struct umtx_q *uq) 467 { 468 struct umtxq_chain *uc; 469 470 uc = umtxq_getchain(&uq->uq_key); 471 UMTXQ_LOCKED_ASSERT(uc); 472 umtxq_remove(uq); 473 wakeup(uq); 474 } 475 476 /* 477 * Put thread into sleep state, before sleeping, check if 478 * thread was removed from umtx queue. 479 */ 480 static inline int 481 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo) 482 { 483 struct umtxq_chain *uc; 484 int error; 485 486 uc = umtxq_getchain(&uq->uq_key); 487 UMTXQ_LOCKED_ASSERT(uc); 488 if (!(uq->uq_flags & UQF_UMTXQ)) 489 return (0); 490 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); 491 if (error == EWOULDBLOCK) 492 error = ETIMEDOUT; 493 return (error); 494 } 495 496 /* 497 * Convert userspace address into unique logical address. 498 */ 499 static int 500 umtx_key_get(void *addr, int type, int share, struct umtx_key *key) 501 { 502 struct thread *td = curthread; 503 vm_map_t map; 504 vm_map_entry_t entry; 505 vm_pindex_t pindex; 506 vm_prot_t prot; 507 boolean_t wired; 508 509 key->type = type; 510 if (share == THREAD_SHARE) { 511 key->shared = 0; 512 key->info.private.vs = td->td_proc->p_vmspace; 513 key->info.private.addr = (uintptr_t)addr; 514 } else { 515 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 516 map = &td->td_proc->p_vmspace->vm_map; 517 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 518 &entry, &key->info.shared.object, &pindex, &prot, 519 &wired) != KERN_SUCCESS) { 520 return EFAULT; 521 } 522 523 if ((share == PROCESS_SHARE) || 524 (share == AUTO_SHARE && 525 VM_INHERIT_SHARE == entry->inheritance)) { 526 key->shared = 1; 527 key->info.shared.offset = entry->offset + entry->start - 528 (vm_offset_t)addr; 529 vm_object_reference(key->info.shared.object); 530 } else { 531 key->shared = 0; 532 key->info.private.vs = td->td_proc->p_vmspace; 533 key->info.private.addr = (uintptr_t)addr; 534 } 535 vm_map_lookup_done(map, entry); 536 } 537 538 umtxq_hash(key); 539 return (0); 540 } 541 542 /* 543 * Release key. 544 */ 545 static inline void 546 umtx_key_release(struct umtx_key *key) 547 { 548 if (key->shared) 549 vm_object_deallocate(key->info.shared.object); 550 } 551 552 /* 553 * Lock a umtx object. 554 */ 555 static int 556 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo) 557 { 558 struct umtx_q *uq; 559 u_long owner; 560 u_long old; 561 int error = 0; 562 563 uq = td->td_umtxq; 564 565 /* 566 * Care must be exercised when dealing with umtx structure. It 567 * can fault on any access. 568 */ 569 for (;;) { 570 /* 571 * Try the uncontested case. This should be done in userland. 572 */ 573 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 574 575 /* The acquire succeeded. */ 576 if (owner == UMTX_UNOWNED) 577 return (0); 578 579 /* The address was invalid. */ 580 if (owner == -1) 581 return (EFAULT); 582 583 /* If no one owns it but it is contested try to acquire it. */ 584 if (owner == UMTX_CONTESTED) { 585 owner = casuword(&umtx->u_owner, 586 UMTX_CONTESTED, id | UMTX_CONTESTED); 587 588 if (owner == UMTX_CONTESTED) 589 return (0); 590 591 /* The address was invalid. */ 592 if (owner == -1) 593 return (EFAULT); 594 595 /* If this failed the lock has changed, restart. */ 596 continue; 597 } 598 599 /* 600 * If we caught a signal, we have retried and now 601 * exit immediately. 602 */ 603 if (error != 0) 604 return (error); 605 606 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 607 AUTO_SHARE, &uq->uq_key)) != 0) 608 return (error); 609 610 umtxq_lock(&uq->uq_key); 611 umtxq_busy(&uq->uq_key); 612 umtxq_insert(uq); 613 umtxq_unbusy(&uq->uq_key); 614 umtxq_unlock(&uq->uq_key); 615 616 /* 617 * Set the contested bit so that a release in user space 618 * knows to use the system call for unlock. If this fails 619 * either some one else has acquired the lock or it has been 620 * released. 621 */ 622 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 623 624 /* The address was invalid. */ 625 if (old == -1) { 626 umtxq_lock(&uq->uq_key); 627 umtxq_remove(uq); 628 umtxq_unlock(&uq->uq_key); 629 umtx_key_release(&uq->uq_key); 630 return (EFAULT); 631 } 632 633 /* 634 * We set the contested bit, sleep. Otherwise the lock changed 635 * and we need to retry or we lost a race to the thread 636 * unlocking the umtx. 637 */ 638 umtxq_lock(&uq->uq_key); 639 if (old == owner) 640 error = umtxq_sleep(uq, "umtx", timo); 641 umtxq_remove(uq); 642 umtxq_unlock(&uq->uq_key); 643 umtx_key_release(&uq->uq_key); 644 } 645 646 return (0); 647 } 648 649 /* 650 * Lock a umtx object. 651 */ 652 static int 653 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 654 struct timespec *timeout) 655 { 656 struct timespec ts, ts2, ts3; 657 struct timeval tv; 658 int error; 659 660 if (timeout == NULL) { 661 error = _do_lock_umtx(td, umtx, id, 0); 662 /* Mutex locking is restarted if it is interrupted. */ 663 if (error == EINTR) 664 error = ERESTART; 665 } else { 666 getnanouptime(&ts); 667 timespecadd(&ts, timeout); 668 TIMESPEC_TO_TIMEVAL(&tv, timeout); 669 for (;;) { 670 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv)); 671 if (error != ETIMEDOUT) 672 break; 673 getnanouptime(&ts2); 674 if (timespeccmp(&ts2, &ts, >=)) { 675 error = ETIMEDOUT; 676 break; 677 } 678 ts3 = ts; 679 timespecsub(&ts3, &ts2); 680 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 681 } 682 /* Timed-locking is not restarted. */ 683 if (error == ERESTART) 684 error = EINTR; 685 } 686 return (error); 687 } 688 689 /* 690 * Unlock a umtx object. 691 */ 692 static int 693 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 694 { 695 struct umtx_key key; 696 u_long owner; 697 u_long old; 698 int error; 699 int count; 700 701 /* 702 * Make sure we own this mtx. 703 */ 704 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 705 if (owner == -1) 706 return (EFAULT); 707 708 if ((owner & ~UMTX_CONTESTED) != id) 709 return (EPERM); 710 711 /* This should be done in userland */ 712 if ((owner & UMTX_CONTESTED) == 0) { 713 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 714 if (old == -1) 715 return (EFAULT); 716 if (old == owner) 717 return (0); 718 owner = old; 719 } 720 721 /* We should only ever be in here for contested locks */ 722 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 723 &key)) != 0) 724 return (error); 725 726 umtxq_lock(&key); 727 umtxq_busy(&key); 728 count = umtxq_count(&key); 729 umtxq_unlock(&key); 730 731 /* 732 * When unlocking the umtx, it must be marked as unowned if 733 * there is zero or one thread only waiting for it. 734 * Otherwise, it must be marked as contested. 735 */ 736 old = casuword(&umtx->u_owner, owner, 737 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 738 umtxq_lock(&key); 739 umtxq_signal(&key,1); 740 umtxq_unbusy(&key); 741 umtxq_unlock(&key); 742 umtx_key_release(&key); 743 if (old == -1) 744 return (EFAULT); 745 if (old != owner) 746 return (EINVAL); 747 return (0); 748 } 749 750 #ifdef COMPAT_IA32 751 752 /* 753 * Lock a umtx object. 754 */ 755 static int 756 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo) 757 { 758 struct umtx_q *uq; 759 uint32_t owner; 760 uint32_t old; 761 int error = 0; 762 763 uq = td->td_umtxq; 764 765 /* 766 * Care must be exercised when dealing with umtx structure. It 767 * can fault on any access. 768 */ 769 for (;;) { 770 /* 771 * Try the uncontested case. This should be done in userland. 772 */ 773 owner = casuword32(m, UMUTEX_UNOWNED, id); 774 775 /* The acquire succeeded. */ 776 if (owner == UMUTEX_UNOWNED) 777 return (0); 778 779 /* The address was invalid. */ 780 if (owner == -1) 781 return (EFAULT); 782 783 /* If no one owns it but it is contested try to acquire it. */ 784 if (owner == UMUTEX_CONTESTED) { 785 owner = casuword32(m, 786 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 787 if (owner == UMUTEX_CONTESTED) 788 return (0); 789 790 /* The address was invalid. */ 791 if (owner == -1) 792 return (EFAULT); 793 794 /* If this failed the lock has changed, restart. */ 795 continue; 796 } 797 798 /* 799 * If we caught a signal, we have retried and now 800 * exit immediately. 801 */ 802 if (error != 0) 803 return (error); 804 805 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 806 AUTO_SHARE, &uq->uq_key)) != 0) 807 return (error); 808 809 umtxq_lock(&uq->uq_key); 810 umtxq_busy(&uq->uq_key); 811 umtxq_insert(uq); 812 umtxq_unbusy(&uq->uq_key); 813 umtxq_unlock(&uq->uq_key); 814 815 /* 816 * Set the contested bit so that a release in user space 817 * knows to use the system call for unlock. If this fails 818 * either some one else has acquired the lock or it has been 819 * released. 820 */ 821 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 822 823 /* The address was invalid. */ 824 if (old == -1) { 825 umtxq_lock(&uq->uq_key); 826 umtxq_remove(uq); 827 umtxq_unlock(&uq->uq_key); 828 umtx_key_release(&uq->uq_key); 829 return (EFAULT); 830 } 831 832 /* 833 * We set the contested bit, sleep. Otherwise the lock changed 834 * and we need to retry or we lost a race to the thread 835 * unlocking the umtx. 836 */ 837 umtxq_lock(&uq->uq_key); 838 if (old == owner) 839 error = umtxq_sleep(uq, "umtx", timo); 840 umtxq_remove(uq); 841 umtxq_unlock(&uq->uq_key); 842 umtx_key_release(&uq->uq_key); 843 } 844 845 return (0); 846 } 847 848 /* 849 * Lock a umtx object. 850 */ 851 static int 852 do_lock_umtx32(struct thread *td, void *m, uint32_t id, 853 struct timespec *timeout) 854 { 855 struct timespec ts, ts2, ts3; 856 struct timeval tv; 857 int error; 858 859 if (timeout == NULL) { 860 error = _do_lock_umtx32(td, m, id, 0); 861 /* Mutex locking is restarted if it is interrupted. */ 862 if (error == EINTR) 863 error = ERESTART; 864 } else { 865 getnanouptime(&ts); 866 timespecadd(&ts, timeout); 867 TIMESPEC_TO_TIMEVAL(&tv, timeout); 868 for (;;) { 869 error = _do_lock_umtx32(td, m, id, tvtohz(&tv)); 870 if (error != ETIMEDOUT) 871 break; 872 getnanouptime(&ts2); 873 if (timespeccmp(&ts2, &ts, >=)) { 874 error = ETIMEDOUT; 875 break; 876 } 877 ts3 = ts; 878 timespecsub(&ts3, &ts2); 879 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 880 } 881 /* Timed-locking is not restarted. */ 882 if (error == ERESTART) 883 error = EINTR; 884 } 885 return (error); 886 } 887 888 /* 889 * Unlock a umtx object. 890 */ 891 static int 892 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 893 { 894 struct umtx_key key; 895 uint32_t owner; 896 uint32_t old; 897 int error; 898 int count; 899 900 /* 901 * Make sure we own this mtx. 902 */ 903 owner = fuword32(m); 904 if (owner == -1) 905 return (EFAULT); 906 907 if ((owner & ~UMUTEX_CONTESTED) != id) 908 return (EPERM); 909 910 /* This should be done in userland */ 911 if ((owner & UMUTEX_CONTESTED) == 0) { 912 old = casuword32(m, owner, UMUTEX_UNOWNED); 913 if (old == -1) 914 return (EFAULT); 915 if (old == owner) 916 return (0); 917 owner = old; 918 } 919 920 /* We should only ever be in here for contested locks */ 921 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 922 &key)) != 0) 923 return (error); 924 925 umtxq_lock(&key); 926 umtxq_busy(&key); 927 count = umtxq_count(&key); 928 umtxq_unlock(&key); 929 930 /* 931 * When unlocking the umtx, it must be marked as unowned if 932 * there is zero or one thread only waiting for it. 933 * Otherwise, it must be marked as contested. 934 */ 935 old = casuword32(m, owner, 936 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 937 umtxq_lock(&key); 938 umtxq_signal(&key,1); 939 umtxq_unbusy(&key); 940 umtxq_unlock(&key); 941 umtx_key_release(&key); 942 if (old == -1) 943 return (EFAULT); 944 if (old != owner) 945 return (EINVAL); 946 return (0); 947 } 948 #endif 949 950 /* 951 * Fetch and compare value, sleep on the address if value is not changed. 952 */ 953 static int 954 do_wait(struct thread *td, void *addr, u_long id, 955 struct timespec *timeout, int compat32, int is_private) 956 { 957 struct umtx_q *uq; 958 struct timespec ts, ts2, ts3; 959 struct timeval tv; 960 u_long tmp; 961 int error = 0; 962 963 uq = td->td_umtxq; 964 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 965 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 966 return (error); 967 968 umtxq_lock(&uq->uq_key); 969 umtxq_insert(uq); 970 umtxq_unlock(&uq->uq_key); 971 if (compat32 == 0) 972 tmp = fuword(addr); 973 else 974 tmp = fuword32(addr); 975 if (tmp != id) { 976 umtxq_lock(&uq->uq_key); 977 umtxq_remove(uq); 978 umtxq_unlock(&uq->uq_key); 979 } else if (timeout == NULL) { 980 umtxq_lock(&uq->uq_key); 981 error = umtxq_sleep(uq, "uwait", 0); 982 umtxq_remove(uq); 983 umtxq_unlock(&uq->uq_key); 984 } else { 985 getnanouptime(&ts); 986 timespecadd(&ts, timeout); 987 TIMESPEC_TO_TIMEVAL(&tv, timeout); 988 umtxq_lock(&uq->uq_key); 989 for (;;) { 990 error = umtxq_sleep(uq, "uwait", tvtohz(&tv)); 991 if (!(uq->uq_flags & UQF_UMTXQ)) 992 break; 993 if (error != ETIMEDOUT) 994 break; 995 umtxq_unlock(&uq->uq_key); 996 getnanouptime(&ts2); 997 if (timespeccmp(&ts2, &ts, >=)) { 998 error = ETIMEDOUT; 999 umtxq_lock(&uq->uq_key); 1000 break; 1001 } 1002 ts3 = ts; 1003 timespecsub(&ts3, &ts2); 1004 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 1005 umtxq_lock(&uq->uq_key); 1006 } 1007 umtxq_remove(uq); 1008 umtxq_unlock(&uq->uq_key); 1009 } 1010 umtx_key_release(&uq->uq_key); 1011 if (error == ERESTART) 1012 error = EINTR; 1013 return (error); 1014 } 1015 1016 /* 1017 * Wake up threads sleeping on the specified address. 1018 */ 1019 int 1020 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1021 { 1022 struct umtx_key key; 1023 int ret; 1024 1025 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1026 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1027 return (ret); 1028 umtxq_lock(&key); 1029 ret = umtxq_signal(&key, n_wake); 1030 umtxq_unlock(&key); 1031 umtx_key_release(&key); 1032 return (0); 1033 } 1034 1035 /* 1036 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1037 */ 1038 static int 1039 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1040 int try) 1041 { 1042 struct umtx_q *uq; 1043 uint32_t owner, old, id; 1044 int error = 0; 1045 1046 id = td->td_tid; 1047 uq = td->td_umtxq; 1048 1049 /* 1050 * Care must be exercised when dealing with umtx structure. It 1051 * can fault on any access. 1052 */ 1053 for (;;) { 1054 /* 1055 * Try the uncontested case. This should be done in userland. 1056 */ 1057 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1058 1059 /* The acquire succeeded. */ 1060 if (owner == UMUTEX_UNOWNED) 1061 return (0); 1062 1063 /* The address was invalid. */ 1064 if (owner == -1) 1065 return (EFAULT); 1066 1067 /* If no one owns it but it is contested try to acquire it. */ 1068 if (owner == UMUTEX_CONTESTED) { 1069 owner = casuword32(&m->m_owner, 1070 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1071 1072 if (owner == UMUTEX_CONTESTED) 1073 return (0); 1074 1075 /* The address was invalid. */ 1076 if (owner == -1) 1077 return (EFAULT); 1078 1079 /* If this failed the lock has changed, restart. */ 1080 continue; 1081 } 1082 1083 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1084 (owner & ~UMUTEX_CONTESTED) == id) 1085 return (EDEADLK); 1086 1087 if (try != 0) 1088 return (EBUSY); 1089 1090 /* 1091 * If we caught a signal, we have retried and now 1092 * exit immediately. 1093 */ 1094 if (error != 0) 1095 return (error); 1096 1097 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1098 GET_SHARE(flags), &uq->uq_key)) != 0) 1099 return (error); 1100 1101 umtxq_lock(&uq->uq_key); 1102 umtxq_busy(&uq->uq_key); 1103 umtxq_insert(uq); 1104 umtxq_unbusy(&uq->uq_key); 1105 umtxq_unlock(&uq->uq_key); 1106 1107 /* 1108 * Set the contested bit so that a release in user space 1109 * knows to use the system call for unlock. If this fails 1110 * either some one else has acquired the lock or it has been 1111 * released. 1112 */ 1113 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1114 1115 /* The address was invalid. */ 1116 if (old == -1) { 1117 umtxq_lock(&uq->uq_key); 1118 umtxq_remove(uq); 1119 umtxq_unlock(&uq->uq_key); 1120 umtx_key_release(&uq->uq_key); 1121 return (EFAULT); 1122 } 1123 1124 /* 1125 * We set the contested bit, sleep. Otherwise the lock changed 1126 * and we need to retry or we lost a race to the thread 1127 * unlocking the umtx. 1128 */ 1129 umtxq_lock(&uq->uq_key); 1130 if (old == owner) 1131 error = umtxq_sleep(uq, "umtxn", timo); 1132 umtxq_remove(uq); 1133 umtxq_unlock(&uq->uq_key); 1134 umtx_key_release(&uq->uq_key); 1135 } 1136 1137 return (0); 1138 } 1139 1140 /* 1141 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1142 */ 1143 /* 1144 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1145 */ 1146 static int 1147 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) 1148 { 1149 struct umtx_key key; 1150 uint32_t owner, old, id; 1151 int error; 1152 int count; 1153 1154 id = td->td_tid; 1155 /* 1156 * Make sure we own this mtx. 1157 */ 1158 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1159 if (owner == -1) 1160 return (EFAULT); 1161 1162 if ((owner & ~UMUTEX_CONTESTED) != id) 1163 return (EPERM); 1164 1165 /* This should be done in userland */ 1166 if ((owner & UMUTEX_CONTESTED) == 0) { 1167 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1168 if (old == -1) 1169 return (EFAULT); 1170 if (old == owner) 1171 return (0); 1172 owner = old; 1173 } 1174 1175 /* We should only ever be in here for contested locks */ 1176 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1177 &key)) != 0) 1178 return (error); 1179 1180 umtxq_lock(&key); 1181 umtxq_busy(&key); 1182 count = umtxq_count(&key); 1183 umtxq_unlock(&key); 1184 1185 /* 1186 * When unlocking the umtx, it must be marked as unowned if 1187 * there is zero or one thread only waiting for it. 1188 * Otherwise, it must be marked as contested. 1189 */ 1190 old = casuword32(&m->m_owner, owner, 1191 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1192 umtxq_lock(&key); 1193 umtxq_signal(&key,1); 1194 umtxq_unbusy(&key); 1195 umtxq_unlock(&key); 1196 umtx_key_release(&key); 1197 if (old == -1) 1198 return (EFAULT); 1199 if (old != owner) 1200 return (EINVAL); 1201 return (0); 1202 } 1203 1204 static inline struct umtx_pi * 1205 umtx_pi_alloc(int flags) 1206 { 1207 struct umtx_pi *pi; 1208 1209 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1210 TAILQ_INIT(&pi->pi_blocked); 1211 atomic_add_int(&umtx_pi_allocated, 1); 1212 return (pi); 1213 } 1214 1215 static inline void 1216 umtx_pi_free(struct umtx_pi *pi) 1217 { 1218 uma_zfree(umtx_pi_zone, pi); 1219 atomic_add_int(&umtx_pi_allocated, -1); 1220 } 1221 1222 /* 1223 * Adjust the thread's position on a pi_state after its priority has been 1224 * changed. 1225 */ 1226 static int 1227 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1228 { 1229 struct umtx_q *uq, *uq1, *uq2; 1230 struct thread *td1; 1231 1232 mtx_assert(&umtx_lock, MA_OWNED); 1233 if (pi == NULL) 1234 return (0); 1235 1236 uq = td->td_umtxq; 1237 1238 /* 1239 * Check if the thread needs to be moved on the blocked chain. 1240 * It needs to be moved if either its priority is lower than 1241 * the previous thread or higher than the next thread. 1242 */ 1243 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1244 uq2 = TAILQ_NEXT(uq, uq_lockq); 1245 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1246 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1247 /* 1248 * Remove thread from blocked chain and determine where 1249 * it should be moved to. 1250 */ 1251 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1252 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1253 td1 = uq1->uq_thread; 1254 MPASS(td1->td_proc->p_magic == P_MAGIC); 1255 if (UPRI(td1) > UPRI(td)) 1256 break; 1257 } 1258 1259 if (uq1 == NULL) 1260 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1261 else 1262 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1263 } 1264 return (1); 1265 } 1266 1267 /* 1268 * Propagate priority when a thread is blocked on POSIX 1269 * PI mutex. 1270 */ 1271 static void 1272 umtx_propagate_priority(struct thread *td) 1273 { 1274 struct umtx_q *uq; 1275 struct umtx_pi *pi; 1276 int pri; 1277 1278 mtx_assert(&umtx_lock, MA_OWNED); 1279 pri = UPRI(td); 1280 uq = td->td_umtxq; 1281 pi = uq->uq_pi_blocked; 1282 if (pi == NULL) 1283 return; 1284 1285 for (;;) { 1286 td = pi->pi_owner; 1287 if (td == NULL) 1288 return; 1289 1290 MPASS(td->td_proc != NULL); 1291 MPASS(td->td_proc->p_magic == P_MAGIC); 1292 1293 if (UPRI(td) <= pri) 1294 return; 1295 1296 thread_lock(td); 1297 sched_lend_user_prio(td, pri); 1298 thread_unlock(td); 1299 1300 /* 1301 * Pick up the lock that td is blocked on. 1302 */ 1303 uq = td->td_umtxq; 1304 pi = uq->uq_pi_blocked; 1305 /* Resort td on the list if needed. */ 1306 if (!umtx_pi_adjust_thread(pi, td)) 1307 break; 1308 } 1309 } 1310 1311 /* 1312 * Unpropagate priority for a PI mutex when a thread blocked on 1313 * it is interrupted by signal or resumed by others. 1314 */ 1315 static void 1316 umtx_unpropagate_priority(struct umtx_pi *pi) 1317 { 1318 struct umtx_q *uq, *uq_owner; 1319 struct umtx_pi *pi2; 1320 int pri, oldpri; 1321 1322 mtx_assert(&umtx_lock, MA_OWNED); 1323 1324 while (pi != NULL && pi->pi_owner != NULL) { 1325 pri = PRI_MAX; 1326 uq_owner = pi->pi_owner->td_umtxq; 1327 1328 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1329 uq = TAILQ_FIRST(&pi2->pi_blocked); 1330 if (uq != NULL) { 1331 if (pri > UPRI(uq->uq_thread)) 1332 pri = UPRI(uq->uq_thread); 1333 } 1334 } 1335 1336 if (pri > uq_owner->uq_inherited_pri) 1337 pri = uq_owner->uq_inherited_pri; 1338 thread_lock(pi->pi_owner); 1339 oldpri = pi->pi_owner->td_user_pri; 1340 sched_unlend_user_prio(pi->pi_owner, pri); 1341 thread_unlock(pi->pi_owner); 1342 umtx_pi_adjust_locked(pi->pi_owner, oldpri); 1343 pi = uq_owner->uq_pi_blocked; 1344 } 1345 } 1346 1347 /* 1348 * Insert a PI mutex into owned list. 1349 */ 1350 static void 1351 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1352 { 1353 struct umtx_q *uq_owner; 1354 1355 uq_owner = owner->td_umtxq; 1356 mtx_assert(&umtx_lock, MA_OWNED); 1357 if (pi->pi_owner != NULL) 1358 panic("pi_ower != NULL"); 1359 pi->pi_owner = owner; 1360 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1361 } 1362 1363 /* 1364 * Claim ownership of a PI mutex. 1365 */ 1366 static int 1367 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1368 { 1369 struct umtx_q *uq, *uq_owner; 1370 1371 uq_owner = owner->td_umtxq; 1372 mtx_lock_spin(&umtx_lock); 1373 if (pi->pi_owner == owner) { 1374 mtx_unlock_spin(&umtx_lock); 1375 return (0); 1376 } 1377 1378 if (pi->pi_owner != NULL) { 1379 /* 1380 * userland may have already messed the mutex, sigh. 1381 */ 1382 mtx_unlock_spin(&umtx_lock); 1383 return (EPERM); 1384 } 1385 umtx_pi_setowner(pi, owner); 1386 uq = TAILQ_FIRST(&pi->pi_blocked); 1387 if (uq != NULL) { 1388 int pri; 1389 1390 pri = UPRI(uq->uq_thread); 1391 thread_lock(owner); 1392 if (pri < UPRI(owner)) 1393 sched_lend_user_prio(owner, pri); 1394 thread_unlock(owner); 1395 } 1396 mtx_unlock_spin(&umtx_lock); 1397 return (0); 1398 } 1399 1400 static void 1401 umtx_pi_adjust_locked(struct thread *td, u_char oldpri) 1402 { 1403 struct umtx_q *uq; 1404 struct umtx_pi *pi; 1405 1406 uq = td->td_umtxq; 1407 /* 1408 * Pick up the lock that td is blocked on. 1409 */ 1410 pi = uq->uq_pi_blocked; 1411 MPASS(pi != NULL); 1412 1413 /* Resort the turnstile on the list. */ 1414 if (!umtx_pi_adjust_thread(pi, td)) 1415 return; 1416 1417 /* 1418 * If our priority was lowered and we are at the head of the 1419 * turnstile, then propagate our new priority up the chain. 1420 */ 1421 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri) 1422 umtx_propagate_priority(td); 1423 } 1424 1425 /* 1426 * Adjust a thread's order position in its blocked PI mutex, 1427 * this may result new priority propagating process. 1428 */ 1429 void 1430 umtx_pi_adjust(struct thread *td, u_char oldpri) 1431 { 1432 struct umtx_q *uq; 1433 struct umtx_pi *pi; 1434 1435 uq = td->td_umtxq; 1436 mtx_lock_spin(&umtx_lock); 1437 /* 1438 * Pick up the lock that td is blocked on. 1439 */ 1440 pi = uq->uq_pi_blocked; 1441 if (pi != NULL) 1442 umtx_pi_adjust_locked(td, oldpri); 1443 mtx_unlock_spin(&umtx_lock); 1444 } 1445 1446 /* 1447 * Sleep on a PI mutex. 1448 */ 1449 static int 1450 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1451 uint32_t owner, const char *wmesg, int timo) 1452 { 1453 struct umtxq_chain *uc; 1454 struct thread *td, *td1; 1455 struct umtx_q *uq1; 1456 int pri; 1457 int error = 0; 1458 1459 td = uq->uq_thread; 1460 KASSERT(td == curthread, ("inconsistent uq_thread")); 1461 uc = umtxq_getchain(&uq->uq_key); 1462 UMTXQ_LOCKED_ASSERT(uc); 1463 umtxq_insert(uq); 1464 if (pi->pi_owner == NULL) { 1465 /* XXX 1466 * Current, We only support process private PI-mutex, 1467 * non-contended PI-mutexes are locked in userland. 1468 * Process shared PI-mutex should always be initialized 1469 * by kernel and be registered in kernel, locking should 1470 * always be done by kernel to avoid security problems. 1471 * For process private PI-mutex, we can find owner 1472 * thread and boost its priority safely. 1473 */ 1474 PROC_LOCK(curproc); 1475 td1 = thread_find(curproc, owner); 1476 mtx_lock_spin(&umtx_lock); 1477 if (td1 != NULL && pi->pi_owner == NULL) { 1478 uq1 = td1->td_umtxq; 1479 umtx_pi_setowner(pi, td1); 1480 } 1481 PROC_UNLOCK(curproc); 1482 } else { 1483 mtx_lock_spin(&umtx_lock); 1484 } 1485 1486 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1487 pri = UPRI(uq1->uq_thread); 1488 if (pri > UPRI(td)) 1489 break; 1490 } 1491 1492 if (uq1 != NULL) 1493 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1494 else 1495 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1496 1497 uq->uq_pi_blocked = pi; 1498 thread_lock(td); 1499 td->td_flags |= TDF_UPIBLOCKED; 1500 thread_unlock(td); 1501 mtx_unlock_spin(&umtx_lock); 1502 umtxq_unlock(&uq->uq_key); 1503 1504 mtx_lock_spin(&umtx_lock); 1505 umtx_propagate_priority(td); 1506 mtx_unlock_spin(&umtx_lock); 1507 1508 umtxq_lock(&uq->uq_key); 1509 if (uq->uq_flags & UQF_UMTXQ) { 1510 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); 1511 if (error == EWOULDBLOCK) 1512 error = ETIMEDOUT; 1513 if (uq->uq_flags & UQF_UMTXQ) { 1514 umtxq_busy(&uq->uq_key); 1515 umtxq_remove(uq); 1516 umtxq_unbusy(&uq->uq_key); 1517 } 1518 } 1519 umtxq_unlock(&uq->uq_key); 1520 1521 mtx_lock_spin(&umtx_lock); 1522 uq->uq_pi_blocked = NULL; 1523 thread_lock(td); 1524 td->td_flags &= ~TDF_UPIBLOCKED; 1525 thread_unlock(td); 1526 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1527 umtx_unpropagate_priority(pi); 1528 mtx_unlock_spin(&umtx_lock); 1529 1530 umtxq_lock(&uq->uq_key); 1531 1532 return (error); 1533 } 1534 1535 /* 1536 * Add reference count for a PI mutex. 1537 */ 1538 static void 1539 umtx_pi_ref(struct umtx_pi *pi) 1540 { 1541 struct umtxq_chain *uc; 1542 1543 uc = umtxq_getchain(&pi->pi_key); 1544 UMTXQ_LOCKED_ASSERT(uc); 1545 pi->pi_refcount++; 1546 } 1547 1548 /* 1549 * Decrease reference count for a PI mutex, if the counter 1550 * is decreased to zero, its memory space is freed. 1551 */ 1552 static void 1553 umtx_pi_unref(struct umtx_pi *pi) 1554 { 1555 struct umtxq_chain *uc; 1556 int free = 0; 1557 1558 uc = umtxq_getchain(&pi->pi_key); 1559 UMTXQ_LOCKED_ASSERT(uc); 1560 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1561 if (--pi->pi_refcount == 0) { 1562 mtx_lock_spin(&umtx_lock); 1563 if (pi->pi_owner != NULL) { 1564 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, 1565 pi, pi_link); 1566 pi->pi_owner = NULL; 1567 } 1568 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1569 ("blocked queue not empty")); 1570 mtx_unlock_spin(&umtx_lock); 1571 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1572 free = 1; 1573 } 1574 if (free) 1575 umtx_pi_free(pi); 1576 } 1577 1578 /* 1579 * Find a PI mutex in hash table. 1580 */ 1581 static struct umtx_pi * 1582 umtx_pi_lookup(struct umtx_key *key) 1583 { 1584 struct umtxq_chain *uc; 1585 struct umtx_pi *pi; 1586 1587 uc = umtxq_getchain(key); 1588 UMTXQ_LOCKED_ASSERT(uc); 1589 1590 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1591 if (umtx_key_match(&pi->pi_key, key)) { 1592 return (pi); 1593 } 1594 } 1595 return (NULL); 1596 } 1597 1598 /* 1599 * Insert a PI mutex into hash table. 1600 */ 1601 static inline void 1602 umtx_pi_insert(struct umtx_pi *pi) 1603 { 1604 struct umtxq_chain *uc; 1605 1606 uc = umtxq_getchain(&pi->pi_key); 1607 UMTXQ_LOCKED_ASSERT(uc); 1608 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1609 } 1610 1611 /* 1612 * Lock a PI mutex. 1613 */ 1614 static int 1615 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1616 int try) 1617 { 1618 struct umtx_q *uq; 1619 struct umtx_pi *pi, *new_pi; 1620 uint32_t id, owner, old; 1621 int error; 1622 1623 id = td->td_tid; 1624 uq = td->td_umtxq; 1625 1626 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1627 &uq->uq_key)) != 0) 1628 return (error); 1629 umtxq_lock(&uq->uq_key); 1630 pi = umtx_pi_lookup(&uq->uq_key); 1631 if (pi == NULL) { 1632 new_pi = umtx_pi_alloc(M_NOWAIT); 1633 if (new_pi == NULL) { 1634 umtxq_unlock(&uq->uq_key); 1635 new_pi = umtx_pi_alloc(M_WAITOK); 1636 new_pi->pi_key = uq->uq_key; 1637 umtxq_lock(&uq->uq_key); 1638 pi = umtx_pi_lookup(&uq->uq_key); 1639 if (pi != NULL) { 1640 umtx_pi_free(new_pi); 1641 new_pi = NULL; 1642 } 1643 } 1644 if (new_pi != NULL) { 1645 new_pi->pi_key = uq->uq_key; 1646 umtx_pi_insert(new_pi); 1647 pi = new_pi; 1648 } 1649 } 1650 umtx_pi_ref(pi); 1651 umtxq_unlock(&uq->uq_key); 1652 1653 /* 1654 * Care must be exercised when dealing with umtx structure. It 1655 * can fault on any access. 1656 */ 1657 for (;;) { 1658 /* 1659 * Try the uncontested case. This should be done in userland. 1660 */ 1661 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1662 1663 /* The acquire succeeded. */ 1664 if (owner == UMUTEX_UNOWNED) { 1665 error = 0; 1666 break; 1667 } 1668 1669 /* The address was invalid. */ 1670 if (owner == -1) { 1671 error = EFAULT; 1672 break; 1673 } 1674 1675 /* If no one owns it but it is contested try to acquire it. */ 1676 if (owner == UMUTEX_CONTESTED) { 1677 owner = casuword32(&m->m_owner, 1678 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1679 1680 if (owner == UMUTEX_CONTESTED) { 1681 umtxq_lock(&uq->uq_key); 1682 error = umtx_pi_claim(pi, td); 1683 umtxq_unlock(&uq->uq_key); 1684 break; 1685 } 1686 1687 /* The address was invalid. */ 1688 if (owner == -1) { 1689 error = EFAULT; 1690 break; 1691 } 1692 1693 /* If this failed the lock has changed, restart. */ 1694 continue; 1695 } 1696 1697 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1698 (owner & ~UMUTEX_CONTESTED) == id) { 1699 error = EDEADLK; 1700 break; 1701 } 1702 1703 if (try != 0) { 1704 error = EBUSY; 1705 break; 1706 } 1707 1708 /* 1709 * If we caught a signal, we have retried and now 1710 * exit immediately. 1711 */ 1712 if (error != 0) 1713 break; 1714 1715 umtxq_lock(&uq->uq_key); 1716 umtxq_busy(&uq->uq_key); 1717 umtxq_unlock(&uq->uq_key); 1718 1719 /* 1720 * Set the contested bit so that a release in user space 1721 * knows to use the system call for unlock. If this fails 1722 * either some one else has acquired the lock or it has been 1723 * released. 1724 */ 1725 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1726 1727 /* The address was invalid. */ 1728 if (old == -1) { 1729 umtxq_lock(&uq->uq_key); 1730 umtxq_unbusy(&uq->uq_key); 1731 umtxq_unlock(&uq->uq_key); 1732 error = EFAULT; 1733 break; 1734 } 1735 1736 umtxq_lock(&uq->uq_key); 1737 umtxq_unbusy(&uq->uq_key); 1738 /* 1739 * We set the contested bit, sleep. Otherwise the lock changed 1740 * and we need to retry or we lost a race to the thread 1741 * unlocking the umtx. 1742 */ 1743 if (old == owner) 1744 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 1745 "umtxpi", timo); 1746 umtxq_unlock(&uq->uq_key); 1747 } 1748 1749 umtxq_lock(&uq->uq_key); 1750 umtx_pi_unref(pi); 1751 umtxq_unlock(&uq->uq_key); 1752 1753 umtx_key_release(&uq->uq_key); 1754 return (error); 1755 } 1756 1757 /* 1758 * Unlock a PI mutex. 1759 */ 1760 static int 1761 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) 1762 { 1763 struct umtx_key key; 1764 struct umtx_q *uq_first, *uq_first2, *uq_me; 1765 struct umtx_pi *pi, *pi2; 1766 uint32_t owner, old, id; 1767 int error; 1768 int count; 1769 int pri; 1770 1771 id = td->td_tid; 1772 /* 1773 * Make sure we own this mtx. 1774 */ 1775 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1776 if (owner == -1) 1777 return (EFAULT); 1778 1779 if ((owner & ~UMUTEX_CONTESTED) != id) 1780 return (EPERM); 1781 1782 /* This should be done in userland */ 1783 if ((owner & UMUTEX_CONTESTED) == 0) { 1784 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1785 if (old == -1) 1786 return (EFAULT); 1787 if (old == owner) 1788 return (0); 1789 owner = old; 1790 } 1791 1792 /* We should only ever be in here for contested locks */ 1793 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1794 &key)) != 0) 1795 return (error); 1796 1797 umtxq_lock(&key); 1798 umtxq_busy(&key); 1799 count = umtxq_count_pi(&key, &uq_first); 1800 if (uq_first != NULL) { 1801 pi = uq_first->uq_pi_blocked; 1802 if (pi->pi_owner != curthread) { 1803 umtxq_unbusy(&key); 1804 umtxq_unlock(&key); 1805 /* userland messed the mutex */ 1806 return (EPERM); 1807 } 1808 uq_me = curthread->td_umtxq; 1809 mtx_lock_spin(&umtx_lock); 1810 pi->pi_owner = NULL; 1811 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); 1812 uq_first = TAILQ_FIRST(&pi->pi_blocked); 1813 pri = PRI_MAX; 1814 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 1815 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 1816 if (uq_first2 != NULL) { 1817 if (pri > UPRI(uq_first2->uq_thread)) 1818 pri = UPRI(uq_first2->uq_thread); 1819 } 1820 } 1821 thread_lock(curthread); 1822 sched_unlend_user_prio(curthread, pri); 1823 thread_unlock(curthread); 1824 mtx_unlock_spin(&umtx_lock); 1825 } 1826 umtxq_unlock(&key); 1827 1828 /* 1829 * When unlocking the umtx, it must be marked as unowned if 1830 * there is zero or one thread only waiting for it. 1831 * Otherwise, it must be marked as contested. 1832 */ 1833 old = casuword32(&m->m_owner, owner, 1834 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1835 1836 umtxq_lock(&key); 1837 if (uq_first != NULL) 1838 umtxq_signal_thread(uq_first); 1839 umtxq_unbusy(&key); 1840 umtxq_unlock(&key); 1841 umtx_key_release(&key); 1842 if (old == -1) 1843 return (EFAULT); 1844 if (old != owner) 1845 return (EINVAL); 1846 return (0); 1847 } 1848 1849 /* 1850 * Lock a PP mutex. 1851 */ 1852 static int 1853 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1854 int try) 1855 { 1856 struct umtx_q *uq, *uq2; 1857 struct umtx_pi *pi; 1858 uint32_t ceiling; 1859 uint32_t owner, id; 1860 int error, pri, old_inherited_pri, su; 1861 1862 id = td->td_tid; 1863 uq = td->td_umtxq; 1864 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 1865 &uq->uq_key)) != 0) 1866 return (error); 1867 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1868 for (;;) { 1869 old_inherited_pri = uq->uq_inherited_pri; 1870 umtxq_lock(&uq->uq_key); 1871 umtxq_busy(&uq->uq_key); 1872 umtxq_unlock(&uq->uq_key); 1873 1874 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); 1875 if (ceiling > RTP_PRIO_MAX) { 1876 error = EINVAL; 1877 goto out; 1878 } 1879 1880 mtx_lock_spin(&umtx_lock); 1881 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 1882 mtx_unlock_spin(&umtx_lock); 1883 error = EINVAL; 1884 goto out; 1885 } 1886 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 1887 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 1888 thread_lock(td); 1889 if (uq->uq_inherited_pri < UPRI(td)) 1890 sched_lend_user_prio(td, uq->uq_inherited_pri); 1891 thread_unlock(td); 1892 } 1893 mtx_unlock_spin(&umtx_lock); 1894 1895 owner = casuword32(&m->m_owner, 1896 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1897 1898 if (owner == UMUTEX_CONTESTED) { 1899 error = 0; 1900 break; 1901 } 1902 1903 /* The address was invalid. */ 1904 if (owner == -1) { 1905 error = EFAULT; 1906 break; 1907 } 1908 1909 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1910 (owner & ~UMUTEX_CONTESTED) == id) { 1911 error = EDEADLK; 1912 break; 1913 } 1914 1915 if (try != 0) { 1916 error = EBUSY; 1917 break; 1918 } 1919 1920 /* 1921 * If we caught a signal, we have retried and now 1922 * exit immediately. 1923 */ 1924 if (error != 0) 1925 break; 1926 1927 umtxq_lock(&uq->uq_key); 1928 umtxq_insert(uq); 1929 umtxq_unbusy(&uq->uq_key); 1930 error = umtxq_sleep(uq, "umtxpp", timo); 1931 umtxq_remove(uq); 1932 umtxq_unlock(&uq->uq_key); 1933 1934 mtx_lock_spin(&umtx_lock); 1935 uq->uq_inherited_pri = old_inherited_pri; 1936 pri = PRI_MAX; 1937 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1938 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1939 if (uq2 != NULL) { 1940 if (pri > UPRI(uq2->uq_thread)) 1941 pri = UPRI(uq2->uq_thread); 1942 } 1943 } 1944 if (pri > uq->uq_inherited_pri) 1945 pri = uq->uq_inherited_pri; 1946 thread_lock(td); 1947 sched_unlend_user_prio(td, pri); 1948 thread_unlock(td); 1949 mtx_unlock_spin(&umtx_lock); 1950 } 1951 1952 if (error != 0) { 1953 mtx_lock_spin(&umtx_lock); 1954 uq->uq_inherited_pri = old_inherited_pri; 1955 pri = PRI_MAX; 1956 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1957 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1958 if (uq2 != NULL) { 1959 if (pri > UPRI(uq2->uq_thread)) 1960 pri = UPRI(uq2->uq_thread); 1961 } 1962 } 1963 if (pri > uq->uq_inherited_pri) 1964 pri = uq->uq_inherited_pri; 1965 thread_lock(td); 1966 sched_unlend_user_prio(td, pri); 1967 thread_unlock(td); 1968 mtx_unlock_spin(&umtx_lock); 1969 } 1970 1971 out: 1972 umtxq_lock(&uq->uq_key); 1973 umtxq_unbusy(&uq->uq_key); 1974 umtxq_unlock(&uq->uq_key); 1975 umtx_key_release(&uq->uq_key); 1976 return (error); 1977 } 1978 1979 /* 1980 * Unlock a PP mutex. 1981 */ 1982 static int 1983 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 1984 { 1985 struct umtx_key key; 1986 struct umtx_q *uq, *uq2; 1987 struct umtx_pi *pi; 1988 uint32_t owner, id; 1989 uint32_t rceiling; 1990 int error, pri, new_inherited_pri, su; 1991 1992 id = td->td_tid; 1993 uq = td->td_umtxq; 1994 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1995 1996 /* 1997 * Make sure we own this mtx. 1998 */ 1999 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 2000 if (owner == -1) 2001 return (EFAULT); 2002 2003 if ((owner & ~UMUTEX_CONTESTED) != id) 2004 return (EPERM); 2005 2006 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2007 if (error != 0) 2008 return (error); 2009 2010 if (rceiling == -1) 2011 new_inherited_pri = PRI_MAX; 2012 else { 2013 rceiling = RTP_PRIO_MAX - rceiling; 2014 if (rceiling > RTP_PRIO_MAX) 2015 return (EINVAL); 2016 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2017 } 2018 2019 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2020 &key)) != 0) 2021 return (error); 2022 umtxq_lock(&key); 2023 umtxq_busy(&key); 2024 umtxq_unlock(&key); 2025 /* 2026 * For priority protected mutex, always set unlocked state 2027 * to UMUTEX_CONTESTED, so that userland always enters kernel 2028 * to lock the mutex, it is necessary because thread priority 2029 * has to be adjusted for such mutex. 2030 */ 2031 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2032 UMUTEX_CONTESTED); 2033 2034 umtxq_lock(&key); 2035 if (error == 0) 2036 umtxq_signal(&key, 1); 2037 umtxq_unbusy(&key); 2038 umtxq_unlock(&key); 2039 2040 if (error == -1) 2041 error = EFAULT; 2042 else { 2043 mtx_lock_spin(&umtx_lock); 2044 if (su != 0) 2045 uq->uq_inherited_pri = new_inherited_pri; 2046 pri = PRI_MAX; 2047 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2048 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2049 if (uq2 != NULL) { 2050 if (pri > UPRI(uq2->uq_thread)) 2051 pri = UPRI(uq2->uq_thread); 2052 } 2053 } 2054 if (pri > uq->uq_inherited_pri) 2055 pri = uq->uq_inherited_pri; 2056 thread_lock(td); 2057 sched_unlend_user_prio(td, pri); 2058 thread_unlock(td); 2059 mtx_unlock_spin(&umtx_lock); 2060 } 2061 umtx_key_release(&key); 2062 return (error); 2063 } 2064 2065 static int 2066 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2067 uint32_t *old_ceiling) 2068 { 2069 struct umtx_q *uq; 2070 uint32_t save_ceiling; 2071 uint32_t owner, id; 2072 uint32_t flags; 2073 int error; 2074 2075 flags = fuword32(&m->m_flags); 2076 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2077 return (EINVAL); 2078 if (ceiling > RTP_PRIO_MAX) 2079 return (EINVAL); 2080 id = td->td_tid; 2081 uq = td->td_umtxq; 2082 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2083 &uq->uq_key)) != 0) 2084 return (error); 2085 for (;;) { 2086 umtxq_lock(&uq->uq_key); 2087 umtxq_busy(&uq->uq_key); 2088 umtxq_unlock(&uq->uq_key); 2089 2090 save_ceiling = fuword32(&m->m_ceilings[0]); 2091 2092 owner = casuword32(&m->m_owner, 2093 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2094 2095 if (owner == UMUTEX_CONTESTED) { 2096 suword32(&m->m_ceilings[0], ceiling); 2097 suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2098 UMUTEX_CONTESTED); 2099 error = 0; 2100 break; 2101 } 2102 2103 /* The address was invalid. */ 2104 if (owner == -1) { 2105 error = EFAULT; 2106 break; 2107 } 2108 2109 if ((owner & ~UMUTEX_CONTESTED) == id) { 2110 suword32(&m->m_ceilings[0], ceiling); 2111 error = 0; 2112 break; 2113 } 2114 2115 /* 2116 * If we caught a signal, we have retried and now 2117 * exit immediately. 2118 */ 2119 if (error != 0) 2120 break; 2121 2122 /* 2123 * We set the contested bit, sleep. Otherwise the lock changed 2124 * and we need to retry or we lost a race to the thread 2125 * unlocking the umtx. 2126 */ 2127 umtxq_lock(&uq->uq_key); 2128 umtxq_insert(uq); 2129 umtxq_unbusy(&uq->uq_key); 2130 error = umtxq_sleep(uq, "umtxpp", 0); 2131 umtxq_remove(uq); 2132 umtxq_unlock(&uq->uq_key); 2133 } 2134 umtxq_lock(&uq->uq_key); 2135 if (error == 0) 2136 umtxq_signal(&uq->uq_key, INT_MAX); 2137 umtxq_unbusy(&uq->uq_key); 2138 umtxq_unlock(&uq->uq_key); 2139 umtx_key_release(&uq->uq_key); 2140 if (error == 0 && old_ceiling != NULL) 2141 suword32(old_ceiling, save_ceiling); 2142 return (error); 2143 } 2144 2145 static int 2146 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo, 2147 int try) 2148 { 2149 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2150 case 0: 2151 return (_do_lock_normal(td, m, flags, timo, try)); 2152 case UMUTEX_PRIO_INHERIT: 2153 return (_do_lock_pi(td, m, flags, timo, try)); 2154 case UMUTEX_PRIO_PROTECT: 2155 return (_do_lock_pp(td, m, flags, timo, try)); 2156 } 2157 return (EINVAL); 2158 } 2159 2160 /* 2161 * Lock a userland POSIX mutex. 2162 */ 2163 static int 2164 do_lock_umutex(struct thread *td, struct umutex *m, 2165 struct timespec *timeout, int try) 2166 { 2167 struct timespec ts, ts2, ts3; 2168 struct timeval tv; 2169 uint32_t flags; 2170 int error; 2171 2172 flags = fuword32(&m->m_flags); 2173 if (flags == -1) 2174 return (EFAULT); 2175 2176 if (timeout == NULL) { 2177 error = _do_lock_umutex(td, m, flags, 0, try); 2178 /* Mutex locking is restarted if it is interrupted. */ 2179 if (error == EINTR) 2180 error = ERESTART; 2181 } else { 2182 getnanouptime(&ts); 2183 timespecadd(&ts, timeout); 2184 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2185 for (;;) { 2186 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try); 2187 if (error != ETIMEDOUT) 2188 break; 2189 getnanouptime(&ts2); 2190 if (timespeccmp(&ts2, &ts, >=)) { 2191 error = ETIMEDOUT; 2192 break; 2193 } 2194 ts3 = ts; 2195 timespecsub(&ts3, &ts2); 2196 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2197 } 2198 /* Timed-locking is not restarted. */ 2199 if (error == ERESTART) 2200 error = EINTR; 2201 } 2202 return (error); 2203 } 2204 2205 /* 2206 * Unlock a userland POSIX mutex. 2207 */ 2208 static int 2209 do_unlock_umutex(struct thread *td, struct umutex *m) 2210 { 2211 uint32_t flags; 2212 2213 flags = fuword32(&m->m_flags); 2214 if (flags == -1) 2215 return (EFAULT); 2216 2217 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2218 case 0: 2219 return (do_unlock_normal(td, m, flags)); 2220 case UMUTEX_PRIO_INHERIT: 2221 return (do_unlock_pi(td, m, flags)); 2222 case UMUTEX_PRIO_PROTECT: 2223 return (do_unlock_pp(td, m, flags)); 2224 } 2225 2226 return (EINVAL); 2227 } 2228 2229 static int 2230 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2231 struct timespec *timeout, u_long wflags) 2232 { 2233 struct umtx_q *uq; 2234 struct timeval tv; 2235 struct timespec cts, ets, tts; 2236 uint32_t flags; 2237 int error; 2238 2239 uq = td->td_umtxq; 2240 flags = fuword32(&cv->c_flags); 2241 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2242 if (error != 0) 2243 return (error); 2244 umtxq_lock(&uq->uq_key); 2245 umtxq_busy(&uq->uq_key); 2246 umtxq_insert(uq); 2247 umtxq_unlock(&uq->uq_key); 2248 2249 /* 2250 * The magic thing is we should set c_has_waiters to 1 before 2251 * releasing user mutex. 2252 */ 2253 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1); 2254 2255 umtxq_lock(&uq->uq_key); 2256 umtxq_unbusy(&uq->uq_key); 2257 umtxq_unlock(&uq->uq_key); 2258 2259 error = do_unlock_umutex(td, m); 2260 2261 umtxq_lock(&uq->uq_key); 2262 if (error == 0) { 2263 if ((wflags & UMTX_CHECK_UNPARKING) && 2264 (td->td_pflags & TDP_WAKEUP)) { 2265 td->td_pflags &= ~TDP_WAKEUP; 2266 error = EINTR; 2267 } else if (timeout == NULL) { 2268 error = umtxq_sleep(uq, "ucond", 0); 2269 } else { 2270 getnanouptime(&ets); 2271 timespecadd(&ets, timeout); 2272 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2273 for (;;) { 2274 error = umtxq_sleep(uq, "ucond", tvtohz(&tv)); 2275 if (error != ETIMEDOUT) 2276 break; 2277 getnanouptime(&cts); 2278 if (timespeccmp(&cts, &ets, >=)) { 2279 error = ETIMEDOUT; 2280 break; 2281 } 2282 tts = ets; 2283 timespecsub(&tts, &cts); 2284 TIMESPEC_TO_TIMEVAL(&tv, &tts); 2285 } 2286 } 2287 } 2288 2289 if (error != 0) { 2290 if ((uq->uq_flags & UQF_UMTXQ) == 0) { 2291 /* 2292 * If we concurrently got do_cv_signal()d 2293 * and we got an error or UNIX signals or a timeout, 2294 * then, perform another umtxq_signal to avoid 2295 * consuming the wakeup. This may cause supurious 2296 * wakeup for another thread which was just queued, 2297 * but SUSV3 explicitly allows supurious wakeup to 2298 * occur, and indeed a kernel based implementation 2299 * can not avoid it. 2300 */ 2301 if (!umtxq_signal(&uq->uq_key, 1)) 2302 error = 0; 2303 } 2304 if (error == ERESTART) 2305 error = EINTR; 2306 } 2307 umtxq_remove(uq); 2308 umtxq_unlock(&uq->uq_key); 2309 umtx_key_release(&uq->uq_key); 2310 return (error); 2311 } 2312 2313 /* 2314 * Signal a userland condition variable. 2315 */ 2316 static int 2317 do_cv_signal(struct thread *td, struct ucond *cv) 2318 { 2319 struct umtx_key key; 2320 int error, cnt, nwake; 2321 uint32_t flags; 2322 2323 flags = fuword32(&cv->c_flags); 2324 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2325 return (error); 2326 umtxq_lock(&key); 2327 umtxq_busy(&key); 2328 cnt = umtxq_count(&key); 2329 nwake = umtxq_signal(&key, 1); 2330 if (cnt <= nwake) { 2331 umtxq_unlock(&key); 2332 error = suword32( 2333 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2334 umtxq_lock(&key); 2335 } 2336 umtxq_unbusy(&key); 2337 umtxq_unlock(&key); 2338 umtx_key_release(&key); 2339 return (error); 2340 } 2341 2342 static int 2343 do_cv_broadcast(struct thread *td, struct ucond *cv) 2344 { 2345 struct umtx_key key; 2346 int error; 2347 uint32_t flags; 2348 2349 flags = fuword32(&cv->c_flags); 2350 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2351 return (error); 2352 2353 umtxq_lock(&key); 2354 umtxq_busy(&key); 2355 umtxq_signal(&key, INT_MAX); 2356 umtxq_unlock(&key); 2357 2358 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2359 2360 umtxq_lock(&key); 2361 umtxq_unbusy(&key); 2362 umtxq_unlock(&key); 2363 2364 umtx_key_release(&key); 2365 return (error); 2366 } 2367 2368 static int 2369 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo) 2370 { 2371 struct umtx_q *uq; 2372 uint32_t flags, wrflags; 2373 int32_t state, oldstate; 2374 int32_t blocked_readers; 2375 int error; 2376 2377 uq = td->td_umtxq; 2378 flags = fuword32(&rwlock->rw_flags); 2379 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2380 if (error != 0) 2381 return (error); 2382 2383 wrflags = URWLOCK_WRITE_OWNER; 2384 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2385 wrflags |= URWLOCK_WRITE_WAITERS; 2386 2387 for (;;) { 2388 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2389 /* try to lock it */ 2390 while (!(state & wrflags)) { 2391 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2392 umtx_key_release(&uq->uq_key); 2393 return (EAGAIN); 2394 } 2395 oldstate = casuword32(&rwlock->rw_state, state, state + 1); 2396 if (oldstate == state) { 2397 umtx_key_release(&uq->uq_key); 2398 return (0); 2399 } 2400 state = oldstate; 2401 } 2402 2403 if (error) 2404 break; 2405 2406 /* grab monitor lock */ 2407 umtxq_lock(&uq->uq_key); 2408 umtxq_busy(&uq->uq_key); 2409 umtxq_unlock(&uq->uq_key); 2410 2411 /* set read contention bit */ 2412 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) { 2413 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS); 2414 if (oldstate == state) 2415 goto sleep; 2416 state = oldstate; 2417 } 2418 2419 /* state is changed while setting flags, restart */ 2420 if (!(state & wrflags)) { 2421 umtxq_lock(&uq->uq_key); 2422 umtxq_unbusy(&uq->uq_key); 2423 umtxq_unlock(&uq->uq_key); 2424 continue; 2425 } 2426 2427 sleep: 2428 /* contention bit is set, before sleeping, increase read waiter count */ 2429 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2430 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2431 2432 while (state & wrflags) { 2433 umtxq_lock(&uq->uq_key); 2434 umtxq_insert(uq); 2435 umtxq_unbusy(&uq->uq_key); 2436 2437 error = umtxq_sleep(uq, "urdlck", timo); 2438 2439 umtxq_busy(&uq->uq_key); 2440 umtxq_remove(uq); 2441 umtxq_unlock(&uq->uq_key); 2442 if (error) 2443 break; 2444 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2445 } 2446 2447 /* decrease read waiter count, and may clear read contention bit */ 2448 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2449 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2450 if (blocked_readers == 1) { 2451 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2452 for (;;) { 2453 oldstate = casuword32(&rwlock->rw_state, state, 2454 state & ~URWLOCK_READ_WAITERS); 2455 if (oldstate == state) 2456 break; 2457 state = oldstate; 2458 } 2459 } 2460 2461 umtxq_lock(&uq->uq_key); 2462 umtxq_unbusy(&uq->uq_key); 2463 umtxq_unlock(&uq->uq_key); 2464 } 2465 umtx_key_release(&uq->uq_key); 2466 return (error); 2467 } 2468 2469 static int 2470 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout) 2471 { 2472 struct timespec ts, ts2, ts3; 2473 struct timeval tv; 2474 int error; 2475 2476 getnanouptime(&ts); 2477 timespecadd(&ts, timeout); 2478 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2479 for (;;) { 2480 error = do_rw_rdlock(td, obj, val, tvtohz(&tv)); 2481 if (error != ETIMEDOUT) 2482 break; 2483 getnanouptime(&ts2); 2484 if (timespeccmp(&ts2, &ts, >=)) { 2485 error = ETIMEDOUT; 2486 break; 2487 } 2488 ts3 = ts; 2489 timespecsub(&ts3, &ts2); 2490 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2491 } 2492 if (error == ERESTART) 2493 error = EINTR; 2494 return (error); 2495 } 2496 2497 static int 2498 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo) 2499 { 2500 struct umtx_q *uq; 2501 uint32_t flags; 2502 int32_t state, oldstate; 2503 int32_t blocked_writers; 2504 int error; 2505 2506 uq = td->td_umtxq; 2507 flags = fuword32(&rwlock->rw_flags); 2508 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2509 if (error != 0) 2510 return (error); 2511 2512 for (;;) { 2513 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2514 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2515 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER); 2516 if (oldstate == state) { 2517 umtx_key_release(&uq->uq_key); 2518 return (0); 2519 } 2520 state = oldstate; 2521 } 2522 2523 if (error) 2524 break; 2525 2526 /* grab monitor lock */ 2527 umtxq_lock(&uq->uq_key); 2528 umtxq_busy(&uq->uq_key); 2529 umtxq_unlock(&uq->uq_key); 2530 2531 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && 2532 (state & URWLOCK_WRITE_WAITERS) == 0) { 2533 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS); 2534 if (oldstate == state) 2535 goto sleep; 2536 state = oldstate; 2537 } 2538 2539 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2540 umtxq_lock(&uq->uq_key); 2541 umtxq_unbusy(&uq->uq_key); 2542 umtxq_unlock(&uq->uq_key); 2543 continue; 2544 } 2545 sleep: 2546 blocked_writers = fuword32(&rwlock->rw_blocked_writers); 2547 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2548 2549 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2550 umtxq_lock(&uq->uq_key); 2551 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2552 umtxq_unbusy(&uq->uq_key); 2553 2554 error = umtxq_sleep(uq, "uwrlck", timo); 2555 2556 umtxq_busy(&uq->uq_key); 2557 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2558 umtxq_unlock(&uq->uq_key); 2559 if (error) 2560 break; 2561 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2562 } 2563 2564 blocked_writers = fuword32(&rwlock->rw_blocked_writers); 2565 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2566 if (blocked_writers == 1) { 2567 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2568 for (;;) { 2569 oldstate = casuword32(&rwlock->rw_state, state, 2570 state & ~URWLOCK_WRITE_WAITERS); 2571 if (oldstate == state) 2572 break; 2573 state = oldstate; 2574 } 2575 } 2576 2577 umtxq_lock(&uq->uq_key); 2578 umtxq_unbusy(&uq->uq_key); 2579 umtxq_unlock(&uq->uq_key); 2580 } 2581 2582 umtx_key_release(&uq->uq_key); 2583 return (error); 2584 } 2585 2586 static int 2587 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout) 2588 { 2589 struct timespec ts, ts2, ts3; 2590 struct timeval tv; 2591 int error; 2592 2593 getnanouptime(&ts); 2594 timespecadd(&ts, timeout); 2595 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2596 for (;;) { 2597 error = do_rw_wrlock(td, obj, tvtohz(&tv)); 2598 if (error != ETIMEDOUT) 2599 break; 2600 getnanouptime(&ts2); 2601 if (timespeccmp(&ts2, &ts, >=)) { 2602 error = ETIMEDOUT; 2603 break; 2604 } 2605 ts3 = ts; 2606 timespecsub(&ts3, &ts2); 2607 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2608 } 2609 if (error == ERESTART) 2610 error = EINTR; 2611 return (error); 2612 } 2613 2614 static int 2615 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2616 { 2617 struct umtx_q *uq; 2618 uint32_t flags; 2619 int32_t state, oldstate; 2620 int error, q, count; 2621 2622 uq = td->td_umtxq; 2623 flags = fuword32(&rwlock->rw_flags); 2624 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2625 if (error != 0) 2626 return (error); 2627 2628 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2629 if (state & URWLOCK_WRITE_OWNER) { 2630 for (;;) { 2631 oldstate = casuword32(&rwlock->rw_state, state, 2632 state & ~URWLOCK_WRITE_OWNER); 2633 if (oldstate != state) { 2634 state = oldstate; 2635 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 2636 error = EPERM; 2637 goto out; 2638 } 2639 } else 2640 break; 2641 } 2642 } else if (URWLOCK_READER_COUNT(state) != 0) { 2643 for (;;) { 2644 oldstate = casuword32(&rwlock->rw_state, state, 2645 state - 1); 2646 if (oldstate != state) { 2647 state = oldstate; 2648 if (URWLOCK_READER_COUNT(oldstate) == 0) { 2649 error = EPERM; 2650 goto out; 2651 } 2652 } 2653 else 2654 break; 2655 } 2656 } else { 2657 error = EPERM; 2658 goto out; 2659 } 2660 2661 count = 0; 2662 2663 if (!(flags & URWLOCK_PREFER_READER)) { 2664 if (state & URWLOCK_WRITE_WAITERS) { 2665 count = 1; 2666 q = UMTX_EXCLUSIVE_QUEUE; 2667 } else if (state & URWLOCK_READ_WAITERS) { 2668 count = INT_MAX; 2669 q = UMTX_SHARED_QUEUE; 2670 } 2671 } else { 2672 if (state & URWLOCK_READ_WAITERS) { 2673 count = INT_MAX; 2674 q = UMTX_SHARED_QUEUE; 2675 } else if (state & URWLOCK_WRITE_WAITERS) { 2676 count = 1; 2677 q = UMTX_EXCLUSIVE_QUEUE; 2678 } 2679 } 2680 2681 if (count) { 2682 umtxq_lock(&uq->uq_key); 2683 umtxq_busy(&uq->uq_key); 2684 umtxq_signal_queue(&uq->uq_key, count, q); 2685 umtxq_unbusy(&uq->uq_key); 2686 umtxq_unlock(&uq->uq_key); 2687 } 2688 out: 2689 umtx_key_release(&uq->uq_key); 2690 return (error); 2691 } 2692 2693 int 2694 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap) 2695 /* struct umtx *umtx */ 2696 { 2697 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0); 2698 } 2699 2700 int 2701 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap) 2702 /* struct umtx *umtx */ 2703 { 2704 return do_unlock_umtx(td, uap->umtx, td->td_tid); 2705 } 2706 2707 static int 2708 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap) 2709 { 2710 struct timespec *ts, timeout; 2711 int error; 2712 2713 /* Allow a null timespec (wait forever). */ 2714 if (uap->uaddr2 == NULL) 2715 ts = NULL; 2716 else { 2717 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2718 if (error != 0) 2719 return (error); 2720 if (timeout.tv_nsec >= 1000000000 || 2721 timeout.tv_nsec < 0) { 2722 return (EINVAL); 2723 } 2724 ts = &timeout; 2725 } 2726 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 2727 } 2728 2729 static int 2730 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap) 2731 { 2732 return (do_unlock_umtx(td, uap->obj, uap->val)); 2733 } 2734 2735 static int 2736 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 2737 { 2738 struct timespec *ts, timeout; 2739 int error; 2740 2741 if (uap->uaddr2 == NULL) 2742 ts = NULL; 2743 else { 2744 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2745 if (error != 0) 2746 return (error); 2747 if (timeout.tv_nsec >= 1000000000 || 2748 timeout.tv_nsec < 0) 2749 return (EINVAL); 2750 ts = &timeout; 2751 } 2752 return do_wait(td, uap->obj, uap->val, ts, 0, 0); 2753 } 2754 2755 static int 2756 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 2757 { 2758 struct timespec *ts, timeout; 2759 int error; 2760 2761 if (uap->uaddr2 == NULL) 2762 ts = NULL; 2763 else { 2764 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2765 if (error != 0) 2766 return (error); 2767 if (timeout.tv_nsec >= 1000000000 || 2768 timeout.tv_nsec < 0) 2769 return (EINVAL); 2770 ts = &timeout; 2771 } 2772 return do_wait(td, uap->obj, uap->val, ts, 1, 0); 2773 } 2774 2775 static int 2776 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 2777 { 2778 struct timespec *ts, timeout; 2779 int error; 2780 2781 if (uap->uaddr2 == NULL) 2782 ts = NULL; 2783 else { 2784 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2785 if (error != 0) 2786 return (error); 2787 if (timeout.tv_nsec >= 1000000000 || 2788 timeout.tv_nsec < 0) 2789 return (EINVAL); 2790 ts = &timeout; 2791 } 2792 return do_wait(td, uap->obj, uap->val, ts, 1, 1); 2793 } 2794 2795 static int 2796 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 2797 { 2798 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 2799 } 2800 2801 static int 2802 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 2803 { 2804 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 2805 } 2806 2807 static int 2808 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 2809 { 2810 struct timespec *ts, timeout; 2811 int error; 2812 2813 /* Allow a null timespec (wait forever). */ 2814 if (uap->uaddr2 == NULL) 2815 ts = NULL; 2816 else { 2817 error = copyin(uap->uaddr2, &timeout, 2818 sizeof(timeout)); 2819 if (error != 0) 2820 return (error); 2821 if (timeout.tv_nsec >= 1000000000 || 2822 timeout.tv_nsec < 0) { 2823 return (EINVAL); 2824 } 2825 ts = &timeout; 2826 } 2827 return do_lock_umutex(td, uap->obj, ts, 0); 2828 } 2829 2830 static int 2831 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 2832 { 2833 return do_lock_umutex(td, uap->obj, NULL, 1); 2834 } 2835 2836 static int 2837 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 2838 { 2839 return do_unlock_umutex(td, uap->obj); 2840 } 2841 2842 static int 2843 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 2844 { 2845 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 2846 } 2847 2848 static int 2849 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 2850 { 2851 struct timespec *ts, timeout; 2852 int error; 2853 2854 /* Allow a null timespec (wait forever). */ 2855 if (uap->uaddr2 == NULL) 2856 ts = NULL; 2857 else { 2858 error = copyin(uap->uaddr2, &timeout, 2859 sizeof(timeout)); 2860 if (error != 0) 2861 return (error); 2862 if (timeout.tv_nsec >= 1000000000 || 2863 timeout.tv_nsec < 0) { 2864 return (EINVAL); 2865 } 2866 ts = &timeout; 2867 } 2868 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 2869 } 2870 2871 static int 2872 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 2873 { 2874 return do_cv_signal(td, uap->obj); 2875 } 2876 2877 static int 2878 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 2879 { 2880 return do_cv_broadcast(td, uap->obj); 2881 } 2882 2883 static int 2884 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 2885 { 2886 struct timespec timeout; 2887 int error; 2888 2889 /* Allow a null timespec (wait forever). */ 2890 if (uap->uaddr2 == NULL) { 2891 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 2892 } else { 2893 error = copyin(uap->uaddr2, &timeout, 2894 sizeof(timeout)); 2895 if (error != 0) 2896 return (error); 2897 if (timeout.tv_nsec >= 1000000000 || 2898 timeout.tv_nsec < 0) { 2899 return (EINVAL); 2900 } 2901 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); 2902 } 2903 return (error); 2904 } 2905 2906 static int 2907 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 2908 { 2909 struct timespec timeout; 2910 int error; 2911 2912 /* Allow a null timespec (wait forever). */ 2913 if (uap->uaddr2 == NULL) { 2914 error = do_rw_wrlock(td, uap->obj, 0); 2915 } else { 2916 error = copyin(uap->uaddr2, &timeout, 2917 sizeof(timeout)); 2918 if (error != 0) 2919 return (error); 2920 if (timeout.tv_nsec >= 1000000000 || 2921 timeout.tv_nsec < 0) { 2922 return (EINVAL); 2923 } 2924 2925 error = do_rw_wrlock2(td, uap->obj, &timeout); 2926 } 2927 return (error); 2928 } 2929 2930 static int 2931 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 2932 { 2933 return do_rw_unlock(td, uap->obj); 2934 } 2935 2936 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 2937 2938 static _umtx_op_func op_table[] = { 2939 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */ 2940 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */ 2941 __umtx_op_wait, /* UMTX_OP_WAIT */ 2942 __umtx_op_wake, /* UMTX_OP_WAKE */ 2943 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 2944 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 2945 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 2946 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 2947 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 2948 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 2949 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 2950 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 2951 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 2952 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 2953 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 2954 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 2955 __umtx_op_wake_private /* UMTX_OP_WAKE_PRIVATE */ 2956 }; 2957 2958 int 2959 _umtx_op(struct thread *td, struct _umtx_op_args *uap) 2960 { 2961 if ((unsigned)uap->op < UMTX_OP_MAX) 2962 return (*op_table[uap->op])(td, uap); 2963 return (EINVAL); 2964 } 2965 2966 #ifdef COMPAT_IA32 2967 int 2968 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap) 2969 /* struct umtx *umtx */ 2970 { 2971 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 2972 } 2973 2974 int 2975 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap) 2976 /* struct umtx *umtx */ 2977 { 2978 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 2979 } 2980 2981 struct timespec32 { 2982 u_int32_t tv_sec; 2983 u_int32_t tv_nsec; 2984 }; 2985 2986 static inline int 2987 copyin_timeout32(void *addr, struct timespec *tsp) 2988 { 2989 struct timespec32 ts32; 2990 int error; 2991 2992 error = copyin(addr, &ts32, sizeof(struct timespec32)); 2993 if (error == 0) { 2994 tsp->tv_sec = ts32.tv_sec; 2995 tsp->tv_nsec = ts32.tv_nsec; 2996 } 2997 return (error); 2998 } 2999 3000 static int 3001 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) 3002 { 3003 struct timespec *ts, timeout; 3004 int error; 3005 3006 /* Allow a null timespec (wait forever). */ 3007 if (uap->uaddr2 == NULL) 3008 ts = NULL; 3009 else { 3010 error = copyin_timeout32(uap->uaddr2, &timeout); 3011 if (error != 0) 3012 return (error); 3013 if (timeout.tv_nsec >= 1000000000 || 3014 timeout.tv_nsec < 0) { 3015 return (EINVAL); 3016 } 3017 ts = &timeout; 3018 } 3019 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3020 } 3021 3022 static int 3023 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) 3024 { 3025 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val)); 3026 } 3027 3028 static int 3029 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3030 { 3031 struct timespec *ts, timeout; 3032 int error; 3033 3034 if (uap->uaddr2 == NULL) 3035 ts = NULL; 3036 else { 3037 error = copyin_timeout32(uap->uaddr2, &timeout); 3038 if (error != 0) 3039 return (error); 3040 if (timeout.tv_nsec >= 1000000000 || 3041 timeout.tv_nsec < 0) 3042 return (EINVAL); 3043 ts = &timeout; 3044 } 3045 return do_wait(td, uap->obj, uap->val, ts, 1, 0); 3046 } 3047 3048 static int 3049 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3050 { 3051 struct timespec *ts, timeout; 3052 int error; 3053 3054 /* Allow a null timespec (wait forever). */ 3055 if (uap->uaddr2 == NULL) 3056 ts = NULL; 3057 else { 3058 error = copyin_timeout32(uap->uaddr2, &timeout); 3059 if (error != 0) 3060 return (error); 3061 if (timeout.tv_nsec >= 1000000000 || 3062 timeout.tv_nsec < 0) 3063 return (EINVAL); 3064 ts = &timeout; 3065 } 3066 return do_lock_umutex(td, uap->obj, ts, 0); 3067 } 3068 3069 static int 3070 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3071 { 3072 struct timespec *ts, timeout; 3073 int error; 3074 3075 /* Allow a null timespec (wait forever). */ 3076 if (uap->uaddr2 == NULL) 3077 ts = NULL; 3078 else { 3079 error = copyin_timeout32(uap->uaddr2, &timeout); 3080 if (error != 0) 3081 return (error); 3082 if (timeout.tv_nsec >= 1000000000 || 3083 timeout.tv_nsec < 0) 3084 return (EINVAL); 3085 ts = &timeout; 3086 } 3087 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3088 } 3089 3090 static int 3091 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3092 { 3093 struct timespec timeout; 3094 int error; 3095 3096 /* Allow a null timespec (wait forever). */ 3097 if (uap->uaddr2 == NULL) { 3098 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3099 } else { 3100 error = copyin(uap->uaddr2, &timeout, 3101 sizeof(timeout)); 3102 if (error != 0) 3103 return (error); 3104 if (timeout.tv_nsec >= 1000000000 || 3105 timeout.tv_nsec < 0) { 3106 return (EINVAL); 3107 } 3108 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); 3109 } 3110 return (error); 3111 } 3112 3113 static int 3114 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3115 { 3116 struct timespec timeout; 3117 int error; 3118 3119 /* Allow a null timespec (wait forever). */ 3120 if (uap->uaddr2 == NULL) { 3121 error = do_rw_wrlock(td, uap->obj, 0); 3122 } else { 3123 error = copyin_timeout32(uap->uaddr2, &timeout); 3124 if (error != 0) 3125 return (error); 3126 if (timeout.tv_nsec >= 1000000000 || 3127 timeout.tv_nsec < 0) { 3128 return (EINVAL); 3129 } 3130 3131 error = do_rw_wrlock2(td, uap->obj, &timeout); 3132 } 3133 return (error); 3134 } 3135 3136 static int 3137 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3138 { 3139 struct timespec *ts, timeout; 3140 int error; 3141 3142 if (uap->uaddr2 == NULL) 3143 ts = NULL; 3144 else { 3145 error = copyin_timeout32(uap->uaddr2, &timeout); 3146 if (error != 0) 3147 return (error); 3148 if (timeout.tv_nsec >= 1000000000 || 3149 timeout.tv_nsec < 0) 3150 return (EINVAL); 3151 ts = &timeout; 3152 } 3153 return do_wait(td, uap->obj, uap->val, ts, 1, 1); 3154 } 3155 3156 static _umtx_op_func op_table_compat32[] = { 3157 __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */ 3158 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */ 3159 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ 3160 __umtx_op_wake, /* UMTX_OP_WAKE */ 3161 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3162 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_TRYLOCK */ 3163 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3164 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3165 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ 3166 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3167 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3168 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ 3169 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ 3170 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ 3171 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3172 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3173 __umtx_op_wake_private /* UMTX_OP_WAKE_PRIVATE */ 3174 }; 3175 3176 int 3177 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 3178 { 3179 if ((unsigned)uap->op < UMTX_OP_MAX) 3180 return (*op_table_compat32[uap->op])(td, 3181 (struct _umtx_op_args *)uap); 3182 return (EINVAL); 3183 } 3184 #endif 3185 3186 void 3187 umtx_thread_init(struct thread *td) 3188 { 3189 td->td_umtxq = umtxq_alloc(); 3190 td->td_umtxq->uq_thread = td; 3191 } 3192 3193 void 3194 umtx_thread_fini(struct thread *td) 3195 { 3196 umtxq_free(td->td_umtxq); 3197 } 3198 3199 /* 3200 * It will be called when new thread is created, e.g fork(). 3201 */ 3202 void 3203 umtx_thread_alloc(struct thread *td) 3204 { 3205 struct umtx_q *uq; 3206 3207 uq = td->td_umtxq; 3208 uq->uq_inherited_pri = PRI_MAX; 3209 3210 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 3211 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 3212 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 3213 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 3214 } 3215 3216 /* 3217 * exec() hook. 3218 */ 3219 static void 3220 umtx_exec_hook(void *arg __unused, struct proc *p __unused, 3221 struct image_params *imgp __unused) 3222 { 3223 umtx_thread_cleanup(curthread); 3224 } 3225 3226 /* 3227 * thread_exit() hook. 3228 */ 3229 void 3230 umtx_thread_exit(struct thread *td) 3231 { 3232 umtx_thread_cleanup(td); 3233 } 3234 3235 /* 3236 * clean up umtx data. 3237 */ 3238 static void 3239 umtx_thread_cleanup(struct thread *td) 3240 { 3241 struct umtx_q *uq; 3242 struct umtx_pi *pi; 3243 3244 if ((uq = td->td_umtxq) == NULL) 3245 return; 3246 3247 mtx_lock_spin(&umtx_lock); 3248 uq->uq_inherited_pri = PRI_MAX; 3249 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 3250 pi->pi_owner = NULL; 3251 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 3252 } 3253 thread_lock(td); 3254 td->td_flags &= ~TDF_UBORROWING; 3255 thread_unlock(td); 3256 mtx_unlock_spin(&umtx_lock); 3257 } 3258