/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>

#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5

/* Key to represent a unique userland synchronization object */
struct umtx_key {
	int	hash;
	int	type;
	int	shared;
	union {
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};
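/*
 * Key selection example (illustrative): a umtx in anonymous,
 * non-shared memory yields a private key identified by
 * (vmspace, address), while a umtx in a MAP_SHARED region yields a
 * shared key identified by (vm_object, offset).  Two mappings of the
 * same object therefore compute matching keys even at different
 * addresses:
 *
 *	umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE, &k1);
 *	umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE, &k2);
 *	KASSERT(umtx_key_match(&k1, &k2), ("same object, same key"));
 */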
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking the umtxes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread that is waiting on this entry. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex this thread is blocked on.  Reads may hold
	 * either the chain lock or sched_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that others contend on */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could introduce a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, and that boost would propagate to A as well; A's
 * priority would then never be lowered even if it were using 100% of
 * the CPU, which is unfair to other processes.  Time-sharing
 * priorities are therefore clamped before being lent.
 */
#define UPRI(td)	(((td)->td_ksegrp->kg_user_pri >= PRI_MIN_TIMESHARE && \
			  (td)->td_ksegrp->kg_user_pri <= PRI_MAX_TIMESHARE) ? \
			 PRI_MAX_TIMESHARE : (td)->td_ksegrp->kg_user_pri)
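/*
 * Clamping example (illustrative): lower numeric values mean stronger
 * priority.  A real-time thread whose kg_user_pri lies outside the
 * time-sharing band keeps its own value, so it can be lent through a
 * PI mutex; a time-sharing thread evaluates to PRI_MAX_TIMESHARE, the
 * weakest time-sharing priority, so the transient "sleep boost" a
 * blocked time-sharing thread receives is never lent to a lock owner.
 */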
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(void);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

static void
umtxq_sysinit(void *arg __unused)
{
	int i;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
			 MTX_DEF | MTX_DUPOK);
		TAILQ_INIT(&umtxq_chains[i].uc_queue);
		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
		umtxq_chains[i].uc_busy = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}
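/*
 * Hashing sketch (illustrative): multiplying by a large prime near
 * 2^32/phi scrambles the key bits, and the high bits of the product,
 * selected by the UMTX_SHIFTS right shift, are the best mixed, so
 * they choose the chain.  Nearby locks thus tend to land on
 * different chains:
 *
 *	n  = (uintptr_t)vs + addr;
 *	n2 = (uintptr_t)vs + addr + 8;
 *	h  = ((n  * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
 *	h2 = ((n2 * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
 */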
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
		k1->info.both.b == k2->info.both.b);
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	return (&umtxq_chains[key->hash]);
}

/*
 * Set the chain to the busy state when the following operations
 * may block (a kernel mutex can not be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	while (uc->uc_busy != 0) {
		uc->uc_waiters++;
		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
		uc->uc_waiters--;
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Insert a thread onto the umtx queue.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
	uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove a thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Check if there are multiple PI waiters and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
			*first = uq;
		}
	}
	return (count);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq, *next;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			umtxq_remove(uq);
			wakeup(uq);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}
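/*
 * Sleep protocol note (illustrative): a waiter is queued with the
 * chain lock held *before* the user value is re-read, and
 * umtxq_sleep() refuses to sleep once UQF_UMTXQ has been cleared.  A
 * waker that changes the user value and then calls umtxq_signal()
 * under the same chain lock can therefore never slip in between the
 * waiter's final check and its msleep(); the wakeup is never lost:
 *
 *	waiter				waker
 *	------				-----
 *	umtxq_lock(); umtxq_insert();
 *	umtxq_unlock();
 *	re-read user value		store new user value
 *	umtxq_lock();			umtxq_lock();
 *	umtxq_sleep()  <------->	umtxq_signal()
 */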
/*
 * Convert a userspace address into a unique logical address.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else if (share == PROCESS_SHARE || share == AUTO_SHARE) {
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, uintptr_t id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
/*
 * Lock a umtx object.
 */
static int
do_lock(struct thread *td, struct umtx *umtx, uintptr_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, uintptr_t id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need {fu,su}ptr; this is not correct on arches where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	owner = fuword(&umtx->u_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
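/*
 * Userland fast path sketch (illustrative; the comments above note
 * that the uncontested transitions "should be done in userland").  A
 * user-level lock routine would try the atomic transition itself and
 * enter the kernel only when the lock is held or contested; unlock
 * enters the kernel only when the contested bit is set:
 *
 *	if (atomic_cmpset_acq_ptr(&umtx->u_owner, UMTX_UNOWNED, id))
 *		return (0);			uncontested, no syscall
 *	return (_umtx_lock(umtx));		kernel slow path
 *
 *	if (atomic_cmpset_rel_ptr(&umtx->u_owner, id, UMTX_UNOWNED))
 *		return (0);			no waiters, no syscall
 *	return (_umtx_unlock(umtx));		wake a waiter
 */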
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, uintptr_t id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uintptr_t tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	    &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "ucond", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	   &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
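/*
 * Usage sketch (illustrative): do_wait() and kern_umtx_wake() form a
 * futex-style compare-and-sleep primitive.  A userland waiter passes
 * the value it last observed; the kernel re-reads the word after
 * queueing the thread, so a concurrent wake cannot be lost:
 *
 *	_umtx_op(&word, UMTX_OP_WAIT, observed, NULL, NULL);
 *
 * and the waker, after changing the word:
 *
 *	_umtx_op(&word, UMTX_OP_WAKE, 1, NULL, NULL);
 */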
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (try != 0)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(void)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | M_WAITOK);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&sched_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		if (UPRI(td) <= pri)
			return;

		sched_lend_user_prio(td, pri);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by another thread.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		pi = uq_owner->uq_pi_blocked;
	}
}
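/*
 * Propagation example (illustrative): suppose low-priority thread L
 * owns a PI mutex and high-priority thread H blocks on it, while L is
 * itself blocked on a second PI mutex owned by thread M.
 * umtx_propagate_priority(H) walks the chain with sched_lock held: it
 * lends H's priority to L, then follows L's uq_pi_blocked to the
 * second mutex and lends the same priority to M, stopping once an
 * owner already runs at least that strong or is not blocked on a
 * further PI mutex.  When H is woken or gives up,
 * umtx_unpropagate_priority() recomputes each owner's priority from
 * the strongest remaining waiter on its contested mutexes.
 */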
/*
 * Insert a PI mutex into owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&sched_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&sched_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&sched_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&sched_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;

	mtx_assert(&sched_lock, MA_OWNED);
	MPASS(TD_ON_UPILOCK(td));

	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/*
		 * XXX Currently, we only support process-private PI
		 * mutexes; non-contended PI mutexes are locked in
		 * userland.  Process-shared PI mutexes should always be
		 * initialized by the kernel and registered with it, and
		 * locking should always be done by the kernel to avoid
		 * security problems.  For a process-private PI mutex we
		 * can find the owner thread and boost its priority
		 * safely.
		 */
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&sched_lock);
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	} else {
		mtx_lock_spin(&sched_lock);
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	td->td_flags |= TDF_UPIBLOCKED;
	mtx_unlock_spin(&sched_lock);
	umtxq_unlock(&uq->uq_key);

	mtx_lock_spin(&sched_lock);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&sched_lock);

	umtxq_lock(&uq->uq_key);
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
		}
	}
	umtxq_unlock(&uq->uq_key);

	mtx_lock_spin(&sched_lock);
	uq->uq_pi_blocked = NULL;
	td->td_flags &= ~TDF_UPIBLOCKED;
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&sched_lock);

	umtxq_lock(&uq->uq_key);

	return (error);
}
/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; when the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;
	int free = 0;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&sched_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&sched_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		free = 1;
	}
	if (free)
		umtx_pi_free(pi);
}

/*
 * Find a PI mutex in hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
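/*
 * Lifetime sketch (illustrative): a umtx_pi is created on first
 * contention and reclaimed when the last reference drops, all under
 * the chain lock:
 *
 *	umtxq_lock(&key);
 *	pi = umtx_pi_lookup(&key);	may allocate + umtx_pi_insert()
 *	umtx_pi_ref(pi);		pin while sleeping on it
 *	...
 *	umtx_pi_unref(pi);		last unref unhashes and frees
 *	umtxq_unlock(&key);
 */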
/*
 * Lock a PI mutex.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		pi = NULL;
		umtxq_lock(&uq->uq_key);
		pi = umtx_pi_lookup(&uq->uq_key);
		if (pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc();
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL)
				umtx_pi_free(new_pi);
			else {
				umtx_pi_insert(new_pi);
				pi = new_pi;
			}
		}

		umtx_pi_ref(pi);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Care must be exercised when dealing with umtx structure. It
		 * can fault on any access.
		 */

		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			umtxq_lock(&uq->uq_key);
			umtx_pi_unref(pi);
			umtxq_unlock(&uq->uq_key);
			pi = NULL;
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtx_pi_unref(pi);
		umtxq_unlock(&uq->uq_key);
		pi = NULL;
	}

	if (pi != NULL) {
		umtxq_lock(&uq->uq_key);
		umtx_pi_unref(pi);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		mtx_lock_spin(&sched_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		sched_unlend_user_prio(curthread, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
/*
 * Lock a PP mutex.
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	su = (suser(td) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&sched_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&sched_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
		}
		mtx_unlock_spin(&sched_lock);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

	if (error != 0) {
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (suser(td) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, the unlocked state is always
	 * set to UMUTEX_CONTESTED so that userland always enters the
	 * kernel to lock it.  This is necessary because the thread's
	 * priority has to be adjusted for such a mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&sched_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtx_key_release(&key);
	return (error);
}
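/*
 * Ceiling mapping example (illustrative): the user-visible ceiling
 * follows the POSIX convention (larger value, stronger priority) in
 * [0, RTP_PRIO_MAX], and is folded into the kernel's user priority
 * range, where smaller is stronger, as
 *
 *	kernel pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling)
 *
 * so ceiling RTP_PRIO_MAX maps to PRI_MIN_REALTIME, the strongest
 * real-time user priority, and ceiling 0 maps to the weakest.  A
 * thread holding a PP mutex runs at least at the mutex's ceiling
 * priority until it unlocks it.
 */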
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

static int
_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
	int try)
{
	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (_do_lock_normal(td, m, flags, timo, try));
	case UMUTEX_PRIO_INHERIT:
		return (_do_lock_pi(td, m, flags, timo, try));
	case UMUTEX_PRIO_PROTECT:
		return (_do_lock_pp(td, m, flags, timo, try));
	}
	return (EINVAL);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, try);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}
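/*
 * Userland usage sketch (illustrative): a threads library selects the
 * locking protocol through m_flags at init time, then locks and
 * unlocks via _umtx_op().  A NULL uaddr2 means wait forever;
 * otherwise it points at a relative struct timespec:
 *
 *	struct umutex mtx = { .m_owner = UMUTEX_UNOWNED,
 *			      .m_flags = UMUTEX_PRIO_INHERIT };
 *	struct timespec to = { .tv_sec = 1, .tv_nsec = 0 };
 *
 *	_umtx_op(&mtx, UMTX_OP_MUTEX_LOCK, 0, NULL, &to);
 *	...
 *	_umtx_op(&mtx, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
 */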
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock(td, uap->umtx, td->td_tid, 0);
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock(td, uap->umtx, td->td_tid);
}

int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	struct timespec *ts;
	int error;

	switch(uap->op) {
	case UMTX_OP_MUTEX_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_lock_umutex(td, uap->obj, ts, 0);
		break;
	case UMTX_OP_MUTEX_UNLOCK:
		error = do_unlock_umutex(td, uap->obj);
		break;
	case UMTX_OP_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_lock(td, uap->obj, uap->val, ts);
		break;
	case UMTX_OP_UNLOCK:
		error = do_unlock(td, uap->obj, uap->val);
		break;
	case UMTX_OP_WAIT:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_wait(td, uap->obj, uap->val, ts);
		break;
	case UMTX_OP_WAKE:
		error = kern_umtx_wake(td, uap->obj, uap->val);
		break;
	case UMTX_OP_MUTEX_TRYLOCK:
		error = do_lock_umutex(td, uap->obj, NULL, 1);
		break;
	case UMTX_OP_SET_CEILING:
		error = do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&sched_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	td->td_flags &= ~TDF_UBORROWING;
	mtx_unlock_spin(&sched_lock);
}