/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object waiter. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on a PI mutex. Readers can use either the chain
	 * lock or umtx_lock; writers must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes we own on which other threads are blocked */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes in this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority. There is a security reason:
 * a user could simply create a PI mutex, let thread A lock it, and
 * let another thread B block on it. Because B is sleeping, its
 * priority would be boosted, and A's priority would be boosted in
 * turn through propagation; A's priority would then never be lowered
 * even if A is consuming 100% CPU, which is unfair to other
 * processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

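/*
 * Number of times umtxq_busy() spins waiting for a busy chain to be
 * released before it falls back to sleeping on the chain.
 */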
#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

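/*
 * Report the five chains with the largest recorded queue lengths,
 * each as a percentage of the total across all chains.
 */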
static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

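/*
 * Allocate a umtx_q, the per-thread wait record, along with a spare
 * per-key wait queue so that umtxq_insert_queue() never has to
 * allocate memory.
 */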
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

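/*
 * Hash a umtx key into a chain index using multiplicative
 * (golden-ratio) hashing.
 */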
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;

	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation may
 * block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters on the shared queue for a key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and store the first waiter
 * through *first.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

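/*
 * Check whether the calling thread should suspend; returns EINTR or
 * ERESTART if the caller's sleep loop should be broken.
 */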
static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset +
			    entry->start - (vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare a value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case. This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, so sleep. Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this applies only to a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this
	 * means the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove the thread from the blocked chain and
		 * determine where it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed with the mutex,
		 * sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in its blocked PI mutex's queue;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
    uint32_t owner, const char *wmesg, struct abs_timeout *timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increase the reference count of a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
			    pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in
		 * userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/*
		 * If no one owns it but it is contested, try to
		 * acquire it.
		 */
		if (owner == UMUTEX_CONTESTED) {
			rv = casueword32(&m->m_owner,
			    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, so sleep. Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed with the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* Get the highest-priority thread that is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td,
				    uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked
	 * state to UMUTEX_CONTESTED so that userland always enters
	 * the kernel to lock the mutex; this is necessary because
	 * the thread priority has to be adjusted for such a mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

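/*
 * Set the priority ceiling of a PP mutex, returning the previous
 * ceiling through old_ceiling if requested.
 */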
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error, rv;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, so sleep. Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

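/*
 * Wait on a userland condition variable, atomically releasing the
 * associated userland mutex before sleeping.
 */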

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* Only the predefined hardware clock ids work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing the user mutex, but
	 * avoid dirtying the cache line when it is already set.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid,
		    (wflags & CVWAIT_ABSTIME) != 0, timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * The wait ended with a timeout, a signal or a spurious
		 * wakeup; clear the c_has_waiters flag if we were the
		 * last waiter.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
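
/*
 * As an illustration: the CVWAIT_CLOCKID flag tells do_cv_wait() to take
 * the clock id from c_clockid instead of defaulting to CLOCK_REALTIME.
 * A hypothetical condvar wait against CLOCK_MONOTONIC (sketch, names
 * illustrative and not part of this file):
 */
#if 0
static int
example_cond_timedwait_monotonic(struct ucond *cv, struct umutex *m)
{
	struct timespec abstime;

	clock_gettime(CLOCK_MONOTONIC, &abstime);
	abstime.tv_sec += 5;		/* give up after 5 seconds */
	cv->c_clockid = CLOCK_MONOTONIC;
	/* m must be locked by the caller; the kernel unlocks it. */
	return (_umtx_op(cv, UMTX_OP_CV_WAIT,
	    CVWAIT_CLOCKID | CVWAIT_ABSTIME, m, &abstime));
}
#endif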

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}
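
/*
 * As an illustration: c_has_waiters exists so that the signalling side
 * can skip the system call entirely in the common uncontested case.  A
 * hypothetical pthread_cond_signal() fast path (sketch):
 */
#if 0
static int
example_cond_signal(struct ucond *cv)
{
	/* No sleeper ever announced itself: nothing to wake. */
	if (cv->c_has_waiters == 0)
		return (0);
	return (_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL));
}
#endif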

static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags),
	    &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) &&
	    !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* the state changed while we were setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * The contention bit is set; increase the read waiter
		 * count before sleeping.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers + 1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/*
		 * Decrease the read waiter count, and possibly clear the
		 * read contention bit.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers - 1);
		if (blocked_readers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1)
				error = EFAULT;
			while (error == 0) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error = umtxq_check_susp(td);
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
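
/*
 * As an illustration: the rw_state word encodes the whole rwlock.  In
 * sys/umtx.h the top bits are URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS
 * and URWLOCK_READ_WAITERS, and the remaining bits hold the reader count
 * (URWLOCK_READER_COUNT()).  A hypothetical userland try-rdlock is
 * therefore a single CAS (sketch):
 */
#if 0
static int
example_rw_tryrdlock(struct urwlock *rw)
{
	int32_t state;

	state = rw->rw_state;
	/* Fail if a writer owns or is waiting for the lock. */
	while (!(state & (URWLOCK_WRITE_OWNER | URWLOCK_WRITE_WAITERS))) {
		if (URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)
			return (EAGAIN);
		/* One more reader: the count lives in the low bits. */
		if (atomic_cmpset_acq_32((volatile uint32_t *)&rw->rw_state,
		    state, state + 1))
			return (0);
		state = rw->rw_state;
	}
	return (EBUSY);
}
#endif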

static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock,
    struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags),
	    &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		while (!(state & URWLOCK_WRITE_OWNER) &&
		    URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			if (!(state & (URWLOCK_WRITE_OWNER |
			    URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX,
				    UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		if (!(state & URWLOCK_WRITE_OWNER) &&
		    URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

		while ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers - 1);
		if (blocked_writers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error = umtxq_check_susp(td);
				/*
				 * We may leave URWLOCK_WRITE_WAITERS set
				 * behind here, but that does not harm
				 * correctness.
				 */
				if (error != 0)
					break;
			}
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
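
/*
 * As an illustration: the writer-side counterpart of the reader sketch
 * above.  A writer can take the lock with one CAS only while the state
 * word shows neither an owner nor any readers (sketch, hypothetical
 * helper):
 */
#if 0
static int
example_rw_trywrlock(struct urwlock *rw)
{
	int32_t state;

	state = rw->rw_state;
	while (!(state & URWLOCK_WRITE_OWNER) &&
	    URWLOCK_READER_COUNT(state) == 0) {
		if (atomic_cmpset_acq_32((volatile uint32_t *)&rw->rw_state,
		    state, state | URWLOCK_WRITE_OWNER))
			return (0);
		state = rw->rw_state;
	}
	return (EBUSY);
}
#endif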

static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags),
	    &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
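
/*
 * As an illustration: do_rw_unlock() is only needed when someone may be
 * asleep in the kernel.  A hypothetical userland read-unlock drops the
 * count with a CAS and enters the kernel only when a waiter bit is set,
 * letting the kernel perform both the release and the wakeup (sketch):
 */
#if 0
static int
example_rw_unlock_read(struct urwlock *rw)
{
	int32_t state;

	for (;;) {
		state = rw->rw_state;
		/* Sleepers exist: the kernel releases and wakes. */
		if (state & (URWLOCK_WRITE_WAITERS | URWLOCK_READ_WAITERS))
			return (_umtx_op(rw, UMTX_OP_RW_UNLOCK, 0,
			    NULL, NULL));
		/* Nobody is waiting: drop our reader count in userland. */
		if (atomic_cmpset_rel_32((volatile uint32_t *)&rw->rw_state,
		    state, state - 1))
			return (0);
	}
}
#endif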

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv = fueword32(&sem->_count, &count);
	if (rv == -1 || count != 0) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (rv == -1 ? EFAULT : 0);
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		umtxq_signal(&key, 1);
		/*
		 * A waiter was present, so the semaphore memory is still
		 * referenced by user code and it is safe to clear the
		 * _has_waiters flag once the last sleeper has been woken.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
#endif

static int
do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t count, flags;
	int error, rv;

	uq = td->td_umtxq;
	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = fueword32(&sem->_count, &count);
	if (rv == -1) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (EFAULT);
	}
	for (;;) {
		if (USEM_COUNT(count) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (0);
		}
		if (count == USEM_HAS_WAITERS)
			break;
		rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (count == 0)
			break;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		umtxq_signal(&key, 1);

		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			rv = fueword32(&sem->_count, &count);
			while (rv != -1 && count & USEM_HAS_WAITERS)
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
			if (rv == -1)
				error = EFAULT;
			umtxq_lock(&key);
		}
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
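
/*
 * As an illustration: _usem2 keeps the count and the waiters flag in one
 * word, so a hypothetical sem_post() fast path only enters the kernel
 * when USEM_HAS_WAITERS is set (sketch):
 */
#if 0
static int
example_sem2_post(struct _usem2 *sem)
{
	uint32_t count;

	/* Add one to the count; the waiters bit is in the high bit. */
	count = atomic_fetchadd_32(&sem->_count, 1);
	if ((count & USEM_HAS_WAITERS) != 0)
		return (_umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL));
	return (0);
}
#endif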

inline int
umtx_copyin_timeout(const void *addr, struct timespec *tsp)
{
	int error;

	error = copyin(addr, tsp, sizeof(struct timespec));
	if (error == 0) {
		if (tsp->tv_sec < 0 ||
		    tsp->tv_nsec >= 1000000000 ||
		    tsp->tv_nsec < 0)
			error = EINVAL;
	}
	return (error);
}

static inline int
umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
{
	int error;

	if (size <= sizeof(struct timespec)) {
		tp->_clockid = CLOCK_REALTIME;
		tp->_flags = 0;
		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
	} else
		error = copyin(addr, tp, sizeof(struct _umtx_time));
	if (error != 0)
		return (error);
	if (tp->_timeout.tv_sec < 0 ||
	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
		return (EINVAL);
	return (0);
}
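
/*
 * As an illustration: umtx_copyin_umtx_time() accepts either a bare
 * timespec (legacy callers) or a full _umtx_time, selected by the size
 * that the caller smuggles through the uaddr argument of _umtx_op().
 * Both hypothetical calls below request a one-second relative wait
 * (sketch):
 */
#if 0
static int
example_wait_one_second(uint32_t *word, uint32_t expected)
{
	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct _umtx_time ut = {
		._timeout = { .tv_sec = 1, .tv_nsec = 0 },
		._flags = 0,			/* relative timeout */
		._clockid = CLOCK_MONOTONIC,
	};

	/* Legacy layout: the size selects it; defaults to CLOCK_REALTIME. */
	if (_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE, expected,
	    (void *)(uintptr_t)sizeof(ts), &ts) == -1)
		return (errno);
	/* Extended layout: explicit flags and clock id. */
	if (_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE, expected,
	    (void *)(uintptr_t)sizeof(ut), &ut) == -1)
		return (errno);
	return (0);
}
#endif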

static int
__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap)
{

	return (EOPNOTSUPP);
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
}

static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

#define	BATCH_SIZE	128
static int
__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	void *uaddrs[BATCH_SIZE];
	char **upp = (char **)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}
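
/*
 * As an illustration: UMTX_OP_NWAKE_PRIVATE takes an array of wait
 * addresses, which lets a thread library queue deferred wakeups and
 * issue them in a single system call, e.g. after it has released
 * several locks.  A hypothetical flush routine (sketch):
 */
#if 0
static void
example_flush_deferred_wakeups(void **addrs, int naddrs)
{
	if (naddrs > 0)
		(void)_umtx_op(addrs, UMTX_OP_NWAKE_PRIVATE, naddrs,
		    NULL, NULL);
}
#endif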

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_lock_umutex(td, uap->obj, tm_p, 0);
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
}

static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
}

static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_wake_umutex(td, uap->obj);
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_unlock_umutex(td, uap->obj);
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
}

static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_signal(td, uap->obj);
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_broadcast(td, uap->obj);
}

static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);

		error = do_rw_wrlock(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{
	return do_rw_unlock(td, uap->obj);
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return do_sem_wake(td, uap->obj);
}
#endif

static int
__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_wake2_umutex(td, uap->obj, uap->val);
}

static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem2_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return do_sem2_wake(td, uap->obj);
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static _umtx_op_func op_table[] = {
	__umtx_op_unimpl,		/* UMTX_OP_RESERVED0 */
	__umtx_op_unimpl,		/* UMTX_OP_RESERVED1 */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_MUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_MUTEX_WAKE */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
#else
	__umtx_op_unimpl,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_unimpl,		/* UMTX_OP_SEM_WAKE */
#endif
	__umtx_op_nwake_private,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex,		/* UMTX_OP_MUTEX_WAKE2 */
	__umtx_op_sem2_wait,		/* UMTX_OP_SEM2_WAIT */
	__umtx_op_sem2_wake,		/* UMTX_OP_SEM2_WAKE */
};

int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table[uap->op])(td, uap);
	return (EINVAL);
}
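
/*
 * As an illustration: sys__umtx_op() is the single entry point for all
 * of the primitives above; userland selects the operation with the "op"
 * argument.  A hypothetical futex-style wait/wake pair (sketch; the two
 * calls would normally run in different threads):
 */
#if 0
static void
example_futex_style_wait_wake(uint32_t *word)
{
	/* Sleep while *word still holds the expected value 0. */
	(void)_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE, 0, NULL, NULL);

	/* From another thread: change the word, then wake one sleeper. */
	*word = 1;
	(void)_umtx_op(word, UMTX_OP_WAKE_PRIVATE, 1, NULL, NULL);
}
#endif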

#ifdef COMPAT_FREEBSD32

struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};

struct umtx_time32 {
	struct timespec32	timeout;
	uint32_t		flags;
	uint32_t		clockid;
};

static inline int
umtx_copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			tsp->tv_sec = ts32.tv_sec;
			tsp->tv_nsec = ts32.tv_nsec;
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
{
	struct umtx_time32 t32;
	int error;

	t32.clockid = CLOCK_REALTIME;
	t32.flags = 0;
	if (size <= sizeof(struct timespec32))
		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
	else
		error = copyin(addr, &t32, sizeof(struct umtx_time32));
	if (error != 0)
		return (error);
	if (t32.timeout.tv_sec < 0 ||
	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
		return (EINVAL);
	tp->_timeout.tv_sec = t32.timeout.tv_sec;
	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
	tp->_flags = t32.flags;
	tp->_clockid = t32.clockid;
	return (0);
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_lock_umutex(td, uap->obj, tm_p, 0);
}

static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_wrlock(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_wait_uint_private_compat32(struct thread *td,
    struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}
#endif

static int
__umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem2_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	uint32_t uaddrs[BATCH_SIZE];
	uint32_t **upp = (uint32_t **)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
			    INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static _umtx_op_func op_table_compat32[] = {
	__umtx_op_unimpl,		/* UMTX_OP_RESERVED0 */
	__umtx_op_unimpl,		/* UMTX_OP_RESERVED1 */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32,	/* UMTX_OP_MUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_MUTEX_WAKE */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
#else
	__umtx_op_unimpl,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_unimpl,		/* UMTX_OP_SEM_WAKE */
#endif
	__umtx_op_nwake_private32,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex,		/* UMTX_OP_MUTEX_WAKE2 */
	__umtx_op_sem2_wait_compat32,	/* UMTX_OP_SEM2_WAIT */
	__umtx_op_sem2_wake,		/* UMTX_OP_SEM2_WAKE */
};

int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap);
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}