/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)	\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * Blocked on a PI mutex. Readers may hold either the chain
	 * lock or umtx_lock; writers must hold both the chain lock
	 * and umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by us that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and
 * let another thread B block on it. Because B is sleeping, its
 * priority would be boosted; that boost would be propagated to A via
 * priority inheritance as well and would never be lowered, even if A
 * were consuming 100% CPU. This would be unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
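/*
 * Wait queues are grouped into two arrays of UMTX_CHAINS hash chains;
 * umtxq_getchain() below routes the types up to TYPE_SEM (simple wait,
 * condition variable, semaphore) to the second array and the lock
 * types to the first, so wait-style and lock-style objects do not
 * share chain locks. A umtx_key is spread across the chains by the
 * multiplicative (golden-ratio) hash in umtxq_hash(). GET_SHARE()
 * only selects between the per-process (virtual address) and the
 * system-wide (vm_object + offset) interpretation of a key.
 */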
#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
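			/*
			 * Scan all chains and keep the five largest
			 * shares of the total peak length, using
			 * UPROF_PERC_BIGGER() to compare whole and
			 * fractional percentages and break ties.
			 */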
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}
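/*
 * Each umtx_q carries one spare umtxq_queue so that umtxq_insert_queue()
 * below never has to allocate: the first waiter on a key donates its
 * spare as the per-key wait queue, later waiters park their spares on
 * the chain's uc_spare_queue list, and umtxq_remove_queue() hands a
 * spare back to each departing thread. Allocation therefore happens
 * only here, at thread setup time, where sleeping in malloc() is
 * harmless.
 */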
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;

	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when the following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
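/*
 * The busy flag is, in effect, a sleepable lock layered above the
 * chain mutex: a caller marks the chain busy while holding uc_lock,
 * may then drop uc_lock to touch pageable userland memory
 * (fueword32()/casueword32() can fault), and finally clears the flag
 * with umtxq_unbusy(). On SMP, contenders first spin BUSY_SPINS times
 * with the chain unlocked before sleeping on the chain address.
 */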
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters on the shared queue for a key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and, through *first, the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}
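/*
 * Note that timeouts here are absolute deadlines: msleep() only takes
 * a tick count, so an EWOULDBLOCK return merely means the converted
 * tick count expired. The loop above re-samples the clock with
 * abs_timeout_update() and keeps sleeping until the deadline itself
 * has passed (abs_timeout_gethz() returns a negative value) or the
 * thread is taken off the queue by a waker.
 */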
/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
			    (vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare the value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
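/*
 * do_wait() and kern_umtx_wake() form the futex-style primitive: the
 * waiter is inserted on the queue before the word is read, so a waker
 * that changes the word and then calls kern_umtx_wake() cannot slip in
 * between the check and the sleep. If the value read no longer equals
 * "id", the wait returns immediately instead of sleeping.
 */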
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case. This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has changed,
				 * restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep. Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}
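/*
 * The m_owner word encodes the whole fast/slow path split:
 *
 *	UMUTEX_UNOWNED		- free; userland CAS acquires it.
 *	tid			- owned, no waiters; userland CAS
 *				  releases it.
 *	tid | UMUTEX_CONTESTED	- owned with (possible) waiters; the
 *				  owner must unlock through the kernel
 *				  so that sleepers are woken.
 *	UMUTEX_CONTESTED	- released while contested; the next
 *				  locker must take the slow path and
 *				  preserve the contested bit.
 */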
/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this applies only to a simple (normal) mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code. Otherwise,
	 * don't touch any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove the thread from the blocked chain and determine
		 * where it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}
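/*
 * umtx_pi_next() steps along the blocking chain: from a contested PI
 * mutex to its owner, and from there to the PI mutex that owner is
 * itself blocked on, if any. Since userland can construct mutexes
 * whose ownership forms a cycle, walking this chain naively could
 * loop forever; umtx_pi_check_loop() below detects such cycles with
 * two iterators advancing at different speeds.
 */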
/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}
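/*
 * Example of the two directions (a smaller UPRI value is a higher
 * priority): if thread A (priority 120) owns mutex M and thread B
 * (priority 90) blocks on M, umtx_propagate_priority() walks owner by
 * owner, lending 90 to A, then to whatever A is itself blocked on,
 * and so on. When B later times out or is signalled,
 * umtx_repropagate_priority() recomputes each owner's lent priority
 * from the highest-priority waiter left on its contested mutexes;
 * since the queues are kept sorted by priority, that is the first
 * entry of each pi_blocked list.
 */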
/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may already have messed up the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
    uint32_t owner, const char *wmesg, struct abs_timeout *timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increase the reference count of a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}
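/*
 * The reference count is protected by the chain lock, not by
 * umtx_lock: every lock/unlock path takes a reference around its use
 * of the umtx_pi while holding the chain lock, so a umtx_pi cannot be
 * freed while any thread still sleeps on it or inspects it.
 */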
/*
 * Decrease the reference count of a PI mutex; when the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
			    pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
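/*
 * do_lock_pi() below uses the classic allocate-then-recheck pattern:
 * it first tries umtx_pi_alloc(M_NOWAIT) under the chain lock; if that
 * fails it drops the lock, allocates with M_WAITOK, relocks, and looks
 * the key up again in case another thread inserted a umtx_pi for the
 * same mutex while the lock was dropped, freeing the duplicate.
 */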
/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in
		 * userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/*
		 * If no one owns it but it is contested, try to
		 * acquire it.
		 */
		if (owner == UMUTEX_CONTESTED) {
			rv = casueword32(&m->m_owner,
			    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep. Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* Get the highest-priority thread that is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
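/*
 * PP (priority-protect) mutexes store their ceiling as an rtprio
 * value in m_ceilings[0]. The conversion used below,
 *
 *	kernel pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling),
 *
 * maps a larger userland ceiling to a numerically smaller (i.e.
 * higher) kernel priority; for example, a ceiling of RTP_PRIO_MAX
 * maps to PRI_MIN_REALTIME, the strongest real-time priority. A
 * locker whose own priority is already above the ceiling gets EINVAL.
 */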
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
	 * to lock the mutex. This is necessary because thread priority
	 * has to be adjusted for such a mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error, rv;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * If we set the contested bit, sleep. Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}
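/*
 * Condition variable support. The protocol matches the userland
 * pthread implementation: do_cv_wait() queues the thread, marks
 * c_has_waiters, drops the user mutex with do_unlock_umutex(), and
 * only then sleeps, so a do_cv_signal() issued after the mutex is
 * released cannot be missed. c_has_waiters lets userland skip the
 * kernel call entirely when nobody is sleeping on the condvar.
 */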
2310 */ 2311 error = fueword32(&cv->c_has_waiters, &hasw); 2312 if (error == 0 && hasw == 0) 2313 suword32(&cv->c_has_waiters, 1); 2314 2315 umtxq_unbusy_unlocked(&uq->uq_key); 2316 2317 error = do_unlock_umutex(td, m); 2318 2319 if (timeout != NULL) 2320 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), 2321 timeout); 2322 2323 umtxq_lock(&uq->uq_key); 2324 if (error == 0) { 2325 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2326 NULL : &timo); 2327 } 2328 2329 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2330 error = 0; 2331 else { 2332 /* 2333 * This must be a timeout, an interruption by a signal, or a 2334 * spurious wakeup; clear the c_has_waiters flag when 2335 * necessary. 2336 */ 2337 umtxq_busy(&uq->uq_key); 2338 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2339 int oldlen = uq->uq_cur_queue->length; 2340 umtxq_remove(uq); 2341 if (oldlen == 1) { 2342 umtxq_unlock(&uq->uq_key); 2343 suword32(&cv->c_has_waiters, 0); 2344 umtxq_lock(&uq->uq_key); 2345 } 2346 } 2347 umtxq_unbusy(&uq->uq_key); 2348 if (error == ERESTART) 2349 error = EINTR; 2350 } 2351 2352 umtxq_unlock(&uq->uq_key); 2353 umtx_key_release(&uq->uq_key); 2354 return (error); 2355 } 2356 2357 /* 2358 * Signal a userland condition variable. 2359 */ 2360 static int 2361 do_cv_signal(struct thread *td, struct ucond *cv) 2362 { 2363 struct umtx_key key; 2364 int error, cnt, nwake; 2365 uint32_t flags; 2366 2367 error = fueword32(&cv->c_flags, &flags); 2368 if (error == -1) 2369 return (EFAULT); 2370 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2371 return (error); 2372 umtxq_lock(&key); 2373 umtxq_busy(&key); 2374 cnt = umtxq_count(&key); 2375 nwake = umtxq_signal(&key, 1); 2376 if (cnt <= nwake) { 2377 umtxq_unlock(&key); 2378 error = suword32(&cv->c_has_waiters, 0); 2379 if (error == -1) 2380 error = EFAULT; 2381 umtxq_lock(&key); 2382 } 2383 umtxq_unbusy(&key); 2384 umtxq_unlock(&key); 2385 umtx_key_release(&key); 2386 return (error); 2387 } 2388 2389 static int 2390 do_cv_broadcast(struct thread *td, struct ucond *cv) 2391 { 2392 struct umtx_key key; 2393 int error; 2394 uint32_t flags; 2395 2396 error = fueword32(&cv->c_flags, &flags); 2397 if (error == -1) 2398 return (EFAULT); 2399 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2400 return (error); 2401 2402 umtxq_lock(&key); 2403 umtxq_busy(&key); 2404 umtxq_signal(&key, INT_MAX); 2405 umtxq_unlock(&key); 2406 2407 error = suword32(&cv->c_has_waiters, 0); 2408 if (error == -1) 2409 error = EFAULT; 2410 2411 umtxq_unbusy_unlocked(&key); 2412 2413 umtx_key_release(&key); 2414 return (error); 2415 } 2416 2417 static int 2418 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2419 { 2420 struct abs_timeout timo; 2421 struct umtx_q *uq; 2422 uint32_t flags, wrflags; 2423 int32_t state, oldstate; 2424 int32_t blocked_readers; 2425 int error, rv; 2426 2427 uq = td->td_umtxq; 2428 error = fueword32(&rwlock->rw_flags, &flags); 2429 if (error == -1) 2430 return (EFAULT); 2431 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2432 if (error != 0) 2433 return (error); 2434 2435 if (timeout != NULL) 2436 abs_timeout_init2(&timo, timeout); 2437 2438 wrflags = URWLOCK_WRITE_OWNER; 2439 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2440 wrflags |= URWLOCK_WRITE_WAITERS; 2441 2442 for (;;) { 2443 rv = fueword32(&rwlock->rw_state, &state); 2444 if (rv == -1) { 2445 umtx_key_release(&uq->uq_key); 2446 return (EFAULT); 2447 } 2448 2449 /* try to lock
it */ 2450 while (!(state & wrflags)) { 2451 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2452 umtx_key_release(&uq->uq_key); 2453 return (EAGAIN); 2454 } 2455 rv = casueword32(&rwlock->rw_state, state, 2456 &oldstate, state + 1); 2457 if (rv == -1) { 2458 umtx_key_release(&uq->uq_key); 2459 return (EFAULT); 2460 } 2461 if (oldstate == state) { 2462 umtx_key_release(&uq->uq_key); 2463 return (0); 2464 } 2465 error = umtxq_check_susp(td); 2466 if (error != 0) 2467 break; 2468 state = oldstate; 2469 } 2470 2471 if (error) 2472 break; 2473 2474 /* grab monitor lock */ 2475 umtxq_lock(&uq->uq_key); 2476 umtxq_busy(&uq->uq_key); 2477 umtxq_unlock(&uq->uq_key); 2478 2479 /* 2480 * re-read the state, in case it changed between the try-lock above 2481 * and the check below 2482 */ 2483 rv = fueword32(&rwlock->rw_state, &state); 2484 if (rv == -1) 2485 error = EFAULT; 2486 2487 /* set read contention bit */ 2488 while (error == 0 && (state & wrflags) && 2489 !(state & URWLOCK_READ_WAITERS)) { 2490 rv = casueword32(&rwlock->rw_state, state, 2491 &oldstate, state | URWLOCK_READ_WAITERS); 2492 if (rv == -1) { 2493 error = EFAULT; 2494 break; 2495 } 2496 if (oldstate == state) 2497 goto sleep; 2498 state = oldstate; 2499 error = umtxq_check_susp(td); 2500 if (error != 0) 2501 break; 2502 } 2503 if (error != 0) { 2504 umtxq_unbusy_unlocked(&uq->uq_key); 2505 break; 2506 } 2507 2508 /* state is changed while setting flags, restart */ 2509 if (!(state & wrflags)) { 2510 umtxq_unbusy_unlocked(&uq->uq_key); 2511 error = umtxq_check_susp(td); 2512 if (error != 0) 2513 break; 2514 continue; 2515 } 2516 2517 sleep: 2518 /* contention bit is set, before sleeping, increase read waiter count */ 2519 rv = fueword32(&rwlock->rw_blocked_readers, 2520 &blocked_readers); 2521 if (rv == -1) { 2522 umtxq_unbusy_unlocked(&uq->uq_key); 2523 error = EFAULT; 2524 break; 2525 } 2526 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2527 2528 while (state & wrflags) { 2529 umtxq_lock(&uq->uq_key); 2530 umtxq_insert(uq); 2531 umtxq_unbusy(&uq->uq_key); 2532 2533 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 
2534 NULL : &timo); 2535 2536 umtxq_busy(&uq->uq_key); 2537 umtxq_remove(uq); 2538 umtxq_unlock(&uq->uq_key); 2539 if (error) 2540 break; 2541 rv = fueword32(&rwlock->rw_state, &state); 2542 if (rv == -1) { 2543 error = EFAULT; 2544 break; 2545 } 2546 } 2547 2548 /* decrease read waiter count, and may clear read contention bit */ 2549 rv = fueword32(&rwlock->rw_blocked_readers, 2550 &blocked_readers); 2551 if (rv == -1) { 2552 umtxq_unbusy_unlocked(&uq->uq_key); 2553 error = EFAULT; 2554 break; 2555 } 2556 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2557 if (blocked_readers == 1) { 2558 rv = fueword32(&rwlock->rw_state, &state); 2559 if (rv == -1) 2560 error = EFAULT; 2561 while (error == 0) { 2562 rv = casueword32(&rwlock->rw_state, state, 2563 &oldstate, state & ~URWLOCK_READ_WAITERS); 2564 if (rv == -1) { 2565 error = EFAULT; 2566 break; 2567 } 2568 if (oldstate == state) 2569 break; 2570 state = oldstate; 2571 error = umtxq_check_susp(td); 2572 } 2573 } 2574 2575 umtxq_unbusy_unlocked(&uq->uq_key); 2576 if (error != 0) 2577 break; 2578 } 2579 umtx_key_release(&uq->uq_key); 2580 if (error == ERESTART) 2581 error = EINTR; 2582 return (error); 2583 } 2584 2585 static int 2586 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2587 { 2588 struct abs_timeout timo; 2589 struct umtx_q *uq; 2590 uint32_t flags; 2591 int32_t state, oldstate; 2592 int32_t blocked_writers; 2593 int32_t blocked_readers; 2594 int error, rv; 2595 2596 uq = td->td_umtxq; 2597 error = fueword32(&rwlock->rw_flags, &flags); 2598 if (error == -1) 2599 return (EFAULT); 2600 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2601 if (error != 0) 2602 return (error); 2603 2604 if (timeout != NULL) 2605 abs_timeout_init2(&timo, timeout); 2606 2607 blocked_readers = 0; 2608 for (;;) { 2609 rv = fueword32(&rwlock->rw_state, &state); 2610 if (rv == -1) { 2611 umtx_key_release(&uq->uq_key); 2612 return (EFAULT); 2613 } 2614 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2615 rv = casueword32(&rwlock->rw_state, state, 2616 &oldstate, state | URWLOCK_WRITE_OWNER); 2617 if (rv == -1) { 2618 umtx_key_release(&uq->uq_key); 2619 return (EFAULT); 2620 } 2621 if (oldstate == state) { 2622 umtx_key_release(&uq->uq_key); 2623 return (0); 2624 } 2625 state = oldstate; 2626 error = umtxq_check_susp(td); 2627 if (error != 0) 2628 break; 2629 } 2630 2631 if (error) { 2632 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2633 blocked_readers != 0) { 2634 umtxq_lock(&uq->uq_key); 2635 umtxq_busy(&uq->uq_key); 2636 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2637 umtxq_unbusy(&uq->uq_key); 2638 umtxq_unlock(&uq->uq_key); 2639 } 2640 2641 break; 2642 } 2643 2644 /* grab monitor lock */ 2645 umtxq_lock(&uq->uq_key); 2646 umtxq_busy(&uq->uq_key); 2647 umtxq_unlock(&uq->uq_key); 2648 2649 /* 2650 * re-read the state, in case it changed between the try-lock above 2651 * and the check below 2652 */ 2653 rv = fueword32(&rwlock->rw_state, &state); 2654 if (rv == -1) 2655 error = EFAULT; 2656 2657 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2658 URWLOCK_READER_COUNT(state) != 0) && 2659 (state & URWLOCK_WRITE_WAITERS) == 0) { 2660 rv = casueword32(&rwlock->rw_state, state, 2661 &oldstate, state | URWLOCK_WRITE_WAITERS); 2662 if (rv == -1) { 2663 error = EFAULT; 2664 break; 2665 } 2666 if (oldstate == state) 2667 goto sleep; 2668 state = oldstate; 2669 error = umtxq_check_susp(td); 2670 if (error != 0) 2671 break; 
2672 } 2673 if (error != 0) { 2674 umtxq_unbusy_unlocked(&uq->uq_key); 2675 break; 2676 } 2677 2678 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2679 umtxq_unbusy_unlocked(&uq->uq_key); 2680 error = umtxq_check_susp(td); 2681 if (error != 0) 2682 break; 2683 continue; 2684 } 2685 sleep: 2686 rv = fueword32(&rwlock->rw_blocked_writers, 2687 &blocked_writers); 2688 if (rv == -1) { 2689 umtxq_unbusy_unlocked(&uq->uq_key); 2690 error = EFAULT; 2691 break; 2692 } 2693 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2694 2695 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2696 umtxq_lock(&uq->uq_key); 2697 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2698 umtxq_unbusy(&uq->uq_key); 2699 2700 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2701 NULL : &timo); 2702 2703 umtxq_busy(&uq->uq_key); 2704 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2705 umtxq_unlock(&uq->uq_key); 2706 if (error) 2707 break; 2708 rv = fueword32(&rwlock->rw_state, &state); 2709 if (rv == -1) { 2710 error = EFAULT; 2711 break; 2712 } 2713 } 2714 2715 rv = fueword32(&rwlock->rw_blocked_writers, 2716 &blocked_writers); 2717 if (rv == -1) { 2718 umtxq_unbusy_unlocked(&uq->uq_key); 2719 error = EFAULT; 2720 break; 2721 } 2722 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2723 if (blocked_writers == 1) { 2724 rv = fueword32(&rwlock->rw_state, &state); 2725 if (rv == -1) { 2726 umtxq_unbusy_unlocked(&uq->uq_key); 2727 error = EFAULT; 2728 break; 2729 } 2730 for (;;) { 2731 rv = casueword32(&rwlock->rw_state, state, 2732 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2733 if (rv == -1) { 2734 error = EFAULT; 2735 break; 2736 } 2737 if (oldstate == state) 2738 break; 2739 state = oldstate; 2740 error = umtxq_check_susp(td); 2741 /* 2742 * We are leaving the URWLOCK_WRITE_WAITERS 2743 * behind, but this should not harm the 2744 * correctness. 
2745 */ 2746 if (error != 0) 2747 break; 2748 } 2749 rv = fueword32(&rwlock->rw_blocked_readers, 2750 &blocked_readers); 2751 if (rv == -1) { 2752 umtxq_unbusy_unlocked(&uq->uq_key); 2753 error = EFAULT; 2754 break; 2755 } 2756 } else 2757 blocked_readers = 0; 2758 2759 umtxq_unbusy_unlocked(&uq->uq_key); 2760 } 2761 2762 umtx_key_release(&uq->uq_key); 2763 if (error == ERESTART) 2764 error = EINTR; 2765 return (error); 2766 } 2767 2768 static int 2769 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2770 { 2771 struct umtx_q *uq; 2772 uint32_t flags; 2773 int32_t state, oldstate; 2774 int error, rv, q, count; 2775 2776 uq = td->td_umtxq; 2777 error = fueword32(&rwlock->rw_flags, &flags); 2778 if (error == -1) 2779 return (EFAULT); 2780 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2781 if (error != 0) 2782 return (error); 2783 2784 error = fueword32(&rwlock->rw_state, &state); 2785 if (error == -1) { 2786 error = EFAULT; 2787 goto out; 2788 } 2789 if (state & URWLOCK_WRITE_OWNER) { 2790 for (;;) { 2791 rv = casueword32(&rwlock->rw_state, state, 2792 &oldstate, state & ~URWLOCK_WRITE_OWNER); 2793 if (rv == -1) { 2794 error = EFAULT; 2795 goto out; 2796 } 2797 if (oldstate != state) { 2798 state = oldstate; 2799 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 2800 error = EPERM; 2801 goto out; 2802 } 2803 error = umtxq_check_susp(td); 2804 if (error != 0) 2805 goto out; 2806 } else 2807 break; 2808 } 2809 } else if (URWLOCK_READER_COUNT(state) != 0) { 2810 for (;;) { 2811 rv = casueword32(&rwlock->rw_state, state, 2812 &oldstate, state - 1); 2813 if (rv == -1) { 2814 error = EFAULT; 2815 goto out; 2816 } 2817 if (oldstate != state) { 2818 state = oldstate; 2819 if (URWLOCK_READER_COUNT(oldstate) == 0) { 2820 error = EPERM; 2821 goto out; 2822 } 2823 error = umtxq_check_susp(td); 2824 if (error != 0) 2825 goto out; 2826 } else 2827 break; 2828 } 2829 } else { 2830 error = EPERM; 2831 goto out; 2832 } 2833 2834 count = 0; 2835 2836 if (!(flags & URWLOCK_PREFER_READER)) { 2837 if (state & URWLOCK_WRITE_WAITERS) { 2838 count = 1; 2839 q = UMTX_EXCLUSIVE_QUEUE; 2840 } else if (state & URWLOCK_READ_WAITERS) { 2841 count = INT_MAX; 2842 q = UMTX_SHARED_QUEUE; 2843 } 2844 } else { 2845 if (state & URWLOCK_READ_WAITERS) { 2846 count = INT_MAX; 2847 q = UMTX_SHARED_QUEUE; 2848 } else if (state & URWLOCK_WRITE_WAITERS) { 2849 count = 1; 2850 q = UMTX_EXCLUSIVE_QUEUE; 2851 } 2852 } 2853 2854 if (count) { 2855 umtxq_lock(&uq->uq_key); 2856 umtxq_busy(&uq->uq_key); 2857 umtxq_signal_queue(&uq->uq_key, count, q); 2858 umtxq_unbusy(&uq->uq_key); 2859 umtxq_unlock(&uq->uq_key); 2860 } 2861 out: 2862 umtx_key_release(&uq->uq_key); 2863 return (error); 2864 } 2865 2866 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 2867 static int 2868 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 2869 { 2870 struct abs_timeout timo; 2871 struct umtx_q *uq; 2872 uint32_t flags, count, count1; 2873 int error, rv; 2874 2875 uq = td->td_umtxq; 2876 error = fueword32(&sem->_flags, &flags); 2877 if (error == -1) 2878 return (EFAULT); 2879 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 2880 if (error != 0) 2881 return (error); 2882 2883 if (timeout != NULL) 2884 abs_timeout_init2(&timo, timeout); 2885 2886 umtxq_lock(&uq->uq_key); 2887 umtxq_busy(&uq->uq_key); 2888 umtxq_insert(uq); 2889 umtxq_unlock(&uq->uq_key); 2890 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 2891 if (rv == 0) 2892 rv = fueword32(&sem->_count, &count); 2893 if 
(rv == -1 || count != 0) { 2894 umtxq_lock(&uq->uq_key); 2895 umtxq_unbusy(&uq->uq_key); 2896 umtxq_remove(uq); 2897 umtxq_unlock(&uq->uq_key); 2898 umtx_key_release(&uq->uq_key); 2899 return (rv == -1 ? EFAULT : 0); 2900 } 2901 umtxq_lock(&uq->uq_key); 2902 umtxq_unbusy(&uq->uq_key); 2903 2904 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 2905 2906 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2907 error = 0; 2908 else { 2909 umtxq_remove(uq); 2910 /* A relative timeout cannot be restarted. */ 2911 if (error == ERESTART && timeout != NULL && 2912 (timeout->_flags & UMTX_ABSTIME) == 0) 2913 error = EINTR; 2914 } 2915 umtxq_unlock(&uq->uq_key); 2916 umtx_key_release(&uq->uq_key); 2917 return (error); 2918 } 2919 2920 /* 2921 * Signal a userland semaphore. 2922 */ 2923 static int 2924 do_sem_wake(struct thread *td, struct _usem *sem) 2925 { 2926 struct umtx_key key; 2927 int error, cnt; 2928 uint32_t flags; 2929 2930 error = fueword32(&sem->_flags, &flags); 2931 if (error == -1) 2932 return (EFAULT); 2933 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 2934 return (error); 2935 umtxq_lock(&key); 2936 umtxq_busy(&key); 2937 cnt = umtxq_count(&key); 2938 if (cnt > 0) { 2939 umtxq_signal(&key, 1); 2940 /* 2941 * If this was the last sleeping thread, the memory is 2942 * still being referenced by user code, so it is safe to 2943 * clear the _has_waiters flag. 2944 */ 2945 if (cnt == 1) { 2946 umtxq_unlock(&key); 2947 error = suword32(&sem->_has_waiters, 0); 2948 umtxq_lock(&key); 2949 if (error == -1) 2950 error = EFAULT; 2951 } 2952 } 2953 umtxq_unbusy(&key); 2954 umtxq_unlock(&key); 2955 umtx_key_release(&key); 2956 return (error); 2957 } 2958 #endif 2959 2960 static int 2961 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 2962 { 2963 struct abs_timeout timo; 2964 struct umtx_q *uq; 2965 uint32_t count, flags; 2966 int error, rv; 2967 2968 uq = td->td_umtxq; 2969 rv = fueword32(&sem->_flags, &flags); if (rv == -1) return (EFAULT); 2970 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 2971 if (error != 0) 2972 return (error); 2973 2974 if (timeout != NULL) 2975 abs_timeout_init2(&timo, timeout); 2976 2977 umtxq_lock(&uq->uq_key); 2978 umtxq_busy(&uq->uq_key); 2979 umtxq_insert(uq); 2980 umtxq_unlock(&uq->uq_key); 2981 rv = fueword32(&sem->_count, &count); 2982 if (rv == -1) { 2983 umtxq_lock(&uq->uq_key); 2984 umtxq_unbusy(&uq->uq_key); 2985 umtxq_remove(uq); 2986 umtxq_unlock(&uq->uq_key); 2987 umtx_key_release(&uq->uq_key); 2988 return (EFAULT); 2989 } 2990 for (;;) { 2991 if (USEM_COUNT(count) != 0) { 2992 umtxq_lock(&uq->uq_key); 2993 umtxq_unbusy(&uq->uq_key); 2994 umtxq_remove(uq); 2995 umtxq_unlock(&uq->uq_key); 2996 umtx_key_release(&uq->uq_key); 2997 return (0); 2998 } 2999 if (count == USEM_HAS_WAITERS) 3000 break; 3001 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3002 if (rv == -1) { 3003 umtxq_lock(&uq->uq_key); 3004 umtxq_unbusy(&uq->uq_key); 3005 umtxq_remove(uq); 3006 umtxq_unlock(&uq->uq_key); 3007 umtx_key_release(&uq->uq_key); 3008 return (EFAULT); 3009 } 3010 if (count == 0) 3011 break; 3012 } 3013 umtxq_lock(&uq->uq_key); 3014 umtxq_unbusy(&uq->uq_key); 3015 3016 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3017 3018 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3019 error = 0; 3020 else { 3021 umtxq_remove(uq); 3022 /* A relative timeout cannot be restarted.
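 * Restarting the system call would re-arm the full relative
 * interval, so the total time slept could far exceed what the
 * caller asked for.  An absolute deadline (UMTX_ABSTIME) is
 * immune to this, hence only relative waits map ERESTART to EINTR.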
*/ 3023 if (error == ERESTART && timeout != NULL && 3024 (timeout->_flags & UMTX_ABSTIME) == 0) 3025 error = EINTR; 3026 } 3027 umtxq_unlock(&uq->uq_key); 3028 umtx_key_release(&uq->uq_key); 3029 return (error); 3030 } 3031 3032 /* 3033 * Signal a userland semaphore. 3034 */ 3035 static int 3036 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3037 { 3038 struct umtx_key key; 3039 int error, cnt, rv; 3040 uint32_t count, flags; 3041 3042 rv = fueword32(&sem->_flags, &flags); 3043 if (rv == -1) 3044 return (EFAULT); 3045 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3046 return (error); 3047 umtxq_lock(&key); 3048 umtxq_busy(&key); 3049 cnt = umtxq_count(&key); 3050 if (cnt > 0) { 3051 umtxq_signal(&key, 1); 3052 3053 /* 3054 * If this was the last sleeping thread, clear the waiters 3055 * flag in _count. 3056 */ 3057 if (cnt == 1) { 3058 umtxq_unlock(&key); 3059 rv = fueword32(&sem->_count, &count); 3060 while (rv != -1 && count & USEM_HAS_WAITERS) 3061 rv = casueword32(&sem->_count, count, &count, 3062 count & ~USEM_HAS_WAITERS); 3063 if (rv == -1) 3064 error = EFAULT; 3065 umtxq_lock(&key); 3066 } 3067 } 3068 umtxq_unbusy(&key); 3069 umtxq_unlock(&key); 3070 umtx_key_release(&key); 3071 return (error); 3072 } 3073 3074 inline int 3075 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3076 { 3077 int error; 3078 3079 error = copyin(addr, tsp, sizeof(struct timespec)); 3080 if (error == 0) { 3081 if (tsp->tv_sec < 0 || 3082 tsp->tv_nsec >= 1000000000 || 3083 tsp->tv_nsec < 0) 3084 error = EINVAL; 3085 } 3086 return (error); 3087 } 3088 3089 static inline int 3090 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3091 { 3092 int error; 3093 3094 if (size <= sizeof(struct timespec)) { 3095 tp->_clockid = CLOCK_REALTIME; 3096 tp->_flags = 0; 3097 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3098 } else 3099 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3100 if (error != 0) 3101 return (error); 3102 if (tp->_timeout.tv_sec < 0 || 3103 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3104 return (EINVAL); 3105 return (0); 3106 } 3107 3108 static int 3109 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3110 { 3111 3112 return (EOPNOTSUPP); 3113 } 3114 3115 static int 3116 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3117 { 3118 struct _umtx_time timeout, *tm_p; 3119 int error; 3120 3121 if (uap->uaddr2 == NULL) 3122 tm_p = NULL; 3123 else { 3124 error = umtx_copyin_umtx_time( 3125 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3126 if (error != 0) 3127 return (error); 3128 tm_p = &timeout; 3129 } 3130 return do_wait(td, uap->obj, uap->val, tm_p, 0, 0); 3131 } 3132 3133 static int 3134 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3135 { 3136 struct _umtx_time timeout, *tm_p; 3137 int error; 3138 3139 if (uap->uaddr2 == NULL) 3140 tm_p = NULL; 3141 else { 3142 error = umtx_copyin_umtx_time( 3143 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3144 if (error != 0) 3145 return (error); 3146 tm_p = &timeout; 3147 } 3148 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3149 } 3150 3151 static int 3152 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3153 { 3154 struct _umtx_time *tm_p, timeout; 3155 int error; 3156 3157 if (uap->uaddr2 == NULL) 3158 tm_p = NULL; 3159 else { 3160 error = umtx_copyin_umtx_time( 3161 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3162 if (error != 0) 3163 return (error); 3164 tm_p = &timeout; 3165 
} 3166 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3167 } 3168 3169 static int 3170 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3171 { 3172 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3173 } 3174 3175 #define BATCH_SIZE 128 3176 static int 3177 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3178 { 3179 int count = uap->val; 3180 void *uaddrs[BATCH_SIZE]; 3181 char **upp = (char **)uap->obj; 3182 int tocopy; 3183 int error = 0; 3184 int i, pos = 0; 3185 3186 while (count > 0) { 3187 tocopy = count; 3188 if (tocopy > BATCH_SIZE) 3189 tocopy = BATCH_SIZE; 3190 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *)); 3191 if (error != 0) 3192 break; 3193 for (i = 0; i < tocopy; ++i) 3194 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3195 count -= tocopy; 3196 pos += tocopy; 3197 } 3198 return (error); 3199 } 3200 3201 static int 3202 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3203 { 3204 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3205 } 3206 3207 static int 3208 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3209 { 3210 struct _umtx_time *tm_p, timeout; 3211 int error; 3212 3213 /* Allow a null timespec (wait forever). */ 3214 if (uap->uaddr2 == NULL) 3215 tm_p = NULL; 3216 else { 3217 error = umtx_copyin_umtx_time( 3218 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3219 if (error != 0) 3220 return (error); 3221 tm_p = &timeout; 3222 } 3223 return do_lock_umutex(td, uap->obj, tm_p, 0); 3224 } 3225 3226 static int 3227 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3228 { 3229 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); 3230 } 3231 3232 static int 3233 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3234 { 3235 struct _umtx_time *tm_p, timeout; 3236 int error; 3237 3238 /* Allow a null timespec (wait forever). */ 3239 if (uap->uaddr2 == NULL) 3240 tm_p = NULL; 3241 else { 3242 error = umtx_copyin_umtx_time( 3243 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3244 if (error != 0) 3245 return (error); 3246 tm_p = &timeout; 3247 } 3248 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3249 } 3250 3251 static int 3252 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3253 { 3254 return do_wake_umutex(td, uap->obj); 3255 } 3256 3257 static int 3258 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3259 { 3260 return do_unlock_umutex(td, uap->obj); 3261 } 3262 3263 static int 3264 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3265 { 3266 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 3267 } 3268 3269 static int 3270 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3271 { 3272 struct timespec *ts, timeout; 3273 int error; 3274 3275 /* Allow a null timespec (wait forever). 
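 *
 * For this operation uap->obj is the ucond, uap->uaddr1 the
 * associated umutex, uap->uaddr2 the optional timespec and
 * uap->val the CVWAIT_* flags.  An illustrative (hypothetical)
 * absolute-time wait against the clock stored in c_clockid:
 *
 *	_umtx_op(cv, UMTX_OP_CV_WAIT,
 *	    CVWAIT_ABSTIME | CVWAIT_CLOCKID, mtx, &ts);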
*/ 3276 if (uap->uaddr2 == NULL) 3277 ts = NULL; 3278 else { 3279 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3280 if (error != 0) 3281 return (error); 3282 ts = &timeout; 3283 } 3284 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3285 } 3286 3287 static int 3288 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3289 { 3290 return do_cv_signal(td, uap->obj); 3291 } 3292 3293 static int 3294 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3295 { 3296 return do_cv_broadcast(td, uap->obj); 3297 } 3298 3299 static int 3300 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3301 { 3302 struct _umtx_time timeout; 3303 int error; 3304 3305 /* Allow a null timespec (wait forever). */ 3306 if (uap->uaddr2 == NULL) { 3307 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3308 } else { 3309 error = umtx_copyin_umtx_time(uap->uaddr2, 3310 (size_t)uap->uaddr1, &timeout); 3311 if (error != 0) 3312 return (error); 3313 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3314 } 3315 return (error); 3316 } 3317 3318 static int 3319 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3320 { 3321 struct _umtx_time timeout; 3322 int error; 3323 3324 /* Allow a null timespec (wait forever). */ 3325 if (uap->uaddr2 == NULL) { 3326 error = do_rw_wrlock(td, uap->obj, 0); 3327 } else { 3328 error = umtx_copyin_umtx_time(uap->uaddr2, 3329 (size_t)uap->uaddr1, &timeout); 3330 if (error != 0) 3331 return (error); 3332 3333 error = do_rw_wrlock(td, uap->obj, &timeout); 3334 } 3335 return (error); 3336 } 3337 3338 static int 3339 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3340 { 3341 return do_rw_unlock(td, uap->obj); 3342 } 3343 3344 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3345 static int 3346 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3347 { 3348 struct _umtx_time *tm_p, timeout; 3349 int error; 3350 3351 /* Allow a null timespec (wait forever). */ 3352 if (uap->uaddr2 == NULL) 3353 tm_p = NULL; 3354 else { 3355 error = umtx_copyin_umtx_time( 3356 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3357 if (error != 0) 3358 return (error); 3359 tm_p = &timeout; 3360 } 3361 return (do_sem_wait(td, uap->obj, tm_p)); 3362 } 3363 3364 static int 3365 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3366 { 3367 return do_sem_wake(td, uap->obj); 3368 } 3369 #endif 3370 3371 static int 3372 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3373 { 3374 return do_wake2_umutex(td, uap->obj, uap->val); 3375 } 3376 3377 static int 3378 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3379 { 3380 struct _umtx_time *tm_p, timeout; 3381 int error; 3382 3383 /* Allow a null timespec (wait forever). 
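 *
 * uap->uaddr1 encodes the size of the object at uap->uaddr2, which
 * lets umtx_copyin_umtx_time() accept either a bare struct timespec
 * or a full struct _umtx_time.  A hypothetical timed wait (assuming
 * "ts" holds an absolute CLOCK_MONOTONIC deadline) could be:
 *
 *	struct _umtx_time t = { ts, UMTX_ABSTIME, CLOCK_MONOTONIC };
 *	_umtx_op(sem, UMTX_OP_SEM2_WAIT, 0, (void *)sizeof(t), &t);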
*/ 3384 if (uap->uaddr2 == NULL) 3385 tm_p = NULL; 3386 else { 3387 error = umtx_copyin_umtx_time( 3388 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3389 if (error != 0) 3390 return (error); 3391 tm_p = &timeout; 3392 } 3393 return (do_sem2_wait(td, uap->obj, tm_p)); 3394 } 3395 3396 static int 3397 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3398 { 3399 return do_sem2_wake(td, uap->obj); 3400 } 3401 3402 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3403 3404 static _umtx_op_func op_table[] = { 3405 __umtx_op_unimpl, /* UMTX_OP_RESERVED0 */ 3406 __umtx_op_unimpl, /* UMTX_OP_RESERVED1 */ 3407 __umtx_op_wait, /* UMTX_OP_WAIT */ 3408 __umtx_op_wake, /* UMTX_OP_WAKE */ 3409 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3410 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3411 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3412 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3413 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 3414 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3415 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3416 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 3417 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 3418 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 3419 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3420 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3421 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3422 __umtx_op_wait_umutex, /* UMTX_OP_MUTEX_WAIT */ 3423 __umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */ 3424 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3425 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ 3426 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3427 #else 3428 __umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */ 3429 __umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */ 3430 #endif 3431 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */ 3432 __umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */ 3433 __umtx_op_sem2_wait, /* UMTX_OP_SEM2_WAIT */ 3434 __umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */ 3435 }; 3436 3437 int 3438 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3439 { 3440 if ((unsigned)uap->op < UMTX_OP_MAX) 3441 return (*op_table[uap->op])(td, uap); 3442 return (EINVAL); 3443 } 3444 3445 #ifdef COMPAT_FREEBSD32 3446 3447 struct timespec32 { 3448 int32_t tv_sec; 3449 int32_t tv_nsec; 3450 }; 3451 3452 struct umtx_time32 { 3453 struct timespec32 timeout; 3454 uint32_t flags; 3455 uint32_t clockid; 3456 }; 3457 3458 static inline int 3459 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 3460 { 3461 struct timespec32 ts32; 3462 int error; 3463 3464 error = copyin(addr, &ts32, sizeof(struct timespec32)); 3465 if (error == 0) { 3466 if (ts32.tv_sec < 0 || 3467 ts32.tv_nsec >= 1000000000 || 3468 ts32.tv_nsec < 0) 3469 error = EINVAL; 3470 else { 3471 tsp->tv_sec = ts32.tv_sec; 3472 tsp->tv_nsec = ts32.tv_nsec; 3473 } 3474 } 3475 return (error); 3476 } 3477 3478 static inline int 3479 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 3480 { 3481 struct umtx_time32 t32; 3482 int error; 3483 3484 t32.clockid = CLOCK_REALTIME; 3485 t32.flags = 0; 3486 if (size <= sizeof(struct timespec32)) 3487 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 3488 else 3489 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 3490 if (error != 0) 3491 return (error); 3492 if (t32.timeout.tv_sec < 0 || 3493 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 3494 return (EINVAL); 3495 tp->_timeout.tv_sec = t32.timeout.tv_sec; 3496 tp->_timeout.tv_nsec 
= t32.timeout.tv_nsec; 3497 tp->_flags = t32.flags; 3498 tp->_clockid = t32.clockid; 3499 return (0); 3500 } 3501 3502 static int 3503 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3504 { 3505 struct _umtx_time *tm_p, timeout; 3506 int error; 3507 3508 if (uap->uaddr2 == NULL) 3509 tm_p = NULL; 3510 else { 3511 error = umtx_copyin_umtx_time32(uap->uaddr2, 3512 (size_t)uap->uaddr1, &timeout); 3513 if (error != 0) 3514 return (error); 3515 tm_p = &timeout; 3516 } 3517 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3518 } 3519 3520 static int 3521 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3522 { 3523 struct _umtx_time *tm_p, timeout; 3524 int error; 3525 3526 /* Allow a null timespec (wait forever). */ 3527 if (uap->uaddr2 == NULL) 3528 tm_p = NULL; 3529 else { 3530 error = umtx_copyin_umtx_time32(uap->uaddr2, 3531 (size_t)uap->uaddr1, &timeout); 3532 if (error != 0) 3533 return (error); 3534 tm_p = &timeout; 3535 } 3536 return do_lock_umutex(td, uap->obj, tm_p, 0); 3537 } 3538 3539 static int 3540 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3541 { 3542 struct _umtx_time *tm_p, timeout; 3543 int error; 3544 3545 /* Allow a null timespec (wait forever). */ 3546 if (uap->uaddr2 == NULL) 3547 tm_p = NULL; 3548 else { 3549 error = umtx_copyin_umtx_time32(uap->uaddr2, 3550 (size_t)uap->uaddr1, &timeout); 3551 if (error != 0) 3552 return (error); 3553 tm_p = &timeout; 3554 } 3555 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3556 } 3557 3558 static int 3559 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3560 { 3561 struct timespec *ts, timeout; 3562 int error; 3563 3564 /* Allow a null timespec (wait forever). */ 3565 if (uap->uaddr2 == NULL) 3566 ts = NULL; 3567 else { 3568 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 3569 if (error != 0) 3570 return (error); 3571 ts = &timeout; 3572 } 3573 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3574 } 3575 3576 static int 3577 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3578 { 3579 struct _umtx_time timeout; 3580 int error; 3581 3582 /* Allow a null timespec (wait forever). */ 3583 if (uap->uaddr2 == NULL) { 3584 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3585 } else { 3586 error = umtx_copyin_umtx_time32(uap->uaddr2, 3587 (size_t)uap->uaddr1, &timeout); 3588 if (error != 0) 3589 return (error); 3590 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3591 } 3592 return (error); 3593 } 3594 3595 static int 3596 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3597 { 3598 struct _umtx_time timeout; 3599 int error; 3600 3601 /* Allow a null timespec (wait forever).
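 *
 * As in the other compat32 entry points, the 32-bit process hands
 * in a struct umtx_time32 (or a bare struct timespec32), and
 * umtx_copyin_umtx_time32() widens it into the native struct
 * _umtx_time before use.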
*/ 3602 if (uap->uaddr2 == NULL) { 3603 error = do_rw_wrlock(td, uap->obj, 0); 3604 } else { 3605 error = umtx_copyin_umtx_time32(uap->uaddr2, 3606 (size_t)uap->uaddr1, &timeout); 3607 if (error != 0) 3608 return (error); 3609 error = do_rw_wrlock(td, uap->obj, &timeout); 3610 } 3611 return (error); 3612 } 3613 3614 static int 3615 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3616 { 3617 struct _umtx_time *tm_p, timeout; 3618 int error; 3619 3620 if (uap->uaddr2 == NULL) 3621 tm_p = NULL; 3622 else { 3623 error = umtx_copyin_umtx_time32( 3624 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3625 if (error != 0) 3626 return (error); 3627 tm_p = &timeout; 3628 } 3629 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3630 } 3631 3632 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3633 static int 3634 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3635 { 3636 struct _umtx_time *tm_p, timeout; 3637 int error; 3638 3639 /* Allow a null timespec (wait forever). */ 3640 if (uap->uaddr2 == NULL) 3641 tm_p = NULL; 3642 else { 3643 error = umtx_copyin_umtx_time32(uap->uaddr2, 3644 (size_t)uap->uaddr1, &timeout); 3645 if (error != 0) 3646 return (error); 3647 tm_p = &timeout; 3648 } 3649 return (do_sem_wait(td, uap->obj, tm_p)); 3650 } 3651 #endif 3652 3653 static int 3654 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3655 { 3656 struct _umtx_time *tm_p, timeout; 3657 int error; 3658 3659 /* Allow a null timespec (wait forever). */ 3660 if (uap->uaddr2 == NULL) 3661 tm_p = NULL; 3662 else { 3663 error = umtx_copyin_umtx_time32(uap->uaddr2, 3664 (size_t)uap->uaddr1, &timeout); 3665 if (error != 0) 3666 return (error); 3667 tm_p = &timeout; 3668 } 3669 return (do_sem2_wait(td, uap->obj, tm_p)); 3670 } 3671 3672 static int 3673 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 3674 { 3675 int count = uap->val; 3676 uint32_t uaddrs[BATCH_SIZE]; 3677 uint32_t *upp = (uint32_t *)uap->obj; 3678 int tocopy; 3679 int error = 0; 3680 int i, pos = 0; 3681 3682 while (count > 0) { 3683 tocopy = count; 3684 if (tocopy > BATCH_SIZE) 3685 tocopy = BATCH_SIZE; 3686 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t)); 3687 if (error != 0) 3688 break; 3689 for (i = 0; i < tocopy; ++i) 3690 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 3691 INT_MAX, 1); 3692 count -= tocopy; 3693 pos += tocopy; 3694 } 3695 return (error); 3696 } 3697 3698 static _umtx_op_func op_table_compat32[] = { 3699 __umtx_op_unimpl, /* UMTX_OP_RESERVED0 */ 3700 __umtx_op_unimpl, /* UMTX_OP_RESERVED1 */ 3701 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ 3702 __umtx_op_wake, /* UMTX_OP_WAKE */ 3703 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3704 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */ 3705 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3706 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3707 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ 3708 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3709 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3710 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ 3711 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ 3712 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ 3713 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3714 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3715 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3716 __umtx_op_wait_umutex_compat32, /* UMTX_OP_MUTEX_WAIT */ 3717 __umtx_op_wake_umutex, /*
UMTX_OP_MUTEX_WAKE */ 3718 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3719 __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */ 3720 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3721 #else 3722 __umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */ 3723 __umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */ 3724 #endif 3725 __umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */ 3726 __umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */ 3727 __umtx_op_sem2_wait_compat32, /* UMTX_OP_SEM2_WAIT */ 3728 __umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */ 3729 }; 3730 3731 int 3732 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 3733 { 3734 if ((unsigned)uap->op < UMTX_OP_MAX) 3735 return (*op_table_compat32[uap->op])(td, 3736 (struct _umtx_op_args *)uap); 3737 return (EINVAL); 3738 } 3739 #endif 3740 3741 void 3742 umtx_thread_init(struct thread *td) 3743 { 3744 td->td_umtxq = umtxq_alloc(); 3745 td->td_umtxq->uq_thread = td; 3746 } 3747 3748 void 3749 umtx_thread_fini(struct thread *td) 3750 { 3751 umtxq_free(td->td_umtxq); 3752 } 3753 3754 /* 3755 * Called when a new thread is created, e.g. by fork(). 3756 */ 3757 void 3758 umtx_thread_alloc(struct thread *td) 3759 { 3760 struct umtx_q *uq; 3761 3762 uq = td->td_umtxq; 3763 uq->uq_inherited_pri = PRI_MAX; 3764 3765 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 3766 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 3767 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 3768 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 3769 } 3770 3771 /* 3772 * exec() hook. 3773 */ 3774 static void 3775 umtx_exec_hook(void *arg __unused, struct proc *p __unused, 3776 struct image_params *imgp __unused) 3777 { 3778 umtx_thread_cleanup(curthread); 3779 } 3780 3781 /* 3782 * thread_exit() hook. 3783 */ 3784 void 3785 umtx_thread_exit(struct thread *td) 3786 { 3787 umtx_thread_cleanup(td); 3788 } 3789 3790 /* 3791 * Clean up umtx data. 3792 */ 3793 static void 3794 umtx_thread_cleanup(struct thread *td) 3795 { 3796 struct umtx_q *uq; 3797 struct umtx_pi *pi; 3798 3799 if ((uq = td->td_umtxq) == NULL) 3800 return; 3801 3802 mtx_lock_spin(&umtx_lock); 3803 uq->uq_inherited_pri = PRI_MAX; 3804 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 3805 pi->pi_owner = NULL; 3806 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 3807 } 3808 mtx_unlock_spin(&umtx_lock); 3809 thread_lock(td); 3810 sched_lend_user_prio(td, PRI_MAX); 3811 thread_unlock(td); 3812 } 3813
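/*
 * Illustrative userland usage of the interface implemented above (a
 * sketch only, not part of the kernel; error handling omitted): a
 * one-shot event built on the private WAIT_UINT/WAKE operations.
 *
 *	static u_int event;
 *
 *	void
 *	event_wait(void)
 *	{
 *		while (atomic_load_acq_int(&event) == 0)
 *			_umtx_op(&event, UMTX_OP_WAIT_UINT_PRIVATE, 0,
 *			    NULL, NULL);
 *	}
 *
 *	void
 *	event_set(void)
 *	{
 *		atomic_store_rel_int(&event, 1);
 *		_umtx_op(&event, UMTX_OP_WAKE_PRIVATE, INT_MAX, NULL, NULL);
 *	}
 *
 * The kernel re-checks the word against the passed value (0 here)
 * before sleeping, so the store in event_set() cannot be missed.
 */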