1 /*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_compat.h" 32 #include "opt_umtx_profiling.h" 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mutex.h> 40 #include <sys/priv.h> 41 #include <sys/proc.h> 42 #include <sys/sbuf.h> 43 #include <sys/sched.h> 44 #include <sys/smp.h> 45 #include <sys/sysctl.h> 46 #include <sys/sysent.h> 47 #include <sys/systm.h> 48 #include <sys/sysproto.h> 49 #include <sys/syscallsubr.h> 50 #include <sys/eventhandler.h> 51 #include <sys/umtx.h> 52 53 #include <vm/vm.h> 54 #include <vm/vm_param.h> 55 #include <vm/pmap.h> 56 #include <vm/vm_map.h> 57 #include <vm/vm_object.h> 58 59 #include <machine/cpu.h> 60 61 #ifdef COMPAT_FREEBSD32 62 #include <compat/freebsd32/freebsd32_proto.h> 63 #endif 64 65 #define _UMUTEX_TRY 1 66 #define _UMUTEX_WAIT 2 67 68 #ifdef UMTX_PROFILING 69 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 70 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 71 #endif 72 73 /* Priority inheritance mutex info. */ 74 struct umtx_pi { 75 /* Owner thread */ 76 struct thread *pi_owner; 77 78 /* Reference count */ 79 int pi_refcount; 80 81 /* List entry to link umtx holding by thread */ 82 TAILQ_ENTRY(umtx_pi) pi_link; 83 84 /* List entry in hash */ 85 TAILQ_ENTRY(umtx_pi) pi_hashlink; 86 87 /* List for waiters */ 88 TAILQ_HEAD(,umtx_q) pi_blocked; 89 90 /* Identify a userland lock object */ 91 struct umtx_key pi_key; 92 }; 93 94 /* A userland synchronous object user. */ 95 struct umtx_q { 96 /* Linked list for the hash. */ 97 TAILQ_ENTRY(umtx_q) uq_link; 98 99 /* Umtx key. */ 100 struct umtx_key uq_key; 101 102 /* Umtx flags. */ 103 int uq_flags; 104 #define UQF_UMTXQ 0x0001 105 106 /* The thread waits on. */ 107 struct thread *uq_thread; 108 109 /* 110 * Blocked on PI mutex. read can use chain lock 111 * or umtx_lock, write must have both chain lock and 112 * umtx_lock being hold. 113 */ 114 struct umtx_pi *uq_pi_blocked; 115 116 /* On blocked list */ 117 TAILQ_ENTRY(umtx_q) uq_lockq; 118 119 /* Thread contending with us */ 120 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 121 122 /* Inherited priority from PP mutex */ 123 u_char uq_inherited_pri; 124 125 /* Spare queue ready to be reused */ 126 struct umtxq_queue *uq_spare_queue; 127 128 /* The queue we on */ 129 struct umtxq_queue *uq_cur_queue; 130 }; 131 132 TAILQ_HEAD(umtxq_head, umtx_q); 133 134 /* Per-key wait-queue */ 135 struct umtxq_queue { 136 struct umtxq_head head; 137 struct umtx_key key; 138 LIST_ENTRY(umtxq_queue) link; 139 int length; 140 }; 141 142 LIST_HEAD(umtxq_list, umtxq_queue); 143 144 /* Userland lock object's wait-queue chain */ 145 struct umtxq_chain { 146 /* Lock for this chain. */ 147 struct mtx uc_lock; 148 149 /* List of sleep queues. */ 150 struct umtxq_list uc_queue[2]; 151 #define UMTX_SHARED_QUEUE 0 152 #define UMTX_EXCLUSIVE_QUEUE 1 153 154 LIST_HEAD(, umtxq_queue) uc_spare_queue; 155 156 /* Busy flag */ 157 char uc_busy; 158 159 /* Chain lock waiters */ 160 int uc_waiters; 161 162 /* All PI in the list */ 163 TAILQ_HEAD(,umtx_pi) uc_pi_list; 164 165 #ifdef UMTX_PROFILING 166 u_int length; 167 u_int max_length; 168 #endif 169 }; 170 171 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 172 173 /* 174 * Don't propagate time-sharing priority, there is a security reason, 175 * a user can simply introduce PI-mutex, let thread A lock the mutex, 176 * and let another thread B block on the mutex, because B is 177 * sleeping, its priority will be boosted, this causes A's priority to 178 * be boosted via priority propagating too and will never be lowered even 179 * if it is using 100%CPU, this is unfair to other processes. 180 */ 181 182 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 183 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 184 PRI_MAX_TIMESHARE : (td)->td_user_pri) 185 186 #define GOLDEN_RATIO_PRIME 2654404609U 187 #define UMTX_CHAINS 512 188 #define UMTX_SHIFTS (__WORD_BIT - 9) 189 190 #define GET_SHARE(flags) \ 191 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 192 193 #define BUSY_SPINS 200 194 195 struct abs_timeout { 196 int clockid; 197 struct timespec cur; 198 struct timespec end; 199 }; 200 201 static uma_zone_t umtx_pi_zone; 202 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 203 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 204 static int umtx_pi_allocated; 205 206 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 207 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 208 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 209 210 #ifdef UMTX_PROFILING 211 static long max_length; 212 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 213 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats"); 214 #endif 215 216 static void umtxq_sysinit(void *); 217 static void umtxq_hash(struct umtx_key *key); 218 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 219 static void umtxq_lock(struct umtx_key *key); 220 static void umtxq_unlock(struct umtx_key *key); 221 static void umtxq_busy(struct umtx_key *key); 222 static void umtxq_unbusy(struct umtx_key *key); 223 static void umtxq_insert_queue(struct umtx_q *uq, int q); 224 static void umtxq_remove_queue(struct umtx_q *uq, int q); 225 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); 226 static int umtxq_count(struct umtx_key *key); 227 static struct umtx_pi *umtx_pi_alloc(int); 228 static void umtx_pi_free(struct umtx_pi *pi); 229 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); 230 static void umtx_thread_cleanup(struct thread *td); 231 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 232 struct image_params *imgp __unused); 233 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 234 235 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 236 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 237 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 238 239 static struct mtx umtx_lock; 240 241 #ifdef UMTX_PROFILING 242 static void 243 umtx_init_profiling(void) 244 { 245 struct sysctl_oid *chain_oid; 246 char chain_name[10]; 247 int i; 248 249 for (i = 0; i < UMTX_CHAINS; ++i) { 250 snprintf(chain_name, sizeof(chain_name), "%d", i); 251 chain_oid = SYSCTL_ADD_NODE(NULL, 252 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 253 chain_name, CTLFLAG_RD, NULL, "umtx hash stats"); 254 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 255 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 256 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 257 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 258 } 259 } 260 261 static int 262 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 263 { 264 char buf[512]; 265 struct sbuf sb; 266 struct umtxq_chain *uc; 267 u_int fract, i, j, tot, whole; 268 u_int sf0, sf1, sf2, sf3, sf4; 269 u_int si0, si1, si2, si3, si4; 270 u_int sw0, sw1, sw2, sw3, sw4; 271 272 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 273 for (i = 0; i < 2; i++) { 274 tot = 0; 275 for (j = 0; j < UMTX_CHAINS; ++j) { 276 uc = &umtxq_chains[i][j]; 277 mtx_lock(&uc->uc_lock); 278 tot += uc->max_length; 279 mtx_unlock(&uc->uc_lock); 280 } 281 if (tot == 0) 282 sbuf_printf(&sb, "%u) Empty ", i); 283 else { 284 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 285 si0 = si1 = si2 = si3 = si4 = 0; 286 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 287 for (j = 0; j < UMTX_CHAINS; j++) { 288 uc = &umtxq_chains[i][j]; 289 mtx_lock(&uc->uc_lock); 290 whole = uc->max_length * 100; 291 mtx_unlock(&uc->uc_lock); 292 fract = (whole % tot) * 100; 293 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 294 sf0 = fract; 295 si0 = j; 296 sw0 = whole; 297 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 298 sf1)) { 299 sf1 = fract; 300 si1 = j; 301 sw1 = whole; 302 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 303 sf2)) { 304 sf2 = fract; 305 si2 = j; 306 sw2 = whole; 307 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 308 sf3)) { 309 sf3 = fract; 310 si3 = j; 311 sw3 = whole; 312 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 313 sf4)) { 314 sf4 = fract; 315 si4 = j; 316 sw4 = whole; 317 } 318 } 319 sbuf_printf(&sb, "queue %u:\n", i); 320 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 321 sf0 / tot, si0); 322 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 323 sf1 / tot, si1); 324 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 325 sf2 / tot, si2); 326 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 327 sf3 / tot, si3); 328 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 329 sf4 / tot, si4); 330 } 331 } 332 sbuf_trim(&sb); 333 sbuf_finish(&sb); 334 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 335 sbuf_delete(&sb); 336 return (0); 337 } 338 339 static int 340 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 341 { 342 struct umtxq_chain *uc; 343 u_int i, j; 344 int clear, error; 345 346 clear = 0; 347 error = sysctl_handle_int(oidp, &clear, 0, req); 348 if (error != 0 || req->newptr == NULL) 349 return (error); 350 351 if (clear != 0) { 352 for (i = 0; i < 2; ++i) { 353 for (j = 0; j < UMTX_CHAINS; ++j) { 354 uc = &umtxq_chains[i][j]; 355 mtx_lock(&uc->uc_lock); 356 uc->length = 0; 357 uc->max_length = 0; 358 mtx_unlock(&uc->uc_lock); 359 } 360 } 361 } 362 return (0); 363 } 364 365 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 366 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 367 sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics"); 368 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 369 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 370 sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length"); 371 #endif 372 373 static void 374 umtxq_sysinit(void *arg __unused) 375 { 376 int i, j; 377 378 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 379 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 380 for (i = 0; i < 2; ++i) { 381 for (j = 0; j < UMTX_CHAINS; ++j) { 382 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 383 MTX_DEF | MTX_DUPOK); 384 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 385 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 386 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 387 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 388 umtxq_chains[i][j].uc_busy = 0; 389 umtxq_chains[i][j].uc_waiters = 0; 390 #ifdef UMTX_PROFILING 391 umtxq_chains[i][j].length = 0; 392 umtxq_chains[i][j].max_length = 0; 393 #endif 394 } 395 } 396 #ifdef UMTX_PROFILING 397 umtx_init_profiling(); 398 #endif 399 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 400 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 401 EVENTHANDLER_PRI_ANY); 402 } 403 404 struct umtx_q * 405 umtxq_alloc(void) 406 { 407 struct umtx_q *uq; 408 409 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 410 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); 411 TAILQ_INIT(&uq->uq_spare_queue->head); 412 TAILQ_INIT(&uq->uq_pi_contested); 413 uq->uq_inherited_pri = PRI_MAX; 414 return (uq); 415 } 416 417 void 418 umtxq_free(struct umtx_q *uq) 419 { 420 MPASS(uq->uq_spare_queue != NULL); 421 free(uq->uq_spare_queue, M_UMTX); 422 free(uq, M_UMTX); 423 } 424 425 static inline void 426 umtxq_hash(struct umtx_key *key) 427 { 428 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; 429 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 430 } 431 432 static inline struct umtxq_chain * 433 umtxq_getchain(struct umtx_key *key) 434 { 435 if (key->type <= TYPE_SEM) 436 return (&umtxq_chains[1][key->hash]); 437 return (&umtxq_chains[0][key->hash]); 438 } 439 440 /* 441 * Lock a chain. 442 */ 443 static inline void 444 umtxq_lock(struct umtx_key *key) 445 { 446 struct umtxq_chain *uc; 447 448 uc = umtxq_getchain(key); 449 mtx_lock(&uc->uc_lock); 450 } 451 452 /* 453 * Unlock a chain. 454 */ 455 static inline void 456 umtxq_unlock(struct umtx_key *key) 457 { 458 struct umtxq_chain *uc; 459 460 uc = umtxq_getchain(key); 461 mtx_unlock(&uc->uc_lock); 462 } 463 464 /* 465 * Set chain to busy state when following operation 466 * may be blocked (kernel mutex can not be used). 467 */ 468 static inline void 469 umtxq_busy(struct umtx_key *key) 470 { 471 struct umtxq_chain *uc; 472 473 uc = umtxq_getchain(key); 474 mtx_assert(&uc->uc_lock, MA_OWNED); 475 if (uc->uc_busy) { 476 #ifdef SMP 477 if (smp_cpus > 1) { 478 int count = BUSY_SPINS; 479 if (count > 0) { 480 umtxq_unlock(key); 481 while (uc->uc_busy && --count > 0) 482 cpu_spinwait(); 483 umtxq_lock(key); 484 } 485 } 486 #endif 487 while (uc->uc_busy) { 488 uc->uc_waiters++; 489 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 490 uc->uc_waiters--; 491 } 492 } 493 uc->uc_busy = 1; 494 } 495 496 /* 497 * Unbusy a chain. 498 */ 499 static inline void 500 umtxq_unbusy(struct umtx_key *key) 501 { 502 struct umtxq_chain *uc; 503 504 uc = umtxq_getchain(key); 505 mtx_assert(&uc->uc_lock, MA_OWNED); 506 KASSERT(uc->uc_busy != 0, ("not busy")); 507 uc->uc_busy = 0; 508 if (uc->uc_waiters) 509 wakeup_one(uc); 510 } 511 512 static inline void 513 umtxq_unbusy_unlocked(struct umtx_key *key) 514 { 515 516 umtxq_lock(key); 517 umtxq_unbusy(key); 518 umtxq_unlock(key); 519 } 520 521 static struct umtxq_queue * 522 umtxq_queue_lookup(struct umtx_key *key, int q) 523 { 524 struct umtxq_queue *uh; 525 struct umtxq_chain *uc; 526 527 uc = umtxq_getchain(key); 528 UMTXQ_LOCKED_ASSERT(uc); 529 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 530 if (umtx_key_match(&uh->key, key)) 531 return (uh); 532 } 533 534 return (NULL); 535 } 536 537 static inline void 538 umtxq_insert_queue(struct umtx_q *uq, int q) 539 { 540 struct umtxq_queue *uh; 541 struct umtxq_chain *uc; 542 543 uc = umtxq_getchain(&uq->uq_key); 544 UMTXQ_LOCKED_ASSERT(uc); 545 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 546 uh = umtxq_queue_lookup(&uq->uq_key, q); 547 if (uh != NULL) { 548 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 549 } else { 550 uh = uq->uq_spare_queue; 551 uh->key = uq->uq_key; 552 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 553 #ifdef UMTX_PROFILING 554 uc->length++; 555 if (uc->length > uc->max_length) { 556 uc->max_length = uc->length; 557 if (uc->max_length > max_length) 558 max_length = uc->max_length; 559 } 560 #endif 561 } 562 uq->uq_spare_queue = NULL; 563 564 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 565 uh->length++; 566 uq->uq_flags |= UQF_UMTXQ; 567 uq->uq_cur_queue = uh; 568 return; 569 } 570 571 static inline void 572 umtxq_remove_queue(struct umtx_q *uq, int q) 573 { 574 struct umtxq_chain *uc; 575 struct umtxq_queue *uh; 576 577 uc = umtxq_getchain(&uq->uq_key); 578 UMTXQ_LOCKED_ASSERT(uc); 579 if (uq->uq_flags & UQF_UMTXQ) { 580 uh = uq->uq_cur_queue; 581 TAILQ_REMOVE(&uh->head, uq, uq_link); 582 uh->length--; 583 uq->uq_flags &= ~UQF_UMTXQ; 584 if (TAILQ_EMPTY(&uh->head)) { 585 KASSERT(uh->length == 0, 586 ("inconsistent umtxq_queue length")); 587 #ifdef UMTX_PROFILING 588 uc->length--; 589 #endif 590 LIST_REMOVE(uh, link); 591 } else { 592 uh = LIST_FIRST(&uc->uc_spare_queue); 593 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 594 LIST_REMOVE(uh, link); 595 } 596 uq->uq_spare_queue = uh; 597 uq->uq_cur_queue = NULL; 598 } 599 } 600 601 /* 602 * Check if there are multiple waiters 603 */ 604 static int 605 umtxq_count(struct umtx_key *key) 606 { 607 struct umtxq_chain *uc; 608 struct umtxq_queue *uh; 609 610 uc = umtxq_getchain(key); 611 UMTXQ_LOCKED_ASSERT(uc); 612 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 613 if (uh != NULL) 614 return (uh->length); 615 return (0); 616 } 617 618 /* 619 * Check if there are multiple PI waiters and returns first 620 * waiter. 621 */ 622 static int 623 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 624 { 625 struct umtxq_chain *uc; 626 struct umtxq_queue *uh; 627 628 *first = NULL; 629 uc = umtxq_getchain(key); 630 UMTXQ_LOCKED_ASSERT(uc); 631 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 632 if (uh != NULL) { 633 *first = TAILQ_FIRST(&uh->head); 634 return (uh->length); 635 } 636 return (0); 637 } 638 639 static int 640 umtxq_check_susp(struct thread *td) 641 { 642 struct proc *p; 643 int error; 644 645 /* 646 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to 647 * eventually break the lockstep loop. 648 */ 649 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) 650 return (0); 651 error = 0; 652 p = td->td_proc; 653 PROC_LOCK(p); 654 if (P_SHOULDSTOP(p) || 655 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) { 656 if (p->p_flag & P_SINGLE_EXIT) 657 error = EINTR; 658 else 659 error = ERESTART; 660 } 661 PROC_UNLOCK(p); 662 return (error); 663 } 664 665 /* 666 * Wake up threads waiting on an userland object. 667 */ 668 669 static int 670 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 671 { 672 struct umtxq_chain *uc; 673 struct umtxq_queue *uh; 674 struct umtx_q *uq; 675 int ret; 676 677 ret = 0; 678 uc = umtxq_getchain(key); 679 UMTXQ_LOCKED_ASSERT(uc); 680 uh = umtxq_queue_lookup(key, q); 681 if (uh != NULL) { 682 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 683 umtxq_remove_queue(uq, q); 684 wakeup(uq); 685 if (++ret >= n_wake) 686 return (ret); 687 } 688 } 689 return (ret); 690 } 691 692 693 /* 694 * Wake up specified thread. 695 */ 696 static inline void 697 umtxq_signal_thread(struct umtx_q *uq) 698 { 699 struct umtxq_chain *uc; 700 701 uc = umtxq_getchain(&uq->uq_key); 702 UMTXQ_LOCKED_ASSERT(uc); 703 umtxq_remove(uq); 704 wakeup(uq); 705 } 706 707 static inline int 708 tstohz(const struct timespec *tsp) 709 { 710 struct timeval tv; 711 712 TIMESPEC_TO_TIMEVAL(&tv, tsp); 713 return tvtohz(&tv); 714 } 715 716 static void 717 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, 718 const struct timespec *timeout) 719 { 720 721 timo->clockid = clockid; 722 if (!absolute) { 723 kern_clock_gettime(curthread, clockid, &timo->end); 724 timo->cur = timo->end; 725 timespecadd(&timo->end, timeout); 726 } else { 727 timo->end = *timeout; 728 kern_clock_gettime(curthread, clockid, &timo->cur); 729 } 730 } 731 732 static void 733 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) 734 { 735 736 abs_timeout_init(timo, umtxtime->_clockid, 737 (umtxtime->_flags & UMTX_ABSTIME) != 0, 738 &umtxtime->_timeout); 739 } 740 741 static inline void 742 abs_timeout_update(struct abs_timeout *timo) 743 { 744 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 745 } 746 747 static int 748 abs_timeout_gethz(struct abs_timeout *timo) 749 { 750 struct timespec tts; 751 752 if (timespeccmp(&timo->end, &timo->cur, <=)) 753 return (-1); 754 tts = timo->end; 755 timespecsub(&tts, &timo->cur); 756 return (tstohz(&tts)); 757 } 758 759 /* 760 * Put thread into sleep state, before sleeping, check if 761 * thread was removed from umtx queue. 762 */ 763 static inline int 764 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) 765 { 766 struct umtxq_chain *uc; 767 int error, timo; 768 769 uc = umtxq_getchain(&uq->uq_key); 770 UMTXQ_LOCKED_ASSERT(uc); 771 for (;;) { 772 if (!(uq->uq_flags & UQF_UMTXQ)) 773 return (0); 774 if (abstime != NULL) { 775 timo = abs_timeout_gethz(abstime); 776 if (timo < 0) 777 return (ETIMEDOUT); 778 } else 779 timo = 0; 780 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 781 if (error != EWOULDBLOCK) { 782 umtxq_lock(&uq->uq_key); 783 break; 784 } 785 if (abstime != NULL) 786 abs_timeout_update(abstime); 787 umtxq_lock(&uq->uq_key); 788 } 789 return (error); 790 } 791 792 /* 793 * Convert userspace address into unique logical address. 794 */ 795 int 796 umtx_key_get(void *addr, int type, int share, struct umtx_key *key) 797 { 798 struct thread *td = curthread; 799 vm_map_t map; 800 vm_map_entry_t entry; 801 vm_pindex_t pindex; 802 vm_prot_t prot; 803 boolean_t wired; 804 805 key->type = type; 806 if (share == THREAD_SHARE) { 807 key->shared = 0; 808 key->info.private.vs = td->td_proc->p_vmspace; 809 key->info.private.addr = (uintptr_t)addr; 810 } else { 811 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 812 map = &td->td_proc->p_vmspace->vm_map; 813 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 814 &entry, &key->info.shared.object, &pindex, &prot, 815 &wired) != KERN_SUCCESS) { 816 return EFAULT; 817 } 818 819 if ((share == PROCESS_SHARE) || 820 (share == AUTO_SHARE && 821 VM_INHERIT_SHARE == entry->inheritance)) { 822 key->shared = 1; 823 key->info.shared.offset = entry->offset + entry->start - 824 (vm_offset_t)addr; 825 vm_object_reference(key->info.shared.object); 826 } else { 827 key->shared = 0; 828 key->info.private.vs = td->td_proc->p_vmspace; 829 key->info.private.addr = (uintptr_t)addr; 830 } 831 vm_map_lookup_done(map, entry); 832 } 833 834 umtxq_hash(key); 835 return (0); 836 } 837 838 /* 839 * Release key. 840 */ 841 void 842 umtx_key_release(struct umtx_key *key) 843 { 844 if (key->shared) 845 vm_object_deallocate(key->info.shared.object); 846 } 847 848 /* 849 * Fetch and compare value, sleep on the address if value is not changed. 850 */ 851 static int 852 do_wait(struct thread *td, void *addr, u_long id, 853 struct _umtx_time *timeout, int compat32, int is_private) 854 { 855 struct abs_timeout timo; 856 struct umtx_q *uq; 857 u_long tmp; 858 uint32_t tmp32; 859 int error = 0; 860 861 uq = td->td_umtxq; 862 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 863 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 864 return (error); 865 866 if (timeout != NULL) 867 abs_timeout_init2(&timo, timeout); 868 869 umtxq_lock(&uq->uq_key); 870 umtxq_insert(uq); 871 umtxq_unlock(&uq->uq_key); 872 if (compat32 == 0) { 873 error = fueword(addr, &tmp); 874 if (error != 0) 875 error = EFAULT; 876 } else { 877 error = fueword32(addr, &tmp32); 878 if (error == 0) 879 tmp = tmp32; 880 else 881 error = EFAULT; 882 } 883 umtxq_lock(&uq->uq_key); 884 if (error == 0) { 885 if (tmp == id) 886 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 887 NULL : &timo); 888 if ((uq->uq_flags & UQF_UMTXQ) == 0) 889 error = 0; 890 else 891 umtxq_remove(uq); 892 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 893 umtxq_remove(uq); 894 } 895 umtxq_unlock(&uq->uq_key); 896 umtx_key_release(&uq->uq_key); 897 if (error == ERESTART) 898 error = EINTR; 899 return (error); 900 } 901 902 /* 903 * Wake up threads sleeping on the specified address. 904 */ 905 int 906 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 907 { 908 struct umtx_key key; 909 int ret; 910 911 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 912 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 913 return (ret); 914 umtxq_lock(&key); 915 ret = umtxq_signal(&key, n_wake); 916 umtxq_unlock(&key); 917 umtx_key_release(&key); 918 return (0); 919 } 920 921 /* 922 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 923 */ 924 static int 925 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 926 struct _umtx_time *timeout, int mode) 927 { 928 struct abs_timeout timo; 929 struct umtx_q *uq; 930 uint32_t owner, old, id; 931 int error, rv; 932 933 id = td->td_tid; 934 uq = td->td_umtxq; 935 error = 0; 936 if (timeout != NULL) 937 abs_timeout_init2(&timo, timeout); 938 939 /* 940 * Care must be exercised when dealing with umtx structure. It 941 * can fault on any access. 942 */ 943 for (;;) { 944 rv = fueword32(&m->m_owner, &owner); 945 if (rv == -1) 946 return (EFAULT); 947 if (mode == _UMUTEX_WAIT) { 948 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) 949 return (0); 950 } else { 951 /* 952 * Try the uncontested case. This should be done in userland. 953 */ 954 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 955 &owner, id); 956 /* The address was invalid. */ 957 if (rv == -1) 958 return (EFAULT); 959 960 /* The acquire succeeded. */ 961 if (owner == UMUTEX_UNOWNED) 962 return (0); 963 964 /* If no one owns it but it is contested try to acquire it. */ 965 if (owner == UMUTEX_CONTESTED) { 966 rv = casueword32(&m->m_owner, 967 UMUTEX_CONTESTED, &owner, 968 id | UMUTEX_CONTESTED); 969 /* The address was invalid. */ 970 if (rv == -1) 971 return (EFAULT); 972 973 if (owner == UMUTEX_CONTESTED) 974 return (0); 975 976 rv = umtxq_check_susp(td); 977 if (rv != 0) 978 return (rv); 979 980 /* If this failed the lock has changed, restart. */ 981 continue; 982 } 983 } 984 985 if (mode == _UMUTEX_TRY) 986 return (EBUSY); 987 988 /* 989 * If we caught a signal, we have retried and now 990 * exit immediately. 991 */ 992 if (error != 0) 993 return (error); 994 995 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 996 GET_SHARE(flags), &uq->uq_key)) != 0) 997 return (error); 998 999 umtxq_lock(&uq->uq_key); 1000 umtxq_busy(&uq->uq_key); 1001 umtxq_insert(uq); 1002 umtxq_unlock(&uq->uq_key); 1003 1004 /* 1005 * Set the contested bit so that a release in user space 1006 * knows to use the system call for unlock. If this fails 1007 * either some one else has acquired the lock or it has been 1008 * released. 1009 */ 1010 rv = casueword32(&m->m_owner, owner, &old, 1011 owner | UMUTEX_CONTESTED); 1012 1013 /* The address was invalid. */ 1014 if (rv == -1) { 1015 umtxq_lock(&uq->uq_key); 1016 umtxq_remove(uq); 1017 umtxq_unbusy(&uq->uq_key); 1018 umtxq_unlock(&uq->uq_key); 1019 umtx_key_release(&uq->uq_key); 1020 return (EFAULT); 1021 } 1022 1023 /* 1024 * We set the contested bit, sleep. Otherwise the lock changed 1025 * and we need to retry or we lost a race to the thread 1026 * unlocking the umtx. 1027 */ 1028 umtxq_lock(&uq->uq_key); 1029 umtxq_unbusy(&uq->uq_key); 1030 if (old == owner) 1031 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1032 NULL : &timo); 1033 umtxq_remove(uq); 1034 umtxq_unlock(&uq->uq_key); 1035 umtx_key_release(&uq->uq_key); 1036 1037 if (error == 0) 1038 error = umtxq_check_susp(td); 1039 } 1040 1041 return (0); 1042 } 1043 1044 /* 1045 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1046 */ 1047 static int 1048 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) 1049 { 1050 struct umtx_key key; 1051 uint32_t owner, old, id; 1052 int error; 1053 int count; 1054 1055 id = td->td_tid; 1056 /* 1057 * Make sure we own this mtx. 1058 */ 1059 error = fueword32(&m->m_owner, &owner); 1060 if (error == -1) 1061 return (EFAULT); 1062 1063 if ((owner & ~UMUTEX_CONTESTED) != id) 1064 return (EPERM); 1065 1066 if ((owner & UMUTEX_CONTESTED) == 0) { 1067 error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED); 1068 if (error == -1) 1069 return (EFAULT); 1070 if (old == owner) 1071 return (0); 1072 owner = old; 1073 } 1074 1075 /* We should only ever be in here for contested locks */ 1076 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1077 &key)) != 0) 1078 return (error); 1079 1080 umtxq_lock(&key); 1081 umtxq_busy(&key); 1082 count = umtxq_count(&key); 1083 umtxq_unlock(&key); 1084 1085 /* 1086 * When unlocking the umtx, it must be marked as unowned if 1087 * there is zero or one thread only waiting for it. 1088 * Otherwise, it must be marked as contested. 1089 */ 1090 error = casueword32(&m->m_owner, owner, &old, 1091 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1092 umtxq_lock(&key); 1093 umtxq_signal(&key,1); 1094 umtxq_unbusy(&key); 1095 umtxq_unlock(&key); 1096 umtx_key_release(&key); 1097 if (error == -1) 1098 return (EFAULT); 1099 if (old != owner) 1100 return (EINVAL); 1101 return (0); 1102 } 1103 1104 /* 1105 * Check if the mutex is available and wake up a waiter, 1106 * only for simple mutex. 1107 */ 1108 static int 1109 do_wake_umutex(struct thread *td, struct umutex *m) 1110 { 1111 struct umtx_key key; 1112 uint32_t owner; 1113 uint32_t flags; 1114 int error; 1115 int count; 1116 1117 error = fueword32(&m->m_owner, &owner); 1118 if (error == -1) 1119 return (EFAULT); 1120 1121 if ((owner & ~UMUTEX_CONTESTED) != 0) 1122 return (0); 1123 1124 error = fueword32(&m->m_flags, &flags); 1125 if (error == -1) 1126 return (EFAULT); 1127 1128 /* We should only ever be in here for contested locks */ 1129 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1130 &key)) != 0) 1131 return (error); 1132 1133 umtxq_lock(&key); 1134 umtxq_busy(&key); 1135 count = umtxq_count(&key); 1136 umtxq_unlock(&key); 1137 1138 if (count <= 1) { 1139 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1140 UMUTEX_UNOWNED); 1141 if (error == -1) 1142 error = EFAULT; 1143 } 1144 1145 umtxq_lock(&key); 1146 if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) 1147 umtxq_signal(&key, 1); 1148 umtxq_unbusy(&key); 1149 umtxq_unlock(&key); 1150 umtx_key_release(&key); 1151 return (error); 1152 } 1153 1154 /* 1155 * Check if the mutex has waiters and tries to fix contention bit. 1156 */ 1157 static int 1158 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1159 { 1160 struct umtx_key key; 1161 uint32_t owner, old; 1162 int type; 1163 int error; 1164 int count; 1165 1166 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 1167 case 0: 1168 type = TYPE_NORMAL_UMUTEX; 1169 break; 1170 case UMUTEX_PRIO_INHERIT: 1171 type = TYPE_PI_UMUTEX; 1172 break; 1173 case UMUTEX_PRIO_PROTECT: 1174 type = TYPE_PP_UMUTEX; 1175 break; 1176 default: 1177 return (EINVAL); 1178 } 1179 if ((error = umtx_key_get(m, type, GET_SHARE(flags), 1180 &key)) != 0) 1181 return (error); 1182 1183 owner = 0; 1184 umtxq_lock(&key); 1185 umtxq_busy(&key); 1186 count = umtxq_count(&key); 1187 umtxq_unlock(&key); 1188 /* 1189 * Only repair contention bit if there is a waiter, this means the mutex 1190 * is still being referenced by userland code, otherwise don't update 1191 * any memory. 1192 */ 1193 if (count > 1) { 1194 error = fueword32(&m->m_owner, &owner); 1195 if (error == -1) 1196 error = EFAULT; 1197 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) { 1198 error = casueword32(&m->m_owner, owner, &old, 1199 owner | UMUTEX_CONTESTED); 1200 if (error == -1) { 1201 error = EFAULT; 1202 break; 1203 } 1204 if (old == owner) 1205 break; 1206 owner = old; 1207 error = umtxq_check_susp(td); 1208 if (error != 0) 1209 break; 1210 } 1211 } else if (count == 1) { 1212 error = fueword32(&m->m_owner, &owner); 1213 if (error == -1) 1214 error = EFAULT; 1215 while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 && 1216 (owner & UMUTEX_CONTESTED) == 0) { 1217 error = casueword32(&m->m_owner, owner, &old, 1218 owner | UMUTEX_CONTESTED); 1219 if (error == -1) { 1220 error = EFAULT; 1221 break; 1222 } 1223 if (old == owner) 1224 break; 1225 owner = old; 1226 error = umtxq_check_susp(td); 1227 if (error != 0) 1228 break; 1229 } 1230 } 1231 umtxq_lock(&key); 1232 if (error == EFAULT) { 1233 umtxq_signal(&key, INT_MAX); 1234 } else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) 1235 umtxq_signal(&key, 1); 1236 umtxq_unbusy(&key); 1237 umtxq_unlock(&key); 1238 umtx_key_release(&key); 1239 return (error); 1240 } 1241 1242 static inline struct umtx_pi * 1243 umtx_pi_alloc(int flags) 1244 { 1245 struct umtx_pi *pi; 1246 1247 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1248 TAILQ_INIT(&pi->pi_blocked); 1249 atomic_add_int(&umtx_pi_allocated, 1); 1250 return (pi); 1251 } 1252 1253 static inline void 1254 umtx_pi_free(struct umtx_pi *pi) 1255 { 1256 uma_zfree(umtx_pi_zone, pi); 1257 atomic_add_int(&umtx_pi_allocated, -1); 1258 } 1259 1260 /* 1261 * Adjust the thread's position on a pi_state after its priority has been 1262 * changed. 1263 */ 1264 static int 1265 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1266 { 1267 struct umtx_q *uq, *uq1, *uq2; 1268 struct thread *td1; 1269 1270 mtx_assert(&umtx_lock, MA_OWNED); 1271 if (pi == NULL) 1272 return (0); 1273 1274 uq = td->td_umtxq; 1275 1276 /* 1277 * Check if the thread needs to be moved on the blocked chain. 1278 * It needs to be moved if either its priority is lower than 1279 * the previous thread or higher than the next thread. 1280 */ 1281 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1282 uq2 = TAILQ_NEXT(uq, uq_lockq); 1283 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1284 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1285 /* 1286 * Remove thread from blocked chain and determine where 1287 * it should be moved to. 1288 */ 1289 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1290 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1291 td1 = uq1->uq_thread; 1292 MPASS(td1->td_proc->p_magic == P_MAGIC); 1293 if (UPRI(td1) > UPRI(td)) 1294 break; 1295 } 1296 1297 if (uq1 == NULL) 1298 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1299 else 1300 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1301 } 1302 return (1); 1303 } 1304 1305 static struct umtx_pi * 1306 umtx_pi_next(struct umtx_pi *pi) 1307 { 1308 struct umtx_q *uq_owner; 1309 1310 if (pi->pi_owner == NULL) 1311 return (NULL); 1312 uq_owner = pi->pi_owner->td_umtxq; 1313 if (uq_owner == NULL) 1314 return (NULL); 1315 return (uq_owner->uq_pi_blocked); 1316 } 1317 1318 /* 1319 * Floyd's Cycle-Finding Algorithm. 1320 */ 1321 static bool 1322 umtx_pi_check_loop(struct umtx_pi *pi) 1323 { 1324 struct umtx_pi *pi1; /* fast iterator */ 1325 1326 mtx_assert(&umtx_lock, MA_OWNED); 1327 if (pi == NULL) 1328 return (false); 1329 pi1 = pi; 1330 for (;;) { 1331 pi = umtx_pi_next(pi); 1332 if (pi == NULL) 1333 break; 1334 pi1 = umtx_pi_next(pi1); 1335 if (pi1 == NULL) 1336 break; 1337 pi1 = umtx_pi_next(pi1); 1338 if (pi1 == NULL) 1339 break; 1340 if (pi == pi1) 1341 return (true); 1342 } 1343 return (false); 1344 } 1345 1346 /* 1347 * Propagate priority when a thread is blocked on POSIX 1348 * PI mutex. 1349 */ 1350 static void 1351 umtx_propagate_priority(struct thread *td) 1352 { 1353 struct umtx_q *uq; 1354 struct umtx_pi *pi; 1355 int pri; 1356 1357 mtx_assert(&umtx_lock, MA_OWNED); 1358 pri = UPRI(td); 1359 uq = td->td_umtxq; 1360 pi = uq->uq_pi_blocked; 1361 if (pi == NULL) 1362 return; 1363 if (umtx_pi_check_loop(pi)) 1364 return; 1365 1366 for (;;) { 1367 td = pi->pi_owner; 1368 if (td == NULL || td == curthread) 1369 return; 1370 1371 MPASS(td->td_proc != NULL); 1372 MPASS(td->td_proc->p_magic == P_MAGIC); 1373 1374 thread_lock(td); 1375 if (td->td_lend_user_pri > pri) 1376 sched_lend_user_prio(td, pri); 1377 else { 1378 thread_unlock(td); 1379 break; 1380 } 1381 thread_unlock(td); 1382 1383 /* 1384 * Pick up the lock that td is blocked on. 1385 */ 1386 uq = td->td_umtxq; 1387 pi = uq->uq_pi_blocked; 1388 if (pi == NULL) 1389 break; 1390 /* Resort td on the list if needed. */ 1391 umtx_pi_adjust_thread(pi, td); 1392 } 1393 } 1394 1395 /* 1396 * Unpropagate priority for a PI mutex when a thread blocked on 1397 * it is interrupted by signal or resumed by others. 1398 */ 1399 static void 1400 umtx_repropagate_priority(struct umtx_pi *pi) 1401 { 1402 struct umtx_q *uq, *uq_owner; 1403 struct umtx_pi *pi2; 1404 int pri; 1405 1406 mtx_assert(&umtx_lock, MA_OWNED); 1407 1408 if (umtx_pi_check_loop(pi)) 1409 return; 1410 while (pi != NULL && pi->pi_owner != NULL) { 1411 pri = PRI_MAX; 1412 uq_owner = pi->pi_owner->td_umtxq; 1413 1414 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1415 uq = TAILQ_FIRST(&pi2->pi_blocked); 1416 if (uq != NULL) { 1417 if (pri > UPRI(uq->uq_thread)) 1418 pri = UPRI(uq->uq_thread); 1419 } 1420 } 1421 1422 if (pri > uq_owner->uq_inherited_pri) 1423 pri = uq_owner->uq_inherited_pri; 1424 thread_lock(pi->pi_owner); 1425 sched_lend_user_prio(pi->pi_owner, pri); 1426 thread_unlock(pi->pi_owner); 1427 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1428 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1429 } 1430 } 1431 1432 /* 1433 * Insert a PI mutex into owned list. 1434 */ 1435 static void 1436 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1437 { 1438 struct umtx_q *uq_owner; 1439 1440 uq_owner = owner->td_umtxq; 1441 mtx_assert(&umtx_lock, MA_OWNED); 1442 if (pi->pi_owner != NULL) 1443 panic("pi_ower != NULL"); 1444 pi->pi_owner = owner; 1445 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1446 } 1447 1448 1449 /* 1450 * Disown a PI mutex, and remove it from the owned list. 1451 */ 1452 static void 1453 umtx_pi_disown(struct umtx_pi *pi) 1454 { 1455 1456 mtx_assert(&umtx_lock, MA_OWNED); 1457 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1458 pi->pi_owner = NULL; 1459 } 1460 1461 /* 1462 * Claim ownership of a PI mutex. 1463 */ 1464 static int 1465 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1466 { 1467 struct umtx_q *uq, *uq_owner; 1468 1469 uq_owner = owner->td_umtxq; 1470 mtx_lock(&umtx_lock); 1471 if (pi->pi_owner == owner) { 1472 mtx_unlock(&umtx_lock); 1473 return (0); 1474 } 1475 1476 if (pi->pi_owner != NULL) { 1477 /* 1478 * userland may have already messed the mutex, sigh. 1479 */ 1480 mtx_unlock(&umtx_lock); 1481 return (EPERM); 1482 } 1483 umtx_pi_setowner(pi, owner); 1484 uq = TAILQ_FIRST(&pi->pi_blocked); 1485 if (uq != NULL) { 1486 int pri; 1487 1488 pri = UPRI(uq->uq_thread); 1489 thread_lock(owner); 1490 if (pri < UPRI(owner)) 1491 sched_lend_user_prio(owner, pri); 1492 thread_unlock(owner); 1493 } 1494 mtx_unlock(&umtx_lock); 1495 return (0); 1496 } 1497 1498 /* 1499 * Adjust a thread's order position in its blocked PI mutex, 1500 * this may result new priority propagating process. 1501 */ 1502 void 1503 umtx_pi_adjust(struct thread *td, u_char oldpri) 1504 { 1505 struct umtx_q *uq; 1506 struct umtx_pi *pi; 1507 1508 uq = td->td_umtxq; 1509 mtx_lock(&umtx_lock); 1510 /* 1511 * Pick up the lock that td is blocked on. 1512 */ 1513 pi = uq->uq_pi_blocked; 1514 if (pi != NULL) { 1515 umtx_pi_adjust_thread(pi, td); 1516 umtx_repropagate_priority(pi); 1517 } 1518 mtx_unlock(&umtx_lock); 1519 } 1520 1521 /* 1522 * Sleep on a PI mutex. 1523 */ 1524 static int 1525 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1526 uint32_t owner, const char *wmesg, struct abs_timeout *timo) 1527 { 1528 struct umtxq_chain *uc; 1529 struct thread *td, *td1; 1530 struct umtx_q *uq1; 1531 int pri; 1532 int error = 0; 1533 1534 td = uq->uq_thread; 1535 KASSERT(td == curthread, ("inconsistent uq_thread")); 1536 uc = umtxq_getchain(&uq->uq_key); 1537 UMTXQ_LOCKED_ASSERT(uc); 1538 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 1539 umtxq_insert(uq); 1540 mtx_lock(&umtx_lock); 1541 if (pi->pi_owner == NULL) { 1542 mtx_unlock(&umtx_lock); 1543 /* XXX Only look up thread in current process. */ 1544 td1 = tdfind(owner, curproc->p_pid); 1545 mtx_lock(&umtx_lock); 1546 if (td1 != NULL) { 1547 if (pi->pi_owner == NULL) 1548 umtx_pi_setowner(pi, td1); 1549 PROC_UNLOCK(td1->td_proc); 1550 } 1551 } 1552 1553 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1554 pri = UPRI(uq1->uq_thread); 1555 if (pri > UPRI(td)) 1556 break; 1557 } 1558 1559 if (uq1 != NULL) 1560 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1561 else 1562 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1563 1564 uq->uq_pi_blocked = pi; 1565 thread_lock(td); 1566 td->td_flags |= TDF_UPIBLOCKED; 1567 thread_unlock(td); 1568 umtx_propagate_priority(td); 1569 mtx_unlock(&umtx_lock); 1570 umtxq_unbusy(&uq->uq_key); 1571 1572 error = umtxq_sleep(uq, wmesg, timo); 1573 umtxq_remove(uq); 1574 1575 mtx_lock(&umtx_lock); 1576 uq->uq_pi_blocked = NULL; 1577 thread_lock(td); 1578 td->td_flags &= ~TDF_UPIBLOCKED; 1579 thread_unlock(td); 1580 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1581 umtx_repropagate_priority(pi); 1582 mtx_unlock(&umtx_lock); 1583 umtxq_unlock(&uq->uq_key); 1584 1585 return (error); 1586 } 1587 1588 /* 1589 * Add reference count for a PI mutex. 1590 */ 1591 static void 1592 umtx_pi_ref(struct umtx_pi *pi) 1593 { 1594 struct umtxq_chain *uc; 1595 1596 uc = umtxq_getchain(&pi->pi_key); 1597 UMTXQ_LOCKED_ASSERT(uc); 1598 pi->pi_refcount++; 1599 } 1600 1601 /* 1602 * Decrease reference count for a PI mutex, if the counter 1603 * is decreased to zero, its memory space is freed. 1604 */ 1605 static void 1606 umtx_pi_unref(struct umtx_pi *pi) 1607 { 1608 struct umtxq_chain *uc; 1609 1610 uc = umtxq_getchain(&pi->pi_key); 1611 UMTXQ_LOCKED_ASSERT(uc); 1612 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1613 if (--pi->pi_refcount == 0) { 1614 mtx_lock(&umtx_lock); 1615 if (pi->pi_owner != NULL) { 1616 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, 1617 pi, pi_link); 1618 pi->pi_owner = NULL; 1619 } 1620 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1621 ("blocked queue not empty")); 1622 mtx_unlock(&umtx_lock); 1623 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1624 umtx_pi_free(pi); 1625 } 1626 } 1627 1628 /* 1629 * Find a PI mutex in hash table. 1630 */ 1631 static struct umtx_pi * 1632 umtx_pi_lookup(struct umtx_key *key) 1633 { 1634 struct umtxq_chain *uc; 1635 struct umtx_pi *pi; 1636 1637 uc = umtxq_getchain(key); 1638 UMTXQ_LOCKED_ASSERT(uc); 1639 1640 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1641 if (umtx_key_match(&pi->pi_key, key)) { 1642 return (pi); 1643 } 1644 } 1645 return (NULL); 1646 } 1647 1648 /* 1649 * Insert a PI mutex into hash table. 1650 */ 1651 static inline void 1652 umtx_pi_insert(struct umtx_pi *pi) 1653 { 1654 struct umtxq_chain *uc; 1655 1656 uc = umtxq_getchain(&pi->pi_key); 1657 UMTXQ_LOCKED_ASSERT(uc); 1658 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1659 } 1660 1661 /* 1662 * Lock a PI mutex. 1663 */ 1664 static int 1665 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 1666 struct _umtx_time *timeout, int try) 1667 { 1668 struct abs_timeout timo; 1669 struct umtx_q *uq; 1670 struct umtx_pi *pi, *new_pi; 1671 uint32_t id, owner, old; 1672 int error, rv; 1673 1674 id = td->td_tid; 1675 uq = td->td_umtxq; 1676 1677 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1678 &uq->uq_key)) != 0) 1679 return (error); 1680 1681 if (timeout != NULL) 1682 abs_timeout_init2(&timo, timeout); 1683 1684 umtxq_lock(&uq->uq_key); 1685 pi = umtx_pi_lookup(&uq->uq_key); 1686 if (pi == NULL) { 1687 new_pi = umtx_pi_alloc(M_NOWAIT); 1688 if (new_pi == NULL) { 1689 umtxq_unlock(&uq->uq_key); 1690 new_pi = umtx_pi_alloc(M_WAITOK); 1691 umtxq_lock(&uq->uq_key); 1692 pi = umtx_pi_lookup(&uq->uq_key); 1693 if (pi != NULL) { 1694 umtx_pi_free(new_pi); 1695 new_pi = NULL; 1696 } 1697 } 1698 if (new_pi != NULL) { 1699 new_pi->pi_key = uq->uq_key; 1700 umtx_pi_insert(new_pi); 1701 pi = new_pi; 1702 } 1703 } 1704 umtx_pi_ref(pi); 1705 umtxq_unlock(&uq->uq_key); 1706 1707 /* 1708 * Care must be exercised when dealing with umtx structure. It 1709 * can fault on any access. 1710 */ 1711 for (;;) { 1712 /* 1713 * Try the uncontested case. This should be done in userland. 1714 */ 1715 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 1716 /* The address was invalid. */ 1717 if (rv == -1) { 1718 error = EFAULT; 1719 break; 1720 } 1721 1722 /* The acquire succeeded. */ 1723 if (owner == UMUTEX_UNOWNED) { 1724 error = 0; 1725 break; 1726 } 1727 1728 /* If no one owns it but it is contested try to acquire it. */ 1729 if (owner == UMUTEX_CONTESTED) { 1730 rv = casueword32(&m->m_owner, 1731 UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); 1732 /* The address was invalid. */ 1733 if (rv == -1) { 1734 error = EFAULT; 1735 break; 1736 } 1737 1738 if (owner == UMUTEX_CONTESTED) { 1739 umtxq_lock(&uq->uq_key); 1740 umtxq_busy(&uq->uq_key); 1741 error = umtx_pi_claim(pi, td); 1742 umtxq_unbusy(&uq->uq_key); 1743 umtxq_unlock(&uq->uq_key); 1744 if (error != 0) { 1745 /* 1746 * Since we're going to return an 1747 * error, restore the m_owner to its 1748 * previous, unowned state to avoid 1749 * compounding the problem. 1750 */ 1751 (void)casuword32(&m->m_owner, 1752 id | UMUTEX_CONTESTED, 1753 UMUTEX_CONTESTED); 1754 } 1755 break; 1756 } 1757 1758 error = umtxq_check_susp(td); 1759 if (error != 0) 1760 break; 1761 1762 /* If this failed the lock has changed, restart. */ 1763 continue; 1764 } 1765 1766 if ((owner & ~UMUTEX_CONTESTED) == id) { 1767 error = EDEADLK; 1768 break; 1769 } 1770 1771 if (try != 0) { 1772 error = EBUSY; 1773 break; 1774 } 1775 1776 /* 1777 * If we caught a signal, we have retried and now 1778 * exit immediately. 1779 */ 1780 if (error != 0) 1781 break; 1782 1783 umtxq_lock(&uq->uq_key); 1784 umtxq_busy(&uq->uq_key); 1785 umtxq_unlock(&uq->uq_key); 1786 1787 /* 1788 * Set the contested bit so that a release in user space 1789 * knows to use the system call for unlock. If this fails 1790 * either some one else has acquired the lock or it has been 1791 * released. 1792 */ 1793 rv = casueword32(&m->m_owner, owner, &old, 1794 owner | UMUTEX_CONTESTED); 1795 1796 /* The address was invalid. */ 1797 if (rv == -1) { 1798 umtxq_unbusy_unlocked(&uq->uq_key); 1799 error = EFAULT; 1800 break; 1801 } 1802 1803 umtxq_lock(&uq->uq_key); 1804 /* 1805 * We set the contested bit, sleep. Otherwise the lock changed 1806 * and we need to retry or we lost a race to the thread 1807 * unlocking the umtx. 1808 */ 1809 if (old == owner) { 1810 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 1811 "umtxpi", timeout == NULL ? NULL : &timo); 1812 if (error != 0) 1813 continue; 1814 } else { 1815 umtxq_unbusy(&uq->uq_key); 1816 umtxq_unlock(&uq->uq_key); 1817 } 1818 1819 error = umtxq_check_susp(td); 1820 if (error != 0) 1821 break; 1822 } 1823 1824 umtxq_lock(&uq->uq_key); 1825 umtx_pi_unref(pi); 1826 umtxq_unlock(&uq->uq_key); 1827 1828 umtx_key_release(&uq->uq_key); 1829 return (error); 1830 } 1831 1832 /* 1833 * Unlock a PI mutex. 1834 */ 1835 static int 1836 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) 1837 { 1838 struct umtx_key key; 1839 struct umtx_q *uq_first, *uq_first2, *uq_me; 1840 struct umtx_pi *pi, *pi2; 1841 uint32_t owner, old, id; 1842 int error; 1843 int count; 1844 int pri; 1845 1846 id = td->td_tid; 1847 /* 1848 * Make sure we own this mtx. 1849 */ 1850 error = fueword32(&m->m_owner, &owner); 1851 if (error == -1) 1852 return (EFAULT); 1853 1854 if ((owner & ~UMUTEX_CONTESTED) != id) 1855 return (EPERM); 1856 1857 /* This should be done in userland */ 1858 if ((owner & UMUTEX_CONTESTED) == 0) { 1859 error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED); 1860 if (error == -1) 1861 return (EFAULT); 1862 if (old == owner) 1863 return (0); 1864 owner = old; 1865 } 1866 1867 /* We should only ever be in here for contested locks */ 1868 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1869 &key)) != 0) 1870 return (error); 1871 1872 umtxq_lock(&key); 1873 umtxq_busy(&key); 1874 count = umtxq_count_pi(&key, &uq_first); 1875 if (uq_first != NULL) { 1876 mtx_lock(&umtx_lock); 1877 pi = uq_first->uq_pi_blocked; 1878 KASSERT(pi != NULL, ("pi == NULL?")); 1879 if (pi->pi_owner != curthread) { 1880 mtx_unlock(&umtx_lock); 1881 umtxq_unbusy(&key); 1882 umtxq_unlock(&key); 1883 umtx_key_release(&key); 1884 /* userland messed the mutex */ 1885 return (EPERM); 1886 } 1887 uq_me = curthread->td_umtxq; 1888 umtx_pi_disown(pi); 1889 /* get highest priority thread which is still sleeping. */ 1890 uq_first = TAILQ_FIRST(&pi->pi_blocked); 1891 while (uq_first != NULL && 1892 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 1893 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 1894 } 1895 pri = PRI_MAX; 1896 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 1897 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 1898 if (uq_first2 != NULL) { 1899 if (pri > UPRI(uq_first2->uq_thread)) 1900 pri = UPRI(uq_first2->uq_thread); 1901 } 1902 } 1903 thread_lock(curthread); 1904 sched_lend_user_prio(curthread, pri); 1905 thread_unlock(curthread); 1906 mtx_unlock(&umtx_lock); 1907 if (uq_first) 1908 umtxq_signal_thread(uq_first); 1909 } else { 1910 pi = umtx_pi_lookup(&key); 1911 /* 1912 * A umtx_pi can exist if a signal or timeout removed the 1913 * last waiter from the umtxq, but there is still 1914 * a thread in do_lock_pi() holding the umtx_pi. 1915 */ 1916 if (pi != NULL) { 1917 /* 1918 * The umtx_pi can be unowned, such as when a thread 1919 * has just entered do_lock_pi(), allocated the 1920 * umtx_pi, and unlocked the umtxq. 1921 * If the current thread owns it, it must disown it. 1922 */ 1923 mtx_lock(&umtx_lock); 1924 if (pi->pi_owner == td) 1925 umtx_pi_disown(pi); 1926 mtx_unlock(&umtx_lock); 1927 } 1928 } 1929 umtxq_unlock(&key); 1930 1931 /* 1932 * When unlocking the umtx, it must be marked as unowned if 1933 * there is zero or one thread only waiting for it. 1934 * Otherwise, it must be marked as contested. 1935 */ 1936 error = casueword32(&m->m_owner, owner, &old, 1937 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1938 1939 umtxq_unbusy_unlocked(&key); 1940 umtx_key_release(&key); 1941 if (error == -1) 1942 return (EFAULT); 1943 if (old != owner) 1944 return (EINVAL); 1945 return (0); 1946 } 1947 1948 /* 1949 * Lock a PP mutex. 1950 */ 1951 static int 1952 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 1953 struct _umtx_time *timeout, int try) 1954 { 1955 struct abs_timeout timo; 1956 struct umtx_q *uq, *uq2; 1957 struct umtx_pi *pi; 1958 uint32_t ceiling; 1959 uint32_t owner, id; 1960 int error, pri, old_inherited_pri, su, rv; 1961 1962 id = td->td_tid; 1963 uq = td->td_umtxq; 1964 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 1965 &uq->uq_key)) != 0) 1966 return (error); 1967 1968 if (timeout != NULL) 1969 abs_timeout_init2(&timo, timeout); 1970 1971 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1972 for (;;) { 1973 old_inherited_pri = uq->uq_inherited_pri; 1974 umtxq_lock(&uq->uq_key); 1975 umtxq_busy(&uq->uq_key); 1976 umtxq_unlock(&uq->uq_key); 1977 1978 rv = fueword32(&m->m_ceilings[0], &ceiling); 1979 if (rv == -1) { 1980 error = EFAULT; 1981 goto out; 1982 } 1983 ceiling = RTP_PRIO_MAX - ceiling; 1984 if (ceiling > RTP_PRIO_MAX) { 1985 error = EINVAL; 1986 goto out; 1987 } 1988 1989 mtx_lock(&umtx_lock); 1990 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 1991 mtx_unlock(&umtx_lock); 1992 error = EINVAL; 1993 goto out; 1994 } 1995 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 1996 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 1997 thread_lock(td); 1998 if (uq->uq_inherited_pri < UPRI(td)) 1999 sched_lend_user_prio(td, uq->uq_inherited_pri); 2000 thread_unlock(td); 2001 } 2002 mtx_unlock(&umtx_lock); 2003 2004 rv = casueword32(&m->m_owner, 2005 UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); 2006 /* The address was invalid. */ 2007 if (rv == -1) { 2008 error = EFAULT; 2009 break; 2010 } 2011 2012 if (owner == UMUTEX_CONTESTED) { 2013 error = 0; 2014 break; 2015 } 2016 2017 if (try != 0) { 2018 error = EBUSY; 2019 break; 2020 } 2021 2022 /* 2023 * If we caught a signal, we have retried and now 2024 * exit immediately. 2025 */ 2026 if (error != 0) 2027 break; 2028 2029 umtxq_lock(&uq->uq_key); 2030 umtxq_insert(uq); 2031 umtxq_unbusy(&uq->uq_key); 2032 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2033 NULL : &timo); 2034 umtxq_remove(uq); 2035 umtxq_unlock(&uq->uq_key); 2036 2037 mtx_lock(&umtx_lock); 2038 uq->uq_inherited_pri = old_inherited_pri; 2039 pri = PRI_MAX; 2040 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2041 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2042 if (uq2 != NULL) { 2043 if (pri > UPRI(uq2->uq_thread)) 2044 pri = UPRI(uq2->uq_thread); 2045 } 2046 } 2047 if (pri > uq->uq_inherited_pri) 2048 pri = uq->uq_inherited_pri; 2049 thread_lock(td); 2050 sched_lend_user_prio(td, pri); 2051 thread_unlock(td); 2052 mtx_unlock(&umtx_lock); 2053 } 2054 2055 if (error != 0) { 2056 mtx_lock(&umtx_lock); 2057 uq->uq_inherited_pri = old_inherited_pri; 2058 pri = PRI_MAX; 2059 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2060 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2061 if (uq2 != NULL) { 2062 if (pri > UPRI(uq2->uq_thread)) 2063 pri = UPRI(uq2->uq_thread); 2064 } 2065 } 2066 if (pri > uq->uq_inherited_pri) 2067 pri = uq->uq_inherited_pri; 2068 thread_lock(td); 2069 sched_lend_user_prio(td, pri); 2070 thread_unlock(td); 2071 mtx_unlock(&umtx_lock); 2072 } 2073 2074 out: 2075 umtxq_unbusy_unlocked(&uq->uq_key); 2076 umtx_key_release(&uq->uq_key); 2077 return (error); 2078 } 2079 2080 /* 2081 * Unlock a PP mutex. 2082 */ 2083 static int 2084 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 2085 { 2086 struct umtx_key key; 2087 struct umtx_q *uq, *uq2; 2088 struct umtx_pi *pi; 2089 uint32_t owner, id; 2090 uint32_t rceiling; 2091 int error, pri, new_inherited_pri, su; 2092 2093 id = td->td_tid; 2094 uq = td->td_umtxq; 2095 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2096 2097 /* 2098 * Make sure we own this mtx. 2099 */ 2100 error = fueword32(&m->m_owner, &owner); 2101 if (error == -1) 2102 return (EFAULT); 2103 2104 if ((owner & ~UMUTEX_CONTESTED) != id) 2105 return (EPERM); 2106 2107 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2108 if (error != 0) 2109 return (error); 2110 2111 if (rceiling == -1) 2112 new_inherited_pri = PRI_MAX; 2113 else { 2114 rceiling = RTP_PRIO_MAX - rceiling; 2115 if (rceiling > RTP_PRIO_MAX) 2116 return (EINVAL); 2117 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2118 } 2119 2120 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2121 &key)) != 0) 2122 return (error); 2123 umtxq_lock(&key); 2124 umtxq_busy(&key); 2125 umtxq_unlock(&key); 2126 /* 2127 * For priority protected mutex, always set unlocked state 2128 * to UMUTEX_CONTESTED, so that userland always enters kernel 2129 * to lock the mutex, it is necessary because thread priority 2130 * has to be adjusted for such mutex. 2131 */ 2132 error = suword32(&m->m_owner, UMUTEX_CONTESTED); 2133 2134 umtxq_lock(&key); 2135 if (error == 0) 2136 umtxq_signal(&key, 1); 2137 umtxq_unbusy(&key); 2138 umtxq_unlock(&key); 2139 2140 if (error == -1) 2141 error = EFAULT; 2142 else { 2143 mtx_lock(&umtx_lock); 2144 if (su != 0) 2145 uq->uq_inherited_pri = new_inherited_pri; 2146 pri = PRI_MAX; 2147 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2148 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2149 if (uq2 != NULL) { 2150 if (pri > UPRI(uq2->uq_thread)) 2151 pri = UPRI(uq2->uq_thread); 2152 } 2153 } 2154 if (pri > uq->uq_inherited_pri) 2155 pri = uq->uq_inherited_pri; 2156 thread_lock(td); 2157 sched_lend_user_prio(td, pri); 2158 thread_unlock(td); 2159 mtx_unlock(&umtx_lock); 2160 } 2161 umtx_key_release(&key); 2162 return (error); 2163 } 2164 2165 static int 2166 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2167 uint32_t *old_ceiling) 2168 { 2169 struct umtx_q *uq; 2170 uint32_t save_ceiling; 2171 uint32_t owner, id; 2172 uint32_t flags; 2173 int error, rv; 2174 2175 error = fueword32(&m->m_flags, &flags); 2176 if (error == -1) 2177 return (EFAULT); 2178 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2179 return (EINVAL); 2180 if (ceiling > RTP_PRIO_MAX) 2181 return (EINVAL); 2182 id = td->td_tid; 2183 uq = td->td_umtxq; 2184 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2185 &uq->uq_key)) != 0) 2186 return (error); 2187 for (;;) { 2188 umtxq_lock(&uq->uq_key); 2189 umtxq_busy(&uq->uq_key); 2190 umtxq_unlock(&uq->uq_key); 2191 2192 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2193 if (rv == -1) { 2194 error = EFAULT; 2195 break; 2196 } 2197 2198 rv = casueword32(&m->m_owner, 2199 UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); 2200 if (rv == -1) { 2201 error = EFAULT; 2202 break; 2203 } 2204 2205 if (owner == UMUTEX_CONTESTED) { 2206 suword32(&m->m_ceilings[0], ceiling); 2207 suword32(&m->m_owner, UMUTEX_CONTESTED); 2208 error = 0; 2209 break; 2210 } 2211 2212 if ((owner & ~UMUTEX_CONTESTED) == id) { 2213 suword32(&m->m_ceilings[0], ceiling); 2214 error = 0; 2215 break; 2216 } 2217 2218 /* 2219 * If we caught a signal, we have retried and now 2220 * exit immediately. 2221 */ 2222 if (error != 0) 2223 break; 2224 2225 /* 2226 * We set the contested bit, sleep. Otherwise the lock changed 2227 * and we need to retry or we lost a race to the thread 2228 * unlocking the umtx. 2229 */ 2230 umtxq_lock(&uq->uq_key); 2231 umtxq_insert(uq); 2232 umtxq_unbusy(&uq->uq_key); 2233 error = umtxq_sleep(uq, "umtxpp", NULL); 2234 umtxq_remove(uq); 2235 umtxq_unlock(&uq->uq_key); 2236 } 2237 umtxq_lock(&uq->uq_key); 2238 if (error == 0) 2239 umtxq_signal(&uq->uq_key, INT_MAX); 2240 umtxq_unbusy(&uq->uq_key); 2241 umtxq_unlock(&uq->uq_key); 2242 umtx_key_release(&uq->uq_key); 2243 if (error == 0 && old_ceiling != NULL) 2244 suword32(old_ceiling, save_ceiling); 2245 return (error); 2246 } 2247 2248 /* 2249 * Lock a userland POSIX mutex. 2250 */ 2251 static int 2252 do_lock_umutex(struct thread *td, struct umutex *m, 2253 struct _umtx_time *timeout, int mode) 2254 { 2255 uint32_t flags; 2256 int error; 2257 2258 error = fueword32(&m->m_flags, &flags); 2259 if (error == -1) 2260 return (EFAULT); 2261 2262 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2263 case 0: 2264 error = do_lock_normal(td, m, flags, timeout, mode); 2265 break; 2266 case UMUTEX_PRIO_INHERIT: 2267 error = do_lock_pi(td, m, flags, timeout, mode); 2268 break; 2269 case UMUTEX_PRIO_PROTECT: 2270 error = do_lock_pp(td, m, flags, timeout, mode); 2271 break; 2272 default: 2273 return (EINVAL); 2274 } 2275 if (timeout == NULL) { 2276 if (error == EINTR && mode != _UMUTEX_WAIT) 2277 error = ERESTART; 2278 } else { 2279 /* Timed-locking is not restarted. */ 2280 if (error == ERESTART) 2281 error = EINTR; 2282 } 2283 return (error); 2284 } 2285 2286 /* 2287 * Unlock a userland POSIX mutex. 2288 */ 2289 static int 2290 do_unlock_umutex(struct thread *td, struct umutex *m) 2291 { 2292 uint32_t flags; 2293 int error; 2294 2295 error = fueword32(&m->m_flags, &flags); 2296 if (error == -1) 2297 return (EFAULT); 2298 2299 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2300 case 0: 2301 return (do_unlock_normal(td, m, flags)); 2302 case UMUTEX_PRIO_INHERIT: 2303 return (do_unlock_pi(td, m, flags)); 2304 case UMUTEX_PRIO_PROTECT: 2305 return (do_unlock_pp(td, m, flags)); 2306 } 2307 2308 return (EINVAL); 2309 } 2310 2311 static int 2312 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2313 struct timespec *timeout, u_long wflags) 2314 { 2315 struct abs_timeout timo; 2316 struct umtx_q *uq; 2317 uint32_t flags, clockid, hasw; 2318 int error; 2319 2320 uq = td->td_umtxq; 2321 error = fueword32(&cv->c_flags, &flags); 2322 if (error == -1) 2323 return (EFAULT); 2324 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2325 if (error != 0) 2326 return (error); 2327 2328 if ((wflags & CVWAIT_CLOCKID) != 0) { 2329 error = fueword32(&cv->c_clockid, &clockid); 2330 if (error == -1) { 2331 umtx_key_release(&uq->uq_key); 2332 return (EFAULT); 2333 } 2334 if (clockid < CLOCK_REALTIME || 2335 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2336 /* hmm, only HW clock id will work. */ 2337 umtx_key_release(&uq->uq_key); 2338 return (EINVAL); 2339 } 2340 } else { 2341 clockid = CLOCK_REALTIME; 2342 } 2343 2344 umtxq_lock(&uq->uq_key); 2345 umtxq_busy(&uq->uq_key); 2346 umtxq_insert(uq); 2347 umtxq_unlock(&uq->uq_key); 2348 2349 /* 2350 * Set c_has_waiters to 1 before releasing user mutex, also 2351 * don't modify cache line when unnecessary. 2352 */ 2353 error = fueword32(&cv->c_has_waiters, &hasw); 2354 if (error == 0 && hasw == 0) 2355 suword32(&cv->c_has_waiters, 1); 2356 2357 umtxq_unbusy_unlocked(&uq->uq_key); 2358 2359 error = do_unlock_umutex(td, m); 2360 2361 if (timeout != NULL) 2362 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), 2363 timeout); 2364 2365 umtxq_lock(&uq->uq_key); 2366 if (error == 0) { 2367 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2368 NULL : &timo); 2369 } 2370 2371 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2372 error = 0; 2373 else { 2374 /* 2375 * This must be timeout,interrupted by signal or 2376 * surprious wakeup, clear c_has_waiter flag when 2377 * necessary. 2378 */ 2379 umtxq_busy(&uq->uq_key); 2380 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2381 int oldlen = uq->uq_cur_queue->length; 2382 umtxq_remove(uq); 2383 if (oldlen == 1) { 2384 umtxq_unlock(&uq->uq_key); 2385 suword32(&cv->c_has_waiters, 0); 2386 umtxq_lock(&uq->uq_key); 2387 } 2388 } 2389 umtxq_unbusy(&uq->uq_key); 2390 if (error == ERESTART) 2391 error = EINTR; 2392 } 2393 2394 umtxq_unlock(&uq->uq_key); 2395 umtx_key_release(&uq->uq_key); 2396 return (error); 2397 } 2398 2399 /* 2400 * Signal a userland condition variable. 2401 */ 2402 static int 2403 do_cv_signal(struct thread *td, struct ucond *cv) 2404 { 2405 struct umtx_key key; 2406 int error, cnt, nwake; 2407 uint32_t flags; 2408 2409 error = fueword32(&cv->c_flags, &flags); 2410 if (error == -1) 2411 return (EFAULT); 2412 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2413 return (error); 2414 umtxq_lock(&key); 2415 umtxq_busy(&key); 2416 cnt = umtxq_count(&key); 2417 nwake = umtxq_signal(&key, 1); 2418 if (cnt <= nwake) { 2419 umtxq_unlock(&key); 2420 error = suword32(&cv->c_has_waiters, 0); 2421 if (error == -1) 2422 error = EFAULT; 2423 umtxq_lock(&key); 2424 } 2425 umtxq_unbusy(&key); 2426 umtxq_unlock(&key); 2427 umtx_key_release(&key); 2428 return (error); 2429 } 2430 2431 static int 2432 do_cv_broadcast(struct thread *td, struct ucond *cv) 2433 { 2434 struct umtx_key key; 2435 int error; 2436 uint32_t flags; 2437 2438 error = fueword32(&cv->c_flags, &flags); 2439 if (error == -1) 2440 return (EFAULT); 2441 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2442 return (error); 2443 2444 umtxq_lock(&key); 2445 umtxq_busy(&key); 2446 umtxq_signal(&key, INT_MAX); 2447 umtxq_unlock(&key); 2448 2449 error = suword32(&cv->c_has_waiters, 0); 2450 if (error == -1) 2451 error = EFAULT; 2452 2453 umtxq_unbusy_unlocked(&key); 2454 2455 umtx_key_release(&key); 2456 return (error); 2457 } 2458 2459 static int 2460 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2461 { 2462 struct abs_timeout timo; 2463 struct umtx_q *uq; 2464 uint32_t flags, wrflags; 2465 int32_t state, oldstate; 2466 int32_t blocked_readers; 2467 int error, rv; 2468 2469 uq = td->td_umtxq; 2470 error = fueword32(&rwlock->rw_flags, &flags); 2471 if (error == -1) 2472 return (EFAULT); 2473 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2474 if (error != 0) 2475 return (error); 2476 2477 if (timeout != NULL) 2478 abs_timeout_init2(&timo, timeout); 2479 2480 wrflags = URWLOCK_WRITE_OWNER; 2481 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2482 wrflags |= URWLOCK_WRITE_WAITERS; 2483 2484 for (;;) { 2485 rv = fueword32(&rwlock->rw_state, &state); 2486 if (rv == -1) { 2487 umtx_key_release(&uq->uq_key); 2488 return (EFAULT); 2489 } 2490 2491 /* try to lock it */ 2492 while (!(state & wrflags)) { 2493 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2494 umtx_key_release(&uq->uq_key); 2495 return (EAGAIN); 2496 } 2497 rv = casueword32(&rwlock->rw_state, state, 2498 &oldstate, state + 1); 2499 if (rv == -1) { 2500 umtx_key_release(&uq->uq_key); 2501 return (EFAULT); 2502 } 2503 if (oldstate == state) { 2504 umtx_key_release(&uq->uq_key); 2505 return (0); 2506 } 2507 error = umtxq_check_susp(td); 2508 if (error != 0) 2509 break; 2510 state = oldstate; 2511 } 2512 2513 if (error) 2514 break; 2515 2516 /* grab monitor lock */ 2517 umtxq_lock(&uq->uq_key); 2518 umtxq_busy(&uq->uq_key); 2519 umtxq_unlock(&uq->uq_key); 2520 2521 /* 2522 * re-read the state, in case it changed between the try-lock above 2523 * and the check below 2524 */ 2525 rv = fueword32(&rwlock->rw_state, &state); 2526 if (rv == -1) 2527 error = EFAULT; 2528 2529 /* set read contention bit */ 2530 while (error == 0 && (state & wrflags) && 2531 !(state & URWLOCK_READ_WAITERS)) { 2532 rv = casueword32(&rwlock->rw_state, state, 2533 &oldstate, state | URWLOCK_READ_WAITERS); 2534 if (rv == -1) { 2535 error = EFAULT; 2536 break; 2537 } 2538 if (oldstate == state) 2539 goto sleep; 2540 state = oldstate; 2541 error = umtxq_check_susp(td); 2542 if (error != 0) 2543 break; 2544 } 2545 if (error != 0) { 2546 umtxq_unbusy_unlocked(&uq->uq_key); 2547 break; 2548 } 2549 2550 /* state is changed while setting flags, restart */ 2551 if (!(state & wrflags)) { 2552 umtxq_unbusy_unlocked(&uq->uq_key); 2553 error = umtxq_check_susp(td); 2554 if (error != 0) 2555 break; 2556 continue; 2557 } 2558 2559 sleep: 2560 /* contention bit is set, before sleeping, increase read waiter count */ 2561 rv = fueword32(&rwlock->rw_blocked_readers, 2562 &blocked_readers); 2563 if (rv == -1) { 2564 umtxq_unbusy_unlocked(&uq->uq_key); 2565 error = EFAULT; 2566 break; 2567 } 2568 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2569 2570 while (state & wrflags) { 2571 umtxq_lock(&uq->uq_key); 2572 umtxq_insert(uq); 2573 umtxq_unbusy(&uq->uq_key); 2574 2575 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2576 NULL : &timo); 2577 2578 umtxq_busy(&uq->uq_key); 2579 umtxq_remove(uq); 2580 umtxq_unlock(&uq->uq_key); 2581 if (error) 2582 break; 2583 rv = fueword32(&rwlock->rw_state, &state); 2584 if (rv == -1) { 2585 error = EFAULT; 2586 break; 2587 } 2588 } 2589 2590 /* decrease read waiter count, and may clear read contention bit */ 2591 rv = fueword32(&rwlock->rw_blocked_readers, 2592 &blocked_readers); 2593 if (rv == -1) { 2594 umtxq_unbusy_unlocked(&uq->uq_key); 2595 error = EFAULT; 2596 break; 2597 } 2598 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2599 if (blocked_readers == 1) { 2600 rv = fueword32(&rwlock->rw_state, &state); 2601 if (rv == -1) 2602 error = EFAULT; 2603 while (error == 0) { 2604 rv = casueword32(&rwlock->rw_state, state, 2605 &oldstate, state & ~URWLOCK_READ_WAITERS); 2606 if (rv == -1) { 2607 error = EFAULT; 2608 break; 2609 } 2610 if (oldstate == state) 2611 break; 2612 state = oldstate; 2613 error = umtxq_check_susp(td); 2614 } 2615 } 2616 2617 umtxq_unbusy_unlocked(&uq->uq_key); 2618 if (error != 0) 2619 break; 2620 } 2621 umtx_key_release(&uq->uq_key); 2622 if (error == ERESTART) 2623 error = EINTR; 2624 return (error); 2625 } 2626 2627 static int 2628 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2629 { 2630 struct abs_timeout timo; 2631 struct umtx_q *uq; 2632 uint32_t flags; 2633 int32_t state, oldstate; 2634 int32_t blocked_writers; 2635 int32_t blocked_readers; 2636 int error, rv; 2637 2638 uq = td->td_umtxq; 2639 error = fueword32(&rwlock->rw_flags, &flags); 2640 if (error == -1) 2641 return (EFAULT); 2642 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2643 if (error != 0) 2644 return (error); 2645 2646 if (timeout != NULL) 2647 abs_timeout_init2(&timo, timeout); 2648 2649 blocked_readers = 0; 2650 for (;;) { 2651 rv = fueword32(&rwlock->rw_state, &state); 2652 if (rv == -1) { 2653 umtx_key_release(&uq->uq_key); 2654 return (EFAULT); 2655 } 2656 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2657 rv = casueword32(&rwlock->rw_state, state, 2658 &oldstate, state | URWLOCK_WRITE_OWNER); 2659 if (rv == -1) { 2660 umtx_key_release(&uq->uq_key); 2661 return (EFAULT); 2662 } 2663 if (oldstate == state) { 2664 umtx_key_release(&uq->uq_key); 2665 return (0); 2666 } 2667 state = oldstate; 2668 error = umtxq_check_susp(td); 2669 if (error != 0) 2670 break; 2671 } 2672 2673 if (error) { 2674 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2675 blocked_readers != 0) { 2676 umtxq_lock(&uq->uq_key); 2677 umtxq_busy(&uq->uq_key); 2678 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2679 umtxq_unbusy(&uq->uq_key); 2680 umtxq_unlock(&uq->uq_key); 2681 } 2682 2683 break; 2684 } 2685 2686 /* grab monitor lock */ 2687 umtxq_lock(&uq->uq_key); 2688 umtxq_busy(&uq->uq_key); 2689 umtxq_unlock(&uq->uq_key); 2690 2691 /* 2692 * re-read the state, in case it changed between the try-lock above 2693 * and the check below 2694 */ 2695 rv = fueword32(&rwlock->rw_state, &state); 2696 if (rv == -1) 2697 error = EFAULT; 2698 2699 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2700 URWLOCK_READER_COUNT(state) != 0) && 2701 (state & URWLOCK_WRITE_WAITERS) == 0) { 2702 rv = casueword32(&rwlock->rw_state, state, 2703 &oldstate, state | URWLOCK_WRITE_WAITERS); 2704 if (rv == -1) { 2705 error = EFAULT; 2706 break; 2707 } 2708 if (oldstate == state) 2709 goto sleep; 2710 state = oldstate; 2711 error = umtxq_check_susp(td); 2712 if (error != 0) 2713 break; 2714 } 2715 if (error != 0) { 2716 umtxq_unbusy_unlocked(&uq->uq_key); 2717 break; 2718 } 2719 2720 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2721 umtxq_unbusy_unlocked(&uq->uq_key); 2722 error = umtxq_check_susp(td); 2723 if (error != 0) 2724 break; 2725 continue; 2726 } 2727 sleep: 2728 rv = fueword32(&rwlock->rw_blocked_writers, 2729 &blocked_writers); 2730 if (rv == -1) { 2731 umtxq_unbusy_unlocked(&uq->uq_key); 2732 error = EFAULT; 2733 break; 2734 } 2735 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2736 2737 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2738 umtxq_lock(&uq->uq_key); 2739 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2740 umtxq_unbusy(&uq->uq_key); 2741 2742 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2743 NULL : &timo); 2744 2745 umtxq_busy(&uq->uq_key); 2746 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2747 umtxq_unlock(&uq->uq_key); 2748 if (error) 2749 break; 2750 rv = fueword32(&rwlock->rw_state, &state); 2751 if (rv == -1) { 2752 error = EFAULT; 2753 break; 2754 } 2755 } 2756 2757 rv = fueword32(&rwlock->rw_blocked_writers, 2758 &blocked_writers); 2759 if (rv == -1) { 2760 umtxq_unbusy_unlocked(&uq->uq_key); 2761 error = EFAULT; 2762 break; 2763 } 2764 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2765 if (blocked_writers == 1) { 2766 rv = fueword32(&rwlock->rw_state, &state); 2767 if (rv == -1) { 2768 umtxq_unbusy_unlocked(&uq->uq_key); 2769 error = EFAULT; 2770 break; 2771 } 2772 for (;;) { 2773 rv = casueword32(&rwlock->rw_state, state, 2774 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2775 if (rv == -1) { 2776 error = EFAULT; 2777 break; 2778 } 2779 if (oldstate == state) 2780 break; 2781 state = oldstate; 2782 error = umtxq_check_susp(td); 2783 /* 2784 * We are leaving the URWLOCK_WRITE_WAITERS 2785 * behind, but this should not harm the 2786 * correctness. 2787 */ 2788 if (error != 0) 2789 break; 2790 } 2791 rv = fueword32(&rwlock->rw_blocked_readers, 2792 &blocked_readers); 2793 if (rv == -1) { 2794 umtxq_unbusy_unlocked(&uq->uq_key); 2795 error = EFAULT; 2796 break; 2797 } 2798 } else 2799 blocked_readers = 0; 2800 2801 umtxq_unbusy_unlocked(&uq->uq_key); 2802 } 2803 2804 umtx_key_release(&uq->uq_key); 2805 if (error == ERESTART) 2806 error = EINTR; 2807 return (error); 2808 } 2809 2810 static int 2811 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2812 { 2813 struct umtx_q *uq; 2814 uint32_t flags; 2815 int32_t state, oldstate; 2816 int error, rv, q, count; 2817 2818 uq = td->td_umtxq; 2819 error = fueword32(&rwlock->rw_flags, &flags); 2820 if (error == -1) 2821 return (EFAULT); 2822 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2823 if (error != 0) 2824 return (error); 2825 2826 error = fueword32(&rwlock->rw_state, &state); 2827 if (error == -1) { 2828 error = EFAULT; 2829 goto out; 2830 } 2831 if (state & URWLOCK_WRITE_OWNER) { 2832 for (;;) { 2833 rv = casueword32(&rwlock->rw_state, state, 2834 &oldstate, state & ~URWLOCK_WRITE_OWNER); 2835 if (rv == -1) { 2836 error = EFAULT; 2837 goto out; 2838 } 2839 if (oldstate != state) { 2840 state = oldstate; 2841 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 2842 error = EPERM; 2843 goto out; 2844 } 2845 error = umtxq_check_susp(td); 2846 if (error != 0) 2847 goto out; 2848 } else 2849 break; 2850 } 2851 } else if (URWLOCK_READER_COUNT(state) != 0) { 2852 for (;;) { 2853 rv = casueword32(&rwlock->rw_state, state, 2854 &oldstate, state - 1); 2855 if (rv == -1) { 2856 error = EFAULT; 2857 goto out; 2858 } 2859 if (oldstate != state) { 2860 state = oldstate; 2861 if (URWLOCK_READER_COUNT(oldstate) == 0) { 2862 error = EPERM; 2863 goto out; 2864 } 2865 error = umtxq_check_susp(td); 2866 if (error != 0) 2867 goto out; 2868 } else 2869 break; 2870 } 2871 } else { 2872 error = EPERM; 2873 goto out; 2874 } 2875 2876 count = 0; 2877 2878 if (!(flags & URWLOCK_PREFER_READER)) { 2879 if (state & URWLOCK_WRITE_WAITERS) { 2880 count = 1; 2881 q = UMTX_EXCLUSIVE_QUEUE; 2882 } else if (state & URWLOCK_READ_WAITERS) { 2883 count = INT_MAX; 2884 q = UMTX_SHARED_QUEUE; 2885 } 2886 } else { 2887 if (state & URWLOCK_READ_WAITERS) { 2888 count = INT_MAX; 2889 q = UMTX_SHARED_QUEUE; 2890 } else if (state & URWLOCK_WRITE_WAITERS) { 2891 count = 1; 2892 q = UMTX_EXCLUSIVE_QUEUE; 2893 } 2894 } 2895 2896 if (count) { 2897 umtxq_lock(&uq->uq_key); 2898 umtxq_busy(&uq->uq_key); 2899 umtxq_signal_queue(&uq->uq_key, count, q); 2900 umtxq_unbusy(&uq->uq_key); 2901 umtxq_unlock(&uq->uq_key); 2902 } 2903 out: 2904 umtx_key_release(&uq->uq_key); 2905 return (error); 2906 } 2907 2908 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 2909 static int 2910 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 2911 { 2912 struct abs_timeout timo; 2913 struct umtx_q *uq; 2914 uint32_t flags, count, count1; 2915 int error, rv; 2916 2917 uq = td->td_umtxq; 2918 error = fueword32(&sem->_flags, &flags); 2919 if (error == -1) 2920 return (EFAULT); 2921 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 2922 if (error != 0) 2923 return (error); 2924 2925 if (timeout != NULL) 2926 abs_timeout_init2(&timo, timeout); 2927 2928 umtxq_lock(&uq->uq_key); 2929 umtxq_busy(&uq->uq_key); 2930 umtxq_insert(uq); 2931 umtxq_unlock(&uq->uq_key); 2932 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 2933 if (rv == 0) 2934 rv = fueword32(&sem->_count, &count); 2935 if (rv == -1 || count != 0) { 2936 umtxq_lock(&uq->uq_key); 2937 umtxq_unbusy(&uq->uq_key); 2938 umtxq_remove(uq); 2939 umtxq_unlock(&uq->uq_key); 2940 umtx_key_release(&uq->uq_key); 2941 return (rv == -1 ? EFAULT : 0); 2942 } 2943 umtxq_lock(&uq->uq_key); 2944 umtxq_unbusy(&uq->uq_key); 2945 2946 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 2947 2948 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2949 error = 0; 2950 else { 2951 umtxq_remove(uq); 2952 /* A relative timeout cannot be restarted. */ 2953 if (error == ERESTART && timeout != NULL && 2954 (timeout->_flags & UMTX_ABSTIME) == 0) 2955 error = EINTR; 2956 } 2957 umtxq_unlock(&uq->uq_key); 2958 umtx_key_release(&uq->uq_key); 2959 return (error); 2960 } 2961 2962 /* 2963 * Signal a userland semaphore. 2964 */ 2965 static int 2966 do_sem_wake(struct thread *td, struct _usem *sem) 2967 { 2968 struct umtx_key key; 2969 int error, cnt; 2970 uint32_t flags; 2971 2972 error = fueword32(&sem->_flags, &flags); 2973 if (error == -1) 2974 return (EFAULT); 2975 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 2976 return (error); 2977 umtxq_lock(&key); 2978 umtxq_busy(&key); 2979 cnt = umtxq_count(&key); 2980 if (cnt > 0) { 2981 umtxq_signal(&key, 1); 2982 /* 2983 * Check if count is greater than 0, this means the memory is 2984 * still being referenced by user code, so we can safely 2985 * update _has_waiters flag. 2986 */ 2987 if (cnt == 1) { 2988 umtxq_unlock(&key); 2989 error = suword32(&sem->_has_waiters, 0); 2990 umtxq_lock(&key); 2991 if (error == -1) 2992 error = EFAULT; 2993 } 2994 } 2995 umtxq_unbusy(&key); 2996 umtxq_unlock(&key); 2997 umtx_key_release(&key); 2998 return (error); 2999 } 3000 #endif 3001 3002 static int 3003 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3004 { 3005 struct abs_timeout timo; 3006 struct umtx_q *uq; 3007 uint32_t count, flags; 3008 int error, rv; 3009 3010 uq = td->td_umtxq; 3011 flags = fuword32(&sem->_flags); 3012 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3013 if (error != 0) 3014 return (error); 3015 3016 if (timeout != NULL) 3017 abs_timeout_init2(&timo, timeout); 3018 3019 umtxq_lock(&uq->uq_key); 3020 umtxq_busy(&uq->uq_key); 3021 umtxq_insert(uq); 3022 umtxq_unlock(&uq->uq_key); 3023 rv = fueword32(&sem->_count, &count); 3024 if (rv == -1) { 3025 umtxq_lock(&uq->uq_key); 3026 umtxq_unbusy(&uq->uq_key); 3027 umtxq_remove(uq); 3028 umtxq_unlock(&uq->uq_key); 3029 umtx_key_release(&uq->uq_key); 3030 return (EFAULT); 3031 } 3032 for (;;) { 3033 if (USEM_COUNT(count) != 0) { 3034 umtxq_lock(&uq->uq_key); 3035 umtxq_unbusy(&uq->uq_key); 3036 umtxq_remove(uq); 3037 umtxq_unlock(&uq->uq_key); 3038 umtx_key_release(&uq->uq_key); 3039 return (0); 3040 } 3041 if (count == USEM_HAS_WAITERS) 3042 break; 3043 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3044 if (rv == -1) { 3045 umtxq_lock(&uq->uq_key); 3046 umtxq_unbusy(&uq->uq_key); 3047 umtxq_remove(uq); 3048 umtxq_unlock(&uq->uq_key); 3049 umtx_key_release(&uq->uq_key); 3050 return (EFAULT); 3051 } 3052 if (count == 0) 3053 break; 3054 } 3055 umtxq_lock(&uq->uq_key); 3056 umtxq_unbusy(&uq->uq_key); 3057 3058 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3059 3060 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3061 error = 0; 3062 else { 3063 umtxq_remove(uq); 3064 /* A relative timeout cannot be restarted. */ 3065 if (error == ERESTART && timeout != NULL && 3066 (timeout->_flags & UMTX_ABSTIME) == 0) 3067 error = EINTR; 3068 } 3069 umtxq_unlock(&uq->uq_key); 3070 umtx_key_release(&uq->uq_key); 3071 return (error); 3072 } 3073 3074 /* 3075 * Signal a userland semaphore. 3076 */ 3077 static int 3078 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3079 { 3080 struct umtx_key key; 3081 int error, cnt, rv; 3082 uint32_t count, flags; 3083 3084 rv = fueword32(&sem->_flags, &flags); 3085 if (rv == -1) 3086 return (EFAULT); 3087 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3088 return (error); 3089 umtxq_lock(&key); 3090 umtxq_busy(&key); 3091 cnt = umtxq_count(&key); 3092 if (cnt > 0) { 3093 umtxq_signal(&key, 1); 3094 3095 /* 3096 * If this was the last sleeping thread, clear the waiters 3097 * flag in _count. 3098 */ 3099 if (cnt == 1) { 3100 umtxq_unlock(&key); 3101 rv = fueword32(&sem->_count, &count); 3102 while (rv != -1 && count & USEM_HAS_WAITERS) 3103 rv = casueword32(&sem->_count, count, &count, 3104 count & ~USEM_HAS_WAITERS); 3105 if (rv == -1) 3106 error = EFAULT; 3107 umtxq_lock(&key); 3108 } 3109 } 3110 umtxq_unbusy(&key); 3111 umtxq_unlock(&key); 3112 umtx_key_release(&key); 3113 return (error); 3114 } 3115 3116 inline int 3117 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3118 { 3119 int error; 3120 3121 error = copyin(addr, tsp, sizeof(struct timespec)); 3122 if (error == 0) { 3123 if (tsp->tv_sec < 0 || 3124 tsp->tv_nsec >= 1000000000 || 3125 tsp->tv_nsec < 0) 3126 error = EINVAL; 3127 } 3128 return (error); 3129 } 3130 3131 static inline int 3132 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3133 { 3134 int error; 3135 3136 if (size <= sizeof(struct timespec)) { 3137 tp->_clockid = CLOCK_REALTIME; 3138 tp->_flags = 0; 3139 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3140 } else 3141 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3142 if (error != 0) 3143 return (error); 3144 if (tp->_timeout.tv_sec < 0 || 3145 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3146 return (EINVAL); 3147 return (0); 3148 } 3149 3150 static int 3151 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3152 { 3153 3154 return (EOPNOTSUPP); 3155 } 3156 3157 static int 3158 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3159 { 3160 struct _umtx_time timeout, *tm_p; 3161 int error; 3162 3163 if (uap->uaddr2 == NULL) 3164 tm_p = NULL; 3165 else { 3166 error = umtx_copyin_umtx_time( 3167 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3168 if (error != 0) 3169 return (error); 3170 tm_p = &timeout; 3171 } 3172 return do_wait(td, uap->obj, uap->val, tm_p, 0, 0); 3173 } 3174 3175 static int 3176 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3177 { 3178 struct _umtx_time timeout, *tm_p; 3179 int error; 3180 3181 if (uap->uaddr2 == NULL) 3182 tm_p = NULL; 3183 else { 3184 error = umtx_copyin_umtx_time( 3185 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3186 if (error != 0) 3187 return (error); 3188 tm_p = &timeout; 3189 } 3190 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3191 } 3192 3193 static int 3194 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3195 { 3196 struct _umtx_time *tm_p, timeout; 3197 int error; 3198 3199 if (uap->uaddr2 == NULL) 3200 tm_p = NULL; 3201 else { 3202 error = umtx_copyin_umtx_time( 3203 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3204 if (error != 0) 3205 return (error); 3206 tm_p = &timeout; 3207 } 3208 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3209 } 3210 3211 static int 3212 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3213 { 3214 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3215 } 3216 3217 #define BATCH_SIZE 128 3218 static int 3219 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3220 { 3221 int count = uap->val; 3222 void *uaddrs[BATCH_SIZE]; 3223 char **upp = (char **)uap->obj; 3224 int tocopy; 3225 int error = 0; 3226 int i, pos = 0; 3227 3228 while (count > 0) { 3229 tocopy = count; 3230 if (tocopy > BATCH_SIZE) 3231 tocopy = BATCH_SIZE; 3232 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *)); 3233 if (error != 0) 3234 break; 3235 for (i = 0; i < tocopy; ++i) 3236 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3237 count -= tocopy; 3238 pos += tocopy; 3239 } 3240 return (error); 3241 } 3242 3243 static int 3244 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3245 { 3246 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3247 } 3248 3249 static int 3250 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3251 { 3252 struct _umtx_time *tm_p, timeout; 3253 int error; 3254 3255 /* Allow a null timespec (wait forever). */ 3256 if (uap->uaddr2 == NULL) 3257 tm_p = NULL; 3258 else { 3259 error = umtx_copyin_umtx_time( 3260 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3261 if (error != 0) 3262 return (error); 3263 tm_p = &timeout; 3264 } 3265 return do_lock_umutex(td, uap->obj, tm_p, 0); 3266 } 3267 3268 static int 3269 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3270 { 3271 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); 3272 } 3273 3274 static int 3275 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3276 { 3277 struct _umtx_time *tm_p, timeout; 3278 int error; 3279 3280 /* Allow a null timespec (wait forever). */ 3281 if (uap->uaddr2 == NULL) 3282 tm_p = NULL; 3283 else { 3284 error = umtx_copyin_umtx_time( 3285 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3286 if (error != 0) 3287 return (error); 3288 tm_p = &timeout; 3289 } 3290 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3291 } 3292 3293 static int 3294 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3295 { 3296 return do_wake_umutex(td, uap->obj); 3297 } 3298 3299 static int 3300 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3301 { 3302 return do_unlock_umutex(td, uap->obj); 3303 } 3304 3305 static int 3306 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3307 { 3308 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 3309 } 3310 3311 static int 3312 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3313 { 3314 struct timespec *ts, timeout; 3315 int error; 3316 3317 /* Allow a null timespec (wait forever). */ 3318 if (uap->uaddr2 == NULL) 3319 ts = NULL; 3320 else { 3321 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3322 if (error != 0) 3323 return (error); 3324 ts = &timeout; 3325 } 3326 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3327 } 3328 3329 static int 3330 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3331 { 3332 return do_cv_signal(td, uap->obj); 3333 } 3334 3335 static int 3336 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3337 { 3338 return do_cv_broadcast(td, uap->obj); 3339 } 3340 3341 static int 3342 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3343 { 3344 struct _umtx_time timeout; 3345 int error; 3346 3347 /* Allow a null timespec (wait forever). */ 3348 if (uap->uaddr2 == NULL) { 3349 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3350 } else { 3351 error = umtx_copyin_umtx_time(uap->uaddr2, 3352 (size_t)uap->uaddr1, &timeout); 3353 if (error != 0) 3354 return (error); 3355 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3356 } 3357 return (error); 3358 } 3359 3360 static int 3361 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3362 { 3363 struct _umtx_time timeout; 3364 int error; 3365 3366 /* Allow a null timespec (wait forever). */ 3367 if (uap->uaddr2 == NULL) { 3368 error = do_rw_wrlock(td, uap->obj, 0); 3369 } else { 3370 error = umtx_copyin_umtx_time(uap->uaddr2, 3371 (size_t)uap->uaddr1, &timeout); 3372 if (error != 0) 3373 return (error); 3374 3375 error = do_rw_wrlock(td, uap->obj, &timeout); 3376 } 3377 return (error); 3378 } 3379 3380 static int 3381 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3382 { 3383 return do_rw_unlock(td, uap->obj); 3384 } 3385 3386 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3387 static int 3388 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3389 { 3390 struct _umtx_time *tm_p, timeout; 3391 int error; 3392 3393 /* Allow a null timespec (wait forever). */ 3394 if (uap->uaddr2 == NULL) 3395 tm_p = NULL; 3396 else { 3397 error = umtx_copyin_umtx_time( 3398 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3399 if (error != 0) 3400 return (error); 3401 tm_p = &timeout; 3402 } 3403 return (do_sem_wait(td, uap->obj, tm_p)); 3404 } 3405 3406 static int 3407 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3408 { 3409 return do_sem_wake(td, uap->obj); 3410 } 3411 #endif 3412 3413 static int 3414 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3415 { 3416 return do_wake2_umutex(td, uap->obj, uap->val); 3417 } 3418 3419 static int 3420 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3421 { 3422 struct _umtx_time *tm_p, timeout; 3423 int error; 3424 3425 /* Allow a null timespec (wait forever). */ 3426 if (uap->uaddr2 == NULL) 3427 tm_p = NULL; 3428 else { 3429 error = umtx_copyin_umtx_time( 3430 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3431 if (error != 0) 3432 return (error); 3433 tm_p = &timeout; 3434 } 3435 return (do_sem2_wait(td, uap->obj, tm_p)); 3436 } 3437 3438 static int 3439 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3440 { 3441 return do_sem2_wake(td, uap->obj); 3442 } 3443 3444 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3445 3446 static _umtx_op_func op_table[] = { 3447 __umtx_op_unimpl, /* UMTX_OP_RESERVED0 */ 3448 __umtx_op_unimpl, /* UMTX_OP_RESERVED1 */ 3449 __umtx_op_wait, /* UMTX_OP_WAIT */ 3450 __umtx_op_wake, /* UMTX_OP_WAKE */ 3451 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3452 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3453 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3454 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3455 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 3456 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3457 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3458 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 3459 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 3460 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 3461 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3462 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3463 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3464 __umtx_op_wait_umutex, /* UMTX_OP_MUTEX_WAIT */ 3465 __umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */ 3466 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3467 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ 3468 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3469 #else 3470 __umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */ 3471 __umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */ 3472 #endif 3473 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */ 3474 __umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */ 3475 __umtx_op_sem2_wait, /* UMTX_OP_SEM2_WAIT */ 3476 __umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */ 3477 }; 3478 3479 int 3480 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3481 { 3482 if ((unsigned)uap->op < UMTX_OP_MAX) 3483 return (*op_table[uap->op])(td, uap); 3484 return (EINVAL); 3485 } 3486 3487 #ifdef COMPAT_FREEBSD32 3488 3489 struct timespec32 { 3490 int32_t tv_sec; 3491 int32_t tv_nsec; 3492 }; 3493 3494 struct umtx_time32 { 3495 struct timespec32 timeout; 3496 uint32_t flags; 3497 uint32_t clockid; 3498 }; 3499 3500 static inline int 3501 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 3502 { 3503 struct timespec32 ts32; 3504 int error; 3505 3506 error = copyin(addr, &ts32, sizeof(struct timespec32)); 3507 if (error == 0) { 3508 if (ts32.tv_sec < 0 || 3509 ts32.tv_nsec >= 1000000000 || 3510 ts32.tv_nsec < 0) 3511 error = EINVAL; 3512 else { 3513 tsp->tv_sec = ts32.tv_sec; 3514 tsp->tv_nsec = ts32.tv_nsec; 3515 } 3516 } 3517 return (error); 3518 } 3519 3520 static inline int 3521 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 3522 { 3523 struct umtx_time32 t32; 3524 int error; 3525 3526 t32.clockid = CLOCK_REALTIME; 3527 t32.flags = 0; 3528 if (size <= sizeof(struct timespec32)) 3529 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 3530 else 3531 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 3532 if (error != 0) 3533 return (error); 3534 if (t32.timeout.tv_sec < 0 || 3535 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 3536 return (EINVAL); 3537 tp->_timeout.tv_sec = t32.timeout.tv_sec; 3538 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 3539 tp->_flags = t32.flags; 3540 tp->_clockid = t32.clockid; 3541 return (0); 3542 } 3543 3544 static int 3545 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3546 { 3547 struct _umtx_time *tm_p, timeout; 3548 int error; 3549 3550 if (uap->uaddr2 == NULL) 3551 tm_p = NULL; 3552 else { 3553 error = umtx_copyin_umtx_time32(uap->uaddr2, 3554 (size_t)uap->uaddr1, &timeout); 3555 if (error != 0) 3556 return (error); 3557 tm_p = &timeout; 3558 } 3559 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3560 } 3561 3562 static int 3563 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3564 { 3565 struct _umtx_time *tm_p, timeout; 3566 int error; 3567 3568 /* Allow a null timespec (wait forever). */ 3569 if (uap->uaddr2 == NULL) 3570 tm_p = NULL; 3571 else { 3572 error = umtx_copyin_umtx_time(uap->uaddr2, 3573 (size_t)uap->uaddr1, &timeout); 3574 if (error != 0) 3575 return (error); 3576 tm_p = &timeout; 3577 } 3578 return do_lock_umutex(td, uap->obj, tm_p, 0); 3579 } 3580 3581 static int 3582 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3583 { 3584 struct _umtx_time *tm_p, timeout; 3585 int error; 3586 3587 /* Allow a null timespec (wait forever). */ 3588 if (uap->uaddr2 == NULL) 3589 tm_p = NULL; 3590 else { 3591 error = umtx_copyin_umtx_time32(uap->uaddr2, 3592 (size_t)uap->uaddr1, &timeout); 3593 if (error != 0) 3594 return (error); 3595 tm_p = &timeout; 3596 } 3597 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3598 } 3599 3600 static int 3601 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3602 { 3603 struct timespec *ts, timeout; 3604 int error; 3605 3606 /* Allow a null timespec (wait forever). */ 3607 if (uap->uaddr2 == NULL) 3608 ts = NULL; 3609 else { 3610 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 3611 if (error != 0) 3612 return (error); 3613 ts = &timeout; 3614 } 3615 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3616 } 3617 3618 static int 3619 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3620 { 3621 struct _umtx_time timeout; 3622 int error; 3623 3624 /* Allow a null timespec (wait forever). */ 3625 if (uap->uaddr2 == NULL) { 3626 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3627 } else { 3628 error = umtx_copyin_umtx_time32(uap->uaddr2, 3629 (size_t)uap->uaddr1, &timeout); 3630 if (error != 0) 3631 return (error); 3632 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3633 } 3634 return (error); 3635 } 3636 3637 static int 3638 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3639 { 3640 struct _umtx_time timeout; 3641 int error; 3642 3643 /* Allow a null timespec (wait forever). */ 3644 if (uap->uaddr2 == NULL) { 3645 error = do_rw_wrlock(td, uap->obj, 0); 3646 } else { 3647 error = umtx_copyin_umtx_time32(uap->uaddr2, 3648 (size_t)uap->uaddr1, &timeout); 3649 if (error != 0) 3650 return (error); 3651 error = do_rw_wrlock(td, uap->obj, &timeout); 3652 } 3653 return (error); 3654 } 3655 3656 static int 3657 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3658 { 3659 struct _umtx_time *tm_p, timeout; 3660 int error; 3661 3662 if (uap->uaddr2 == NULL) 3663 tm_p = NULL; 3664 else { 3665 error = umtx_copyin_umtx_time32( 3666 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 3667 if (error != 0) 3668 return (error); 3669 tm_p = &timeout; 3670 } 3671 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3672 } 3673 3674 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3675 static int 3676 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3677 { 3678 struct _umtx_time *tm_p, timeout; 3679 int error; 3680 3681 /* Allow a null timespec (wait forever). */ 3682 if (uap->uaddr2 == NULL) 3683 tm_p = NULL; 3684 else { 3685 error = umtx_copyin_umtx_time32(uap->uaddr2, 3686 (size_t)uap->uaddr1, &timeout); 3687 if (error != 0) 3688 return (error); 3689 tm_p = &timeout; 3690 } 3691 return (do_sem_wait(td, uap->obj, tm_p)); 3692 } 3693 #endif 3694 3695 static int 3696 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3697 { 3698 struct _umtx_time *tm_p, timeout; 3699 int error; 3700 3701 /* Allow a null timespec (wait forever). */ 3702 if (uap->uaddr2 == NULL) 3703 tm_p = NULL; 3704 else { 3705 error = umtx_copyin_umtx_time32(uap->uaddr2, 3706 (size_t)uap->uaddr1, &timeout); 3707 if (error != 0) 3708 return (error); 3709 tm_p = &timeout; 3710 } 3711 return (do_sem2_wait(td, uap->obj, tm_p)); 3712 } 3713 3714 static int 3715 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 3716 { 3717 int count = uap->val; 3718 uint32_t uaddrs[BATCH_SIZE]; 3719 uint32_t **upp = (uint32_t **)uap->obj; 3720 int tocopy; 3721 int error = 0; 3722 int i, pos = 0; 3723 3724 while (count > 0) { 3725 tocopy = count; 3726 if (tocopy > BATCH_SIZE) 3727 tocopy = BATCH_SIZE; 3728 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t)); 3729 if (error != 0) 3730 break; 3731 for (i = 0; i < tocopy; ++i) 3732 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 3733 INT_MAX, 1); 3734 count -= tocopy; 3735 pos += tocopy; 3736 } 3737 return (error); 3738 } 3739 3740 static _umtx_op_func op_table_compat32[] = { 3741 __umtx_op_unimpl, /* UMTX_OP_RESERVED0 */ 3742 __umtx_op_unimpl, /* UMTX_OP_RESERVED1 */ 3743 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ 3744 __umtx_op_wake, /* UMTX_OP_WAKE */ 3745 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3746 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_TRYLOCK */ 3747 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3748 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3749 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ 3750 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3751 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3752 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ 3753 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ 3754 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ 3755 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3756 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3757 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3758 __umtx_op_wait_umutex_compat32, /* UMTX_OP_MUTEX_WAIT */ 3759 __umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */ 3760 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3761 __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */ 3762 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3763 #else 3764 __umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */ 3765 __umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */ 3766 #endif 3767 __umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */ 3768 __umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */ 3769 __umtx_op_sem2_wait_compat32, /* UMTX_OP_SEM2_WAIT */ 3770 __umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */ 3771 }; 3772 3773 int 3774 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 3775 { 3776 if ((unsigned)uap->op < UMTX_OP_MAX) 3777 return (*op_table_compat32[uap->op])(td, 3778 (struct _umtx_op_args *)uap); 3779 return (EINVAL); 3780 } 3781 #endif 3782 3783 void 3784 umtx_thread_init(struct thread *td) 3785 { 3786 td->td_umtxq = umtxq_alloc(); 3787 td->td_umtxq->uq_thread = td; 3788 } 3789 3790 void 3791 umtx_thread_fini(struct thread *td) 3792 { 3793 umtxq_free(td->td_umtxq); 3794 } 3795 3796 /* 3797 * It will be called when new thread is created, e.g fork(). 3798 */ 3799 void 3800 umtx_thread_alloc(struct thread *td) 3801 { 3802 struct umtx_q *uq; 3803 3804 uq = td->td_umtxq; 3805 uq->uq_inherited_pri = PRI_MAX; 3806 3807 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 3808 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 3809 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 3810 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 3811 } 3812 3813 /* 3814 * exec() hook. 3815 */ 3816 static void 3817 umtx_exec_hook(void *arg __unused, struct proc *p __unused, 3818 struct image_params *imgp __unused) 3819 { 3820 umtx_thread_cleanup(curthread); 3821 } 3822 3823 /* 3824 * thread_exit() hook. 3825 */ 3826 void 3827 umtx_thread_exit(struct thread *td) 3828 { 3829 umtx_thread_cleanup(td); 3830 } 3831 3832 /* 3833 * clean up umtx data. 3834 */ 3835 static void 3836 umtx_thread_cleanup(struct thread *td) 3837 { 3838 struct umtx_q *uq; 3839 struct umtx_pi *pi; 3840 3841 if ((uq = td->td_umtxq) == NULL) 3842 return; 3843 3844 mtx_lock(&umtx_lock); 3845 uq->uq_inherited_pri = PRI_MAX; 3846 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 3847 pi->pi_owner = NULL; 3848 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 3849 } 3850 mtx_unlock(&umtx_lock); 3851 thread_lock(td); 3852 sched_lend_user_prio(td, PRI_MAX); 3853 thread_unlock(td); 3854 } 3855