/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>		/* for umtx_pi_zone (uma_zcreate() et al.) */

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking the umtx held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread waiting on this object. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex this thread is blocked on.  Reads may use either
	 * the chain lock or umtx_lock; writes must hold both the chain
	 * lock and umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* List of PI mutexes we own that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, and priority propagation would boost A's priority
 * as well; A's priority would then never be lowered, even if A were
 * using 100% CPU, which is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
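/*
 * Informal example of the clamp above: a time-sharing thread whose
 * td_user_pri has been transiently boosted (e.g. because it slept)
 * still evaluates to PRI_MAX_TIMESHARE here, so blocking on a PI umtx
 * never donates a transient time-sharing boost.  A real-time thread's
 * td_user_pri lies outside the time-sharing range and is used as-is.
 */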
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}
static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;

	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	/* Sleepable objects (wait/cv/sem) hash into the second table. */
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to a busy state when the following operation
 * may be blocked (a kernel mutex can not be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
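/*
 * Typical pattern for the busy/unbusy protocol (compare the lock
 * routines below): a chain is marked busy around userland accesses
 * that may fault, because the chain mutex itself cannot be held
 * across such accesses:
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);	(may spin, then sleep, until unbusied)
 *	umtxq_unlock(key);
 *	... fueword32()/casueword32() on the userland word ...
 *	umtxq_lock(key);
 *	umtxq_unbusy(key);
 *	umtxq_unlock(key);
 */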
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
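/*
 * Note on the spare-queue scheme used by umtxq_insert_queue() and
 * umtxq_remove_queue() above: every umtx_q owns one pre-allocated
 * umtxq_queue (uq_spare_queue).  The first waiter on a key donates
 * its spare to serve as the per-key wait queue; later waiters park
 * their spares on uc_spare_queue.  On removal, a departing thread
 * takes back either the emptied per-key queue or one of the parked
 * spares, which keeps enqueue and dequeue allocation-free.
 */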
/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */

static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}
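/*
 * Informal sketch of how the pieces above combine for a timed wait
 * (compare do_wait() below):
 *
 *	struct abs_timeout timo;
 *	abs_timeout_init2(&timo, umtxtime);	(relative -> absolute)
 *	umtxq_lock(key); umtxq_insert(uq); umtxq_unlock(key);
 *	...
 *	umtxq_lock(key);
 *	error = umtxq_sleep(uq, "uwait", &timo);
 *
 * umtxq_sleep() re-reads the clock after every EWOULDBLOCK from
 * msleep() and returns ETIMEDOUT only once the absolute deadline has
 * passed, so a premature hz-granularity wakeup just loops.
 */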
/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
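/*
 * Key identity (informal): for THREAD_SHARE objects the key is the
 * pair (vmspace, virtual address), so only threads of the same
 * process can match it.  For PROCESS_SHARE (or AUTO_SHARE on
 * inheritable-shared mappings) it is (VM object, offset), so
 * different processes mapping the same object at different virtual
 * addresses still collide on the same wait queue, which is what
 * makes process-shared objects work.
 */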
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
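/*
 * The m_owner word of a normal umutex encodes (informally):
 *
 *	UMUTEX_UNOWNED (0)		unlocked
 *	UMUTEX_CONTESTED		unlocked, but waiters may exist;
 *					must lock via the kernel
 *	tid				locked by thread tid, uncontested
 *	tid | UMUTEX_CONTESTED		locked, unlock must enter the kernel
 *
 * The lock/unlock routines below move the word between these states
 * with casueword32(), restarting whenever it changed underneath them.
 */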
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has changed,
				 * restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this is for simple mutexes only.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.  Otherwise,
	 * don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}
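/*
 * The PI ownership graph ("this pi is owned by a thread that is itself
 * blocked on that pi", walked by umtx_pi_next() above) is built from
 * userland-writable mutex words, so a buggy or malicious process can
 * construct a cycle.  The propagation loops below therefore bail out
 * when umtx_pi_check_loop() detects one, instead of iterating forever.
 */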
/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
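/*
 * Worked example of the two routines above (lower value = better
 * priority): let A (priority 100) own a PI mutex and B (priority 80)
 * block on it.  umtx_propagate_priority(B) walks the chain of owners
 * of whatever B transitively blocks on and lends priority 80 to A.
 * If B later times out or is signalled, umtx_repropagate_priority()
 * recomputes A's lent priority from the best remaining waiter across
 * all PI mutexes A still owns, falling back to A's own inherited
 * priority, thereby undoing the boost when no waiter justifies it.
 */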
/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
    uint32_t owner, const char *wmesg, struct abs_timeout *timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
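/*
 * Reference counting rules for umtx_pi (see umtx_pi_ref() and
 * umtx_pi_unref() below): the count is protected by the chain lock,
 * and every thread in do_lock_pi() holds a reference across its whole
 * lock attempt.  The last unref frees the structure, after disowning
 * it and asserting that no waiters remain; this is why a umtx_pi can
 * outlive its last umtxq waiter (see the comment in do_unlock_pi()).
 */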
/*
 * Add a reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count for a PI mutex; if the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			rv = casueword32(&m->m_owner,
			    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    UMUTEX_CONTESTED);
				}
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
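/*
 * Priority-protected (PP) mutexes, handled below, map the userland
 * ceiling into a kernel priority as (informally)
 *
 *	kernel pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - m_ceilings[0])
 *
 * so a ceiling of RTP_PRIO_MAX yields the best (lowest-valued)
 * real-time priority, PRI_MIN_REALTIME + 0.  A locker whose priority
 * is already better than the ceiling gets EINVAL; a privileged locker
 * is raised to the ceiling priority while it holds the mutex.
 */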
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
	 * to lock the mutex.  This is necessary because thread priority
	 * has to be adjusted for such a mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error, rv;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
2161 static int 2162 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2163 uint32_t *old_ceiling) 2164 { 2165 struct umtx_q *uq; 2166 uint32_t save_ceiling; 2167 uint32_t owner, id; 2168 uint32_t flags; 2169 int error, rv; 2170 2171 error = fueword32(&m->m_flags, &flags); 2172 if (error == -1) 2173 return (EFAULT); 2174 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2175 return (EINVAL); 2176 if (ceiling > RTP_PRIO_MAX) 2177 return (EINVAL); 2178 id = td->td_tid; 2179 uq = td->td_umtxq; 2180 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2181 &uq->uq_key)) != 0) 2182 return (error); 2183 for (;;) { 2184 umtxq_lock(&uq->uq_key); 2185 umtxq_busy(&uq->uq_key); 2186 umtxq_unlock(&uq->uq_key); 2187 2188 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2189 if (rv == -1) { 2190 error = EFAULT; 2191 break; 2192 } 2193 2194 rv = casueword32(&m->m_owner, 2195 UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); 2196 if (rv == -1) { 2197 error = EFAULT; 2198 break; 2199 } 2200 2201 if (owner == UMUTEX_CONTESTED) { 2202 suword32(&m->m_ceilings[0], ceiling); 2203 suword32(&m->m_owner, UMUTEX_CONTESTED); 2204 error = 0; 2205 break; 2206 } 2207 2208 if ((owner & ~UMUTEX_CONTESTED) == id) { 2209 suword32(&m->m_ceilings[0], ceiling); 2210 error = 0; 2211 break; 2212 } 2213 2214 /* 2215 * If we caught a signal, we have retried and now 2216 * exit immediately. 2217 */ 2218 if (error != 0) 2219 break; 2220 2221 /* 2222 * We set the contested bit, so sleep. Otherwise the lock changed 2223 * and we need to retry, or we lost a race to the thread 2224 * unlocking the umtx. 2225 */ 2226 umtxq_lock(&uq->uq_key); 2227 umtxq_insert(uq); 2228 umtxq_unbusy(&uq->uq_key); 2229 error = umtxq_sleep(uq, "umtxpp", NULL); 2230 umtxq_remove(uq); 2231 umtxq_unlock(&uq->uq_key); 2232 } 2233 umtxq_lock(&uq->uq_key); 2234 if (error == 0) 2235 umtxq_signal(&uq->uq_key, INT_MAX); 2236 umtxq_unbusy(&uq->uq_key); 2237 umtxq_unlock(&uq->uq_key); 2238 umtx_key_release(&uq->uq_key); 2239 if (error == 0 && old_ceiling != NULL) 2240 suword32(old_ceiling, save_ceiling); 2241 return (error); 2242 } 2243 2244 /* 2245 * Lock a userland POSIX mutex. 2246 */ 2247 static int 2248 do_lock_umutex(struct thread *td, struct umutex *m, 2249 struct _umtx_time *timeout, int mode) 2250 { 2251 uint32_t flags; 2252 int error; 2253 2254 error = fueword32(&m->m_flags, &flags); 2255 if (error == -1) 2256 return (EFAULT); 2257 2258 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2259 case 0: 2260 error = do_lock_normal(td, m, flags, timeout, mode); 2261 break; 2262 case UMUTEX_PRIO_INHERIT: 2263 error = do_lock_pi(td, m, flags, timeout, mode); 2264 break; 2265 case UMUTEX_PRIO_PROTECT: 2266 error = do_lock_pp(td, m, flags, timeout, mode); 2267 break; 2268 default: 2269 return (EINVAL); 2270 } 2271 if (timeout == NULL) { 2272 if (error == EINTR && mode != _UMUTEX_WAIT) 2273 error = ERESTART; 2274 } else { 2275 /* Timed-locking is not restarted. */ 2276 if (error == ERESTART) 2277 error = EINTR; 2278 } 2279 return (error); 2280 } 2281 2282 /* 2283 * Unlock a userland POSIX mutex. 2284 */ 2285 static int 2286 do_unlock_umutex(struct thread *td, struct umutex *m) 2287 { 2288 uint32_t flags; 2289 int error; 2290 2291 error = fueword32(&m->m_flags, &flags); 2292 if (error == -1) 2293 return (EFAULT); 2294 2295 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2296 case 0: 2297 return (do_unlock_normal(td, m, flags)); 2298 case UMUTEX_PRIO_INHERIT: 2299 return (do_unlock_pi(td, m, flags)); 2300 case UMUTEX_PRIO_PROTECT: 2301 return (do_unlock_pp(td, m, flags)); 2302 } 2303 2304 return (EINVAL); 2305 } 2306
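/* Wait on a userland condition variable; the associated user mutex is released while the thread is queued. */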
2307 static int 2308 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2309 struct timespec *timeout, u_long wflags) 2310 { 2311 struct abs_timeout timo; 2312 struct umtx_q *uq; 2313 uint32_t flags, clockid, hasw; 2314 int error; 2315 2316 uq = td->td_umtxq; 2317 error = fueword32(&cv->c_flags, &flags); 2318 if (error == -1) 2319 return (EFAULT); 2320 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2321 if (error != 0) 2322 return (error); 2323 2324 if ((wflags & CVWAIT_CLOCKID) != 0) { 2325 error = fueword32(&cv->c_clockid, &clockid); 2326 if (error == -1) { 2327 umtx_key_release(&uq->uq_key); 2328 return (EFAULT); 2329 } 2330 if (clockid < CLOCK_REALTIME || 2331 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2332 /* Only hardware clock ids will work. */ 2333 umtx_key_release(&uq->uq_key); 2334 return (EINVAL); 2335 } 2336 } else { 2337 clockid = CLOCK_REALTIME; 2338 } 2339 2340 umtxq_lock(&uq->uq_key); 2341 umtxq_busy(&uq->uq_key); 2342 umtxq_insert(uq); 2343 umtxq_unlock(&uq->uq_key); 2344 2345 /* 2346 * Set c_has_waiters to 1 before releasing the user mutex; also, 2347 * don't modify the cache line when unnecessary. 2348 */ 2349 error = fueword32(&cv->c_has_waiters, &hasw); 2350 if (error == 0 && hasw == 0) 2351 suword32(&cv->c_has_waiters, 1); 2352 2353 umtxq_unbusy_unlocked(&uq->uq_key); 2354 2355 error = do_unlock_umutex(td, m); 2356 2357 if (timeout != NULL) 2358 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), 2359 timeout); 2360 2361 umtxq_lock(&uq->uq_key); 2362 if (error == 0) { 2363 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2364 NULL : &timo); 2365 } 2366 2367 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2368 error = 0; 2369 else { 2370 /* 2371 * This must be a timeout, an interruption by a signal, or 2372 * a spurious wakeup; clear the c_has_waiters flag when 2373 * necessary. 2374 */ 2375 umtxq_busy(&uq->uq_key); 2376 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2377 int oldlen = uq->uq_cur_queue->length; 2378 umtxq_remove(uq); 2379 if (oldlen == 1) { 2380 umtxq_unlock(&uq->uq_key); 2381 suword32(&cv->c_has_waiters, 0); 2382 umtxq_lock(&uq->uq_key); 2383 } 2384 } 2385 umtxq_unbusy(&uq->uq_key); 2386 if (error == ERESTART) 2387 error = EINTR; 2388 } 2389 2390 umtxq_unlock(&uq->uq_key); 2391 umtx_key_release(&uq->uq_key); 2392 return (error); 2393 } 2394 2395 /* 2396 * Signal a userland condition variable. 2397 */ 2398 static int 2399 do_cv_signal(struct thread *td, struct ucond *cv) 2400 { 2401 struct umtx_key key; 2402 int error, cnt, nwake; 2403 uint32_t flags; 2404 2405 error = fueword32(&cv->c_flags, &flags); 2406 if (error == -1) 2407 return (EFAULT); 2408 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2409 return (error); 2410 umtxq_lock(&key); 2411 umtxq_busy(&key); 2412 cnt = umtxq_count(&key); 2413 nwake = umtxq_signal(&key, 1); 2414 if (cnt <= nwake) { 2415 umtxq_unlock(&key); 2416 error = suword32(&cv->c_has_waiters, 0); 2417 if (error == -1) 2418 error = EFAULT; 2419 umtxq_lock(&key); 2420 } 2421 umtxq_unbusy(&key); 2422 umtxq_unlock(&key); 2423 umtx_key_release(&key); 2424 return (error); 2425 } 2426 2427 static int 2428 do_cv_broadcast(struct thread *td, struct ucond *cv) 2429 { 2430 struct umtx_key key; 2431 int error; 2432 uint32_t flags; 2433 2434 error = fueword32(&cv->c_flags, &flags); 2435 if (error == -1) 2436 return (EFAULT); 2437 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2438 return (error); 2439 2440 umtxq_lock(&key); 2441 umtxq_busy(&key); 2442 umtxq_signal(&key, INT_MAX); 2443 umtxq_unlock(&key); 2444 2445 error = suword32(&cv->c_has_waiters, 0); 2446 if (error == -1) 2447 error = EFAULT; 2448 2449 umtxq_unbusy_unlocked(&key); 2450 2451 umtx_key_release(&key); 2452 return (error); 2453 } 2454
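/* Lock a userland rwlock for reading. Unless URWLOCK_PREFER_READER is set, a reader is blocked by pending writers as well as by a write owner. */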
2455 static int 2456 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2457 { 2458 struct abs_timeout timo; 2459 struct umtx_q *uq; 2460 uint32_t flags, wrflags; 2461 int32_t state, oldstate; 2462 int32_t blocked_readers; 2463 int error, rv; 2464 2465 uq = td->td_umtxq; 2466 error = fueword32(&rwlock->rw_flags, &flags); 2467 if (error == -1) 2468 return (EFAULT); 2469 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2470 if (error != 0) 2471 return (error); 2472 2473 if (timeout != NULL) 2474 abs_timeout_init2(&timo, timeout); 2475 2476 wrflags = URWLOCK_WRITE_OWNER; 2477 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2478 wrflags |= URWLOCK_WRITE_WAITERS; 2479 2480 for (;;) { 2481 rv = fueword32(&rwlock->rw_state, &state); 2482 if (rv == -1) { 2483 umtx_key_release(&uq->uq_key); 2484 return (EFAULT); 2485 } 2486 2487 /* try to lock it */ 2488 while (!(state & wrflags)) { 2489 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2490 umtx_key_release(&uq->uq_key); 2491 return (EAGAIN); 2492 } 2493 rv = casueword32(&rwlock->rw_state, state, 2494 &oldstate, state + 1); 2495 if (rv == -1) { 2496 umtx_key_release(&uq->uq_key); 2497 return (EFAULT); 2498 } 2499 if (oldstate == state) { 2500 umtx_key_release(&uq->uq_key); 2501 return (0); 2502 } 2503 error = umtxq_check_susp(td); 2504 if (error != 0) 2505 break; 2506 state = oldstate; 2507 } 2508 2509 if (error) 2510 break; 2511 2512 /* grab monitor lock */ 2513 umtxq_lock(&uq->uq_key); 2514 umtxq_busy(&uq->uq_key); 2515 umtxq_unlock(&uq->uq_key); 2516 2517 /* 2518 * re-read the state, in case it changed between the try-lock above 2519 * and the check below 2520 */ 2521 rv = fueword32(&rwlock->rw_state, &state); 2522 if (rv == -1) 2523 error = EFAULT; 2524 2525 /* set read contention bit */ 2526 while (error == 0 && (state & wrflags) && 2527 !(state & URWLOCK_READ_WAITERS)) { 2528 rv = casueword32(&rwlock->rw_state, state, 2529 &oldstate, state | URWLOCK_READ_WAITERS); 2530 if (rv == -1) { 2531 error = EFAULT; 2532 break; 2533 } 2534 if (oldstate == state) 2535 goto sleep; 2536 state = oldstate; 2537 error = umtxq_check_susp(td); 2538 if (error != 0) 2539 break; 2540 } 2541 if (error != 0) { 2542 umtxq_unbusy_unlocked(&uq->uq_key); 2543 break; 2544 } 2545 2546 /* state changed while setting flags, restart */ 2547 if (!(state & wrflags)) { 2548 umtxq_unbusy_unlocked(&uq->uq_key); 2549 error = umtxq_check_susp(td); 2550 if (error != 0) 2551 break; 2552 continue; 2553 } 2554 2555 sleep: 2556 /* contention bit is set; before sleeping, increase the read waiter count */ 2557 rv = fueword32(&rwlock->rw_blocked_readers, 2558 &blocked_readers); 2559 if (rv == -1) { 2560 umtxq_unbusy_unlocked(&uq->uq_key); 2561 error = EFAULT; 2562 break; 2563 } 2564 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2565 2566 while (state & wrflags) { 2567 umtxq_lock(&uq->uq_key); 2568 umtxq_insert(uq); 2569 umtxq_unbusy(&uq->uq_key); 2570 2571 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2572 NULL : &timo); 2573 2574 umtxq_busy(&uq->uq_key); 2575 umtxq_remove(uq); 2576 umtxq_unlock(&uq->uq_key); 2577 if (error) 2578 break; 2579 rv = fueword32(&rwlock->rw_state, &state); 2580 if (rv == -1) { 2581 error = EFAULT; 2582 break; 2583 } 2584 } 2585 2586 /* decrease the read waiter count, and possibly clear the read contention bit */ 2587 rv = fueword32(&rwlock->rw_blocked_readers, 2588 &blocked_readers); 2589 if (rv == -1) { 2590 umtxq_unbusy_unlocked(&uq->uq_key); 2591 error = EFAULT; 2592 break; 2593 } 2594 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2595 if (blocked_readers == 1) { 2596 rv = fueword32(&rwlock->rw_state, &state); 2597 if (rv == -1) 2598 error = EFAULT; 2599 while (error == 0) { 2600 rv = casueword32(&rwlock->rw_state, state, 2601 &oldstate, state & ~URWLOCK_READ_WAITERS); 2602 if (rv == -1) { 2603 error = EFAULT; 2604 break; 2605 } 2606 if (oldstate == state) 2607 break; 2608 state = oldstate; 2609 error = umtxq_check_susp(td); 2610 } 2611 } 2612 2613 umtxq_unbusy_unlocked(&uq->uq_key); 2614 if (error != 0) 2615 break; 2616 } 2617 umtx_key_release(&uq->uq_key); 2618 if (error == ERESTART) 2619 error = EINTR; 2620 return (error); 2621 } 2622
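/* Lock a userland rwlock for writing; a writer must wait until both the write-owner bit and the reader count are clear. */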
2623 static int 2624 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2625 { 2626 struct abs_timeout timo; 2627 struct umtx_q *uq; 2628 uint32_t flags; 2629 int32_t state, oldstate; 2630 int32_t blocked_writers; 2631 int32_t blocked_readers; 2632 int error, rv; 2633 2634 uq = td->td_umtxq; 2635 error = fueword32(&rwlock->rw_flags, &flags); 2636 if (error == -1) 2637 return (EFAULT); 2638 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2639 if (error != 0) 2640 return (error); 2641 2642 if (timeout != NULL) 2643 abs_timeout_init2(&timo, timeout); 2644 2645 blocked_readers = 0; 2646 for (;;) { 2647 rv = fueword32(&rwlock->rw_state, &state); 2648 if (rv == -1) { 2649 umtx_key_release(&uq->uq_key); 2650 return (EFAULT); 2651 } 2652 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2653 rv = casueword32(&rwlock->rw_state, state, 2654 &oldstate, state | URWLOCK_WRITE_OWNER); 2655 if (rv == -1) { 2656 umtx_key_release(&uq->uq_key); 2657 return (EFAULT); 2658 } 2659 if (oldstate == state) { 2660 umtx_key_release(&uq->uq_key); 2661 return (0); 2662 } 2663 state = oldstate; 2664 error = umtxq_check_susp(td); 2665 if (error != 0) 2666 break; 2667 } 2668 2669 if (error) { 2670 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2671 blocked_readers != 0) { 2672 umtxq_lock(&uq->uq_key); 2673 umtxq_busy(&uq->uq_key); 2674 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2675 umtxq_unbusy(&uq->uq_key); 2676 umtxq_unlock(&uq->uq_key); 2677 } 2678 2679 break; 2680 } 2681 2682 /* grab monitor lock */ 2683 umtxq_lock(&uq->uq_key); 2684 umtxq_busy(&uq->uq_key); 2685 umtxq_unlock(&uq->uq_key); 2686 2687 /* 2688 * re-read the state, in case it changed between the try-lock above 2689 * and the check below 2690 */ 2691 rv = fueword32(&rwlock->rw_state, &state); 2692 if (rv == -1) 2693 error = EFAULT; 2694 2695 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2696 URWLOCK_READER_COUNT(state) != 0) && 2697 (state & URWLOCK_WRITE_WAITERS) == 0) { 2698 rv = casueword32(&rwlock->rw_state, state, 2699 &oldstate, state | URWLOCK_WRITE_WAITERS); 2700 if (rv == -1) { 2701 error = EFAULT; 2702 break; 2703 } 2704 if (oldstate == state) 2705 goto sleep; 2706 state = oldstate; 2707 error = umtxq_check_susp(td); 2708 if (error != 0) 2709 break; 2710 } 2711 if (error != 0) { 2712 umtxq_unbusy_unlocked(&uq->uq_key); 2713 break; 2714 } 2715 2716 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2717 umtxq_unbusy_unlocked(&uq->uq_key); 2718 error = umtxq_check_susp(td); 2719 if (error != 0) 2720 break; 2721 continue; 2722 }
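/* The write-contention bit is set; publish ourselves as a blocked writer, then sleep until the owner and the readers are gone. */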
2723 sleep: 2724 rv = fueword32(&rwlock->rw_blocked_writers, 2725 &blocked_writers); 2726 if (rv == -1) { 2727 umtxq_unbusy_unlocked(&uq->uq_key); 2728 error = EFAULT; 2729 break; 2730 } 2731 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2732 2733 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2734 umtxq_lock(&uq->uq_key); 2735 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2736 umtxq_unbusy(&uq->uq_key); 2737 2738 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2739 NULL : &timo); 2740 2741 umtxq_busy(&uq->uq_key); 2742 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2743 umtxq_unlock(&uq->uq_key); 2744 if (error) 2745 break; 2746 rv = fueword32(&rwlock->rw_state, &state); 2747 if (rv == -1) { 2748 error = EFAULT; 2749 break; 2750 } 2751 } 2752 2753 rv = fueword32(&rwlock->rw_blocked_writers, 2754 &blocked_writers); 2755 if (rv == -1) { 2756 umtxq_unbusy_unlocked(&uq->uq_key); 2757 error = EFAULT; 2758 break; 2759 } 2760 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2761 if (blocked_writers == 1) { 2762 rv = fueword32(&rwlock->rw_state, &state); 2763 if (rv == -1) { 2764 umtxq_unbusy_unlocked(&uq->uq_key); 2765 error = EFAULT; 2766 break; 2767 } 2768 for (;;) { 2769 rv = casueword32(&rwlock->rw_state, state, 2770 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2771 if (rv == -1) { 2772 error = EFAULT; 2773 break; 2774 } 2775 if (oldstate == state) 2776 break; 2777 state = oldstate; 2778 error = umtxq_check_susp(td); 2779 /* 2780 * We are leaving the URWLOCK_WRITE_WAITERS 2781 * behind, but this should not harm 2782 * correctness.
2783 */ 2784 if (error != 0) 2785 break; 2786 } 2787 rv = fueword32(&rwlock->rw_blocked_readers, 2788 &blocked_readers); 2789 if (rv == -1) { 2790 umtxq_unbusy_unlocked(&uq->uq_key); 2791 error = EFAULT; 2792 break; 2793 } 2794 } else 2795 blocked_readers = 0; 2796 2797 umtxq_unbusy_unlocked(&uq->uq_key); 2798 } 2799 2800 umtx_key_release(&uq->uq_key); 2801 if (error == ERESTART) 2802 error = EINTR; 2803 return (error); 2804 } 2805 2806 static int 2807 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2808 { 2809 struct umtx_q *uq; 2810 uint32_t flags; 2811 int32_t state, oldstate; 2812 int error, rv, q, count; 2813 2814 uq = td->td_umtxq; 2815 error = fueword32(&rwlock->rw_flags, &flags); 2816 if (error == -1) 2817 return (EFAULT); 2818 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2819 if (error != 0) 2820 return (error); 2821 2822 error = fueword32(&rwlock->rw_state, &state); 2823 if (error == -1) { 2824 error = EFAULT; 2825 goto out; 2826 } 2827 if (state & URWLOCK_WRITE_OWNER) { 2828 for (;;) { 2829 rv = casueword32(&rwlock->rw_state, state, 2830 &oldstate, state & ~URWLOCK_WRITE_OWNER); 2831 if (rv == -1) { 2832 error = EFAULT; 2833 goto out; 2834 } 2835 if (oldstate != state) { 2836 state = oldstate; 2837 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 2838 error = EPERM; 2839 goto out; 2840 } 2841 error = umtxq_check_susp(td); 2842 if (error != 0) 2843 goto out; 2844 } else 2845 break; 2846 } 2847 } else if (URWLOCK_READER_COUNT(state) != 0) { 2848 for (;;) { 2849 rv = casueword32(&rwlock->rw_state, state, 2850 &oldstate, state - 1); 2851 if (rv == -1) { 2852 error = EFAULT; 2853 goto out; 2854 } 2855 if (oldstate != state) { 2856 state = oldstate; 2857 if (URWLOCK_READER_COUNT(oldstate) == 0) { 2858 error = EPERM; 2859 goto out; 2860 } 2861 error = umtxq_check_susp(td); 2862 if (error != 0) 2863 goto out; 2864 } else 2865 break; 2866 } 2867 } else { 2868 error = EPERM; 2869 goto out; 2870 } 2871 2872 count = 0; 2873 2874 if (!(flags & URWLOCK_PREFER_READER)) { 2875 if (state & URWLOCK_WRITE_WAITERS) { 2876 count = 1; 2877 q = UMTX_EXCLUSIVE_QUEUE; 2878 } else if (state & URWLOCK_READ_WAITERS) { 2879 count = INT_MAX; 2880 q = UMTX_SHARED_QUEUE; 2881 } 2882 } else { 2883 if (state & URWLOCK_READ_WAITERS) { 2884 count = INT_MAX; 2885 q = UMTX_SHARED_QUEUE; 2886 } else if (state & URWLOCK_WRITE_WAITERS) { 2887 count = 1; 2888 q = UMTX_EXCLUSIVE_QUEUE; 2889 } 2890 } 2891 2892 if (count) { 2893 umtxq_lock(&uq->uq_key); 2894 umtxq_busy(&uq->uq_key); 2895 umtxq_signal_queue(&uq->uq_key, count, q); 2896 umtxq_unbusy(&uq->uq_key); 2897 umtxq_unlock(&uq->uq_key); 2898 } 2899 out: 2900 umtx_key_release(&uq->uq_key); 2901 return (error); 2902 } 2903 2904 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 2905 static int 2906 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 2907 { 2908 struct abs_timeout timo; 2909 struct umtx_q *uq; 2910 uint32_t flags, count, count1; 2911 int error, rv; 2912 2913 uq = td->td_umtxq; 2914 error = fueword32(&sem->_flags, &flags); 2915 if (error == -1) 2916 return (EFAULT); 2917 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 2918 if (error != 0) 2919 return (error); 2920 2921 if (timeout != NULL) 2922 abs_timeout_init2(&timo, timeout); 2923 2924 umtxq_lock(&uq->uq_key); 2925 umtxq_busy(&uq->uq_key); 2926 umtxq_insert(uq); 2927 umtxq_unlock(&uq->uq_key); 2928 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 2929 if (rv == 0) 2930 rv = fueword32(&sem->_count, &count); 2931 if 
(rv == -1 || count != 0) { 2932 umtxq_lock(&uq->uq_key); 2933 umtxq_unbusy(&uq->uq_key); 2934 umtxq_remove(uq); 2935 umtxq_unlock(&uq->uq_key); 2936 umtx_key_release(&uq->uq_key); 2937 return (rv == -1 ? EFAULT : 0); 2938 } 2939 umtxq_lock(&uq->uq_key); 2940 umtxq_unbusy(&uq->uq_key); 2941 2942 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 2943 2944 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2945 error = 0; 2946 else { 2947 umtxq_remove(uq); 2948 /* A relative timeout cannot be restarted. */ 2949 if (error == ERESTART && timeout != NULL && 2950 (timeout->_flags & UMTX_ABSTIME) == 0) 2951 error = EINTR; 2952 } 2953 umtxq_unlock(&uq->uq_key); 2954 umtx_key_release(&uq->uq_key); 2955 return (error); 2956 } 2957 2958 /* 2959 * Signal a userland semaphore. 2960 */ 2961 static int 2962 do_sem_wake(struct thread *td, struct _usem *sem) 2963 { 2964 struct umtx_key key; 2965 int error, cnt; 2966 uint32_t flags; 2967 2968 error = fueword32(&sem->_flags, &flags); 2969 if (error == -1) 2970 return (EFAULT); 2971 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 2972 return (error); 2973 umtxq_lock(&key); 2974 umtxq_busy(&key); 2975 cnt = umtxq_count(&key); 2976 if (cnt > 0) { 2977 umtxq_signal(&key, 1); 2978 /* 2979 * The count is greater than 0, which means the memory is 2980 * still being referenced by user code, so we can safely 2981 * update the _has_waiters flag. 2982 */ 2983 if (cnt == 1) { 2984 umtxq_unlock(&key); 2985 error = suword32(&sem->_has_waiters, 0); 2986 umtxq_lock(&key); 2987 if (error == -1) 2988 error = EFAULT; 2989 } 2990 } 2991 umtxq_unbusy(&key); 2992 umtxq_unlock(&key); 2993 umtx_key_release(&key); 2994 return (error); 2995 } 2996 #endif 2997 2998 static int 2999 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3000 { 3001 struct abs_timeout timo; 3002 struct umtx_q *uq; 3003 uint32_t count, flags; 3004 int error, rv; 3005 3006 uq = td->td_umtxq; 3007 rv = fueword32(&sem->_flags, &flags); if (rv == -1) return (EFAULT); 3008 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3009 if (error != 0) 3010 return (error); 3011 3012 if (timeout != NULL) 3013 abs_timeout_init2(&timo, timeout); 3014 3015 umtxq_lock(&uq->uq_key); 3016 umtxq_busy(&uq->uq_key); 3017 umtxq_insert(uq); 3018 umtxq_unlock(&uq->uq_key); 3019 rv = fueword32(&sem->_count, &count); 3020 if (rv == -1) { 3021 umtxq_lock(&uq->uq_key); 3022 umtxq_unbusy(&uq->uq_key); 3023 umtxq_remove(uq); 3024 umtxq_unlock(&uq->uq_key); 3025 umtx_key_release(&uq->uq_key); 3026 return (EFAULT); 3027 } 3028 for (;;) { 3029 if (USEM_COUNT(count) != 0) { 3030 umtxq_lock(&uq->uq_key); 3031 umtxq_unbusy(&uq->uq_key); 3032 umtxq_remove(uq); 3033 umtxq_unlock(&uq->uq_key); 3034 umtx_key_release(&uq->uq_key); 3035 return (0); 3036 } 3037 if (count == USEM_HAS_WAITERS) 3038 break; 3039 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3040 if (rv == -1) { 3041 umtxq_lock(&uq->uq_key); 3042 umtxq_unbusy(&uq->uq_key); 3043 umtxq_remove(uq); 3044 umtxq_unlock(&uq->uq_key); 3045 umtx_key_release(&uq->uq_key); 3046 return (EFAULT); 3047 } 3048 if (count == 0) 3049 break; 3050 } 3051 umtxq_lock(&uq->uq_key); 3052 umtxq_unbusy(&uq->uq_key); 3053 3054 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3055 3056 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3057 error = 0; 3058 else { 3059 umtxq_remove(uq); 3060 /* A relative timeout cannot be restarted.
*/ 3061 if (error == ERESTART && timeout != NULL && 3062 (timeout->_flags & UMTX_ABSTIME) == 0) 3063 error = EINTR; 3064 } 3065 umtxq_unlock(&uq->uq_key); 3066 umtx_key_release(&uq->uq_key); 3067 return (error); 3068 } 3069 3070 /* 3071 * Signal a userland semaphore. 3072 */ 3073 static int 3074 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3075 { 3076 struct umtx_key key; 3077 int error, cnt, rv; 3078 uint32_t count, flags; 3079 3080 rv = fueword32(&sem->_flags, &flags); 3081 if (rv == -1) 3082 return (EFAULT); 3083 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3084 return (error); 3085 umtxq_lock(&key); 3086 umtxq_busy(&key); 3087 cnt = umtxq_count(&key); 3088 if (cnt > 0) { 3089 umtxq_signal(&key, 1); 3090 3091 /* 3092 * If this was the last sleeping thread, clear the waiters 3093 * flag in _count. 3094 */ 3095 if (cnt == 1) { 3096 umtxq_unlock(&key); 3097 rv = fueword32(&sem->_count, &count); 3098 while (rv != -1 && count & USEM_HAS_WAITERS) 3099 rv = casueword32(&sem->_count, count, &count, 3100 count & ~USEM_HAS_WAITERS); 3101 if (rv == -1) 3102 error = EFAULT; 3103 umtxq_lock(&key); 3104 } 3105 } 3106 umtxq_unbusy(&key); 3107 umtxq_unlock(&key); 3108 umtx_key_release(&key); 3109 return (error); 3110 } 3111 3112 inline int 3113 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3114 { 3115 int error; 3116 3117 error = copyin(addr, tsp, sizeof(struct timespec)); 3118 if (error == 0) { 3119 if (tsp->tv_sec < 0 || 3120 tsp->tv_nsec >= 1000000000 || 3121 tsp->tv_nsec < 0) 3122 error = EINVAL; 3123 } 3124 return (error); 3125 } 3126 3127 static inline int 3128 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3129 { 3130 int error; 3131 3132 if (size <= sizeof(struct timespec)) { 3133 tp->_clockid = CLOCK_REALTIME; 3134 tp->_flags = 0; 3135 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3136 } else 3137 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3138 if (error != 0) 3139 return (error); 3140 if (tp->_timeout.tv_sec < 0 || 3141 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3142 return (EINVAL); 3143 return (0); 3144 } 3145 3146 static int 3147 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3148 { 3149 3150 return (EOPNOTSUPP); 3151 } 3152 3153 static int 3154 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3155 { 3156 struct _umtx_time timeout, *tm_p; 3157 int error; 3158 3159 if (uap->uaddr2 == NULL) 3160 tm_p = NULL; 3161 else { 3162 error = umtx_copyin_umtx_time( 3163 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3164 if (error != 0) 3165 return (error); 3166 tm_p = &timeout; 3167 } 3168 return do_wait(td, uap->obj, uap->val, tm_p, 0, 0); 3169 } 3170 3171 static int 3172 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3173 { 3174 struct _umtx_time timeout, *tm_p; 3175 int error; 3176 3177 if (uap->uaddr2 == NULL) 3178 tm_p = NULL; 3179 else { 3180 error = umtx_copyin_umtx_time( 3181 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3182 if (error != 0) 3183 return (error); 3184 tm_p = &timeout; 3185 } 3186 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3187 } 3188 3189 static int 3190 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3191 { 3192 struct _umtx_time *tm_p, timeout; 3193 int error; 3194 3195 if (uap->uaddr2 == NULL) 3196 tm_p = NULL; 3197 else { 3198 error = umtx_copyin_umtx_time( 3199 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3200 if (error != 0) 3201 return (error); 3202 tm_p = &timeout; 3203 
} 3204 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3205 } 3206 3207 static int 3208 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3209 { 3210 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3211 } 3212 3213 #define BATCH_SIZE 128 3214 static int 3215 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3216 { 3217 int count = uap->val; 3218 void *uaddrs[BATCH_SIZE]; 3219 char **upp = (char **)uap->obj; 3220 int tocopy; 3221 int error = 0; 3222 int i, pos = 0; 3223 3224 while (count > 0) { 3225 tocopy = count; 3226 if (tocopy > BATCH_SIZE) 3227 tocopy = BATCH_SIZE; 3228 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *)); 3229 if (error != 0) 3230 break; 3231 for (i = 0; i < tocopy; ++i) 3232 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3233 count -= tocopy; 3234 pos += tocopy; 3235 } 3236 return (error); 3237 } 3238 3239 static int 3240 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3241 { 3242 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3243 } 3244 3245 static int 3246 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3247 { 3248 struct _umtx_time *tm_p, timeout; 3249 int error; 3250 3251 /* Allow a null timespec (wait forever). */ 3252 if (uap->uaddr2 == NULL) 3253 tm_p = NULL; 3254 else { 3255 error = umtx_copyin_umtx_time( 3256 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3257 if (error != 0) 3258 return (error); 3259 tm_p = &timeout; 3260 } 3261 return do_lock_umutex(td, uap->obj, tm_p, 0); 3262 } 3263 3264 static int 3265 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3266 { 3267 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); 3268 } 3269 3270 static int 3271 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3272 { 3273 struct _umtx_time *tm_p, timeout; 3274 int error; 3275 3276 /* Allow a null timespec (wait forever). */ 3277 if (uap->uaddr2 == NULL) 3278 tm_p = NULL; 3279 else { 3280 error = umtx_copyin_umtx_time( 3281 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3282 if (error != 0) 3283 return (error); 3284 tm_p = &timeout; 3285 } 3286 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3287 } 3288 3289 static int 3290 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3291 { 3292 return do_wake_umutex(td, uap->obj); 3293 } 3294 3295 static int 3296 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3297 { 3298 return do_unlock_umutex(td, uap->obj); 3299 } 3300 3301 static int 3302 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3303 { 3304 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 3305 } 3306 3307 static int 3308 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3309 { 3310 struct timespec *ts, timeout; 3311 int error; 3312 3313 /* Allow a null timespec (wait forever). 
*/ 3314 if (uap->uaddr2 == NULL) 3315 ts = NULL; 3316 else { 3317 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3318 if (error != 0) 3319 return (error); 3320 ts = &timeout; 3321 } 3322 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3323 } 3324 3325 static int 3326 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3327 { 3328 return do_cv_signal(td, uap->obj); 3329 } 3330 3331 static int 3332 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3333 { 3334 return do_cv_broadcast(td, uap->obj); 3335 } 3336 3337 static int 3338 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3339 { 3340 struct _umtx_time timeout; 3341 int error; 3342 3343 /* Allow a null timespec (wait forever). */ 3344 if (uap->uaddr2 == NULL) { 3345 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3346 } else { 3347 error = umtx_copyin_umtx_time(uap->uaddr2, 3348 (size_t)uap->uaddr1, &timeout); 3349 if (error != 0) 3350 return (error); 3351 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3352 } 3353 return (error); 3354 } 3355 3356 static int 3357 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3358 { 3359 struct _umtx_time timeout; 3360 int error; 3361 3362 /* Allow a null timespec (wait forever). */ 3363 if (uap->uaddr2 == NULL) { 3364 error = do_rw_wrlock(td, uap->obj, 0); 3365 } else { 3366 error = umtx_copyin_umtx_time(uap->uaddr2, 3367 (size_t)uap->uaddr1, &timeout); 3368 if (error != 0) 3369 return (error); 3370 3371 error = do_rw_wrlock(td, uap->obj, &timeout); 3372 } 3373 return (error); 3374 } 3375 3376 static int 3377 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3378 { 3379 return do_rw_unlock(td, uap->obj); 3380 } 3381 3382 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3383 static int 3384 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3385 { 3386 struct _umtx_time *tm_p, timeout; 3387 int error; 3388 3389 /* Allow a null timespec (wait forever). */ 3390 if (uap->uaddr2 == NULL) 3391 tm_p = NULL; 3392 else { 3393 error = umtx_copyin_umtx_time( 3394 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3395 if (error != 0) 3396 return (error); 3397 tm_p = &timeout; 3398 } 3399 return (do_sem_wait(td, uap->obj, tm_p)); 3400 } 3401 3402 static int 3403 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3404 { 3405 return do_sem_wake(td, uap->obj); 3406 } 3407 #endif 3408 3409 static int 3410 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3411 { 3412 return do_wake2_umutex(td, uap->obj, uap->val); 3413 } 3414 3415 static int 3416 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3417 { 3418 struct _umtx_time *tm_p, timeout; 3419 int error; 3420 3421 /* Allow a null timespec (wait forever). 
*/ 3422 if (uap->uaddr2 == NULL) 3423 tm_p = NULL; 3424 else { 3425 error = umtx_copyin_umtx_time( 3426 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3427 if (error != 0) 3428 return (error); 3429 tm_p = &timeout; 3430 } 3431 return (do_sem2_wait(td, uap->obj, tm_p)); 3432 } 3433 3434 static int 3435 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3436 { 3437 return do_sem2_wake(td, uap->obj); 3438 } 3439 3440 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3441 3442 static _umtx_op_func op_table[] = { 3443 __umtx_op_unimpl, /* UMTX_OP_RESERVED0 */ 3444 __umtx_op_unimpl, /* UMTX_OP_RESERVED1 */ 3445 __umtx_op_wait, /* UMTX_OP_WAIT */ 3446 __umtx_op_wake, /* UMTX_OP_WAKE */ 3447 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3448 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3449 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3450 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3451 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 3452 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3453 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3454 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 3455 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 3456 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 3457 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3458 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3459 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3460 __umtx_op_wait_umutex, /* UMTX_OP_MUTEX_WAIT */ 3461 __umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */ 3462 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3463 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ 3464 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3465 #else 3466 __umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */ 3467 __umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */ 3468 #endif 3469 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */ 3470 __umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */ 3471 __umtx_op_sem2_wait, /* UMTX_OP_SEM2_WAIT */ 3472 __umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */ 3473 }; 3474 3475 int 3476 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3477 { 3478 if ((unsigned)uap->op < UMTX_OP_MAX) 3479 return (*op_table[uap->op])(td, uap); 3480 return (EINVAL); 3481 } 3482 3483 #ifdef COMPAT_FREEBSD32 3484 3485 struct timespec32 { 3486 int32_t tv_sec; 3487 int32_t tv_nsec; 3488 }; 3489 3490 struct umtx_time32 { 3491 struct timespec32 timeout; 3492 uint32_t flags; 3493 uint32_t clockid; 3494 }; 3495 3496 static inline int 3497 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 3498 { 3499 struct timespec32 ts32; 3500 int error; 3501 3502 error = copyin(addr, &ts32, sizeof(struct timespec32)); 3503 if (error == 0) { 3504 if (ts32.tv_sec < 0 || 3505 ts32.tv_nsec >= 1000000000 || 3506 ts32.tv_nsec < 0) 3507 error = EINVAL; 3508 else { 3509 tsp->tv_sec = ts32.tv_sec; 3510 tsp->tv_nsec = ts32.tv_nsec; 3511 } 3512 } 3513 return (error); 3514 } 3515 3516 static inline int 3517 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 3518 { 3519 struct umtx_time32 t32; 3520 int error; 3521 3522 t32.clockid = CLOCK_REALTIME; 3523 t32.flags = 0; 3524 if (size <= sizeof(struct timespec32)) 3525 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 3526 else 3527 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 3528 if (error != 0) 3529 return (error); 3530 if (t32.timeout.tv_sec < 0 || 3531 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 3532 return (EINVAL); 3533 tp->_timeout.tv_sec = t32.timeout.tv_sec; 3534 tp->_timeout.tv_nsec 
= t32.timeout.tv_nsec; 3535 tp->_flags = t32.flags; 3536 tp->_clockid = t32.clockid; 3537 return (0); 3538 } 3539 3540 static int 3541 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3542 { 3543 struct _umtx_time *tm_p, timeout; 3544 int error; 3545 3546 if (uap->uaddr2 == NULL) 3547 tm_p = NULL; 3548 else { 3549 error = umtx_copyin_umtx_time32(uap->uaddr2, 3550 (size_t)uap->uaddr1, &timeout); 3551 if (error != 0) 3552 return (error); 3553 tm_p = &timeout; 3554 } 3555 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3556 } 3557 3558 static int 3559 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3560 { 3561 struct _umtx_time *tm_p, timeout; 3562 int error; 3563 3564 /* Allow a null timespec (wait forever). */ 3565 if (uap->uaddr2 == NULL) 3566 tm_p = NULL; 3567 else { 3568 error = umtx_copyin_umtx_time32(uap->uaddr2, 3569 (size_t)uap->uaddr1, &timeout); 3570 if (error != 0) 3571 return (error); 3572 tm_p = &timeout; 3573 } 3574 return do_lock_umutex(td, uap->obj, tm_p, 0); 3575 } 3576 3577 static int 3578 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3579 { 3580 struct _umtx_time *tm_p, timeout; 3581 int error; 3582 3583 /* Allow a null timespec (wait forever). */ 3584 if (uap->uaddr2 == NULL) 3585 tm_p = NULL; 3586 else { 3587 error = umtx_copyin_umtx_time32(uap->uaddr2, 3588 (size_t)uap->uaddr1, &timeout); 3589 if (error != 0) 3590 return (error); 3591 tm_p = &timeout; 3592 } 3593 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3594 } 3595 3596 static int 3597 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3598 { 3599 struct timespec *ts, timeout; 3600 int error; 3601 3602 /* Allow a null timespec (wait forever). */ 3603 if (uap->uaddr2 == NULL) 3604 ts = NULL; 3605 else { 3606 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 3607 if (error != 0) 3608 return (error); 3609 ts = &timeout; 3610 } 3611 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3612 } 3613 3614 static int 3615 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3616 { 3617 struct _umtx_time timeout; 3618 int error; 3619 3620 /* Allow a null timespec (wait forever). */ 3621 if (uap->uaddr2 == NULL) { 3622 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3623 } else { 3624 error = umtx_copyin_umtx_time32(uap->uaddr2, 3625 (size_t)uap->uaddr1, &timeout); 3626 if (error != 0) 3627 return (error); 3628 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3629 } 3630 return (error); 3631 } 3632 3633 static int 3634 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3635 { 3636 struct _umtx_time timeout; 3637 int error; 3638 3639 /* Allow a null timespec (wait forever). */ 3640 if (uap->uaddr2 == NULL) { 3641 error = do_rw_wrlock(td, uap->obj, 0); 3642 } else { 3643 error = umtx_copyin_umtx_time32(uap->uaddr2, 3644 (size_t)uap->uaddr1, &timeout); 3645 if (error != 0) 3646 return (error); 3647 error = do_rw_wrlock(td, uap->obj, &timeout); 3648 } 3649 return (error); 3650 } 3651 3652 static int 3653 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3654 { 3655 struct _umtx_time *tm_p, timeout; 3656 int error; 3657 3658 if (uap->uaddr2 == NULL) 3659 tm_p = NULL; 3660 else { 3661 error = umtx_copyin_umtx_time32( 3662 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3663 if (error != 0) 3664 return (error); 3665 tm_p = &timeout; 3666 } 3667 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3668 } 3669 3670 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3671 static int 3672 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3673 { 3674 struct _umtx_time *tm_p, timeout; 3675 int error; 3676 3677 /* Allow a null timespec (wait forever). */ 3678 if (uap->uaddr2 == NULL) 3679 tm_p = NULL; 3680 else { 3681 error = umtx_copyin_umtx_time32(uap->uaddr2, 3682 (size_t)uap->uaddr1, &timeout); 3683 if (error != 0) 3684 return (error); 3685 tm_p = &timeout; 3686 } 3687 return (do_sem_wait(td, uap->obj, tm_p)); 3688 } 3689 #endif 3690 3691 static int 3692 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3693 { 3694 struct _umtx_time *tm_p, timeout; 3695 int error; 3696 3697 /* Allow a null timespec (wait forever). */ 3698 if (uap->uaddr2 == NULL) 3699 tm_p = NULL; 3700 else { 3701 error = umtx_copyin_umtx_time32(uap->uaddr2, 3702 (size_t)uap->uaddr1, &timeout); 3703 if (error != 0) 3704 return (error); 3705 tm_p = &timeout; 3706 } 3707 return (do_sem2_wait(td, uap->obj, tm_p)); 3708 } 3709 3710 static int 3711 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 3712 { 3713 int count = uap->val; 3714 uint32_t uaddrs[BATCH_SIZE]; 3715 uint32_t **upp = (uint32_t **)uap->obj; 3716 int tocopy; 3717 int error = 0; 3718 int i, pos = 0; 3719 3720 while (count > 0) { 3721 tocopy = count; 3722 if (tocopy > BATCH_SIZE) 3723 tocopy = BATCH_SIZE; 3724 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t)); 3725 if (error != 0) 3726 break; 3727 for (i = 0; i < tocopy; ++i) 3728 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 3729 INT_MAX, 1); 3730 count -= tocopy; 3731 pos += tocopy; 3732 } 3733 return (error); 3734 } 3735
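/* Dispatch table for the 32-bit compat syscall; entries must line up with the UMTX_OP_* numbering, exactly as in op_table above. */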
3736 static _umtx_op_func op_table_compat32[] = { 3737 __umtx_op_unimpl, /* UMTX_OP_RESERVED0 */ 3738 __umtx_op_unimpl, /* UMTX_OP_RESERVED1 */ 3739 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ 3740 __umtx_op_wake, /* UMTX_OP_WAKE */ 3741 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3742 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */ 3743 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3744 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3745 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ 3746 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3747 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3748 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ 3749 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ 3750 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ 3751 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3752 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3753 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3754 __umtx_op_wait_umutex_compat32, /* UMTX_OP_MUTEX_WAIT */ 3755 __umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */ 3756 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3757 __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */ 3758 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3759 #else 3760 __umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */ 3761 __umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */ 3762 #endif 3763 __umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */ 3764 __umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */ 3765 __umtx_op_sem2_wait_compat32, /* UMTX_OP_SEM2_WAIT */ 3766 __umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */ 3767 }; 3768 3769 int 3770 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 3771 { 3772 if ((unsigned)uap->op < UMTX_OP_MAX) 3773 return (*op_table_compat32[uap->op])(td, 3774 (struct _umtx_op_args *)uap); 3775 return (EINVAL); 3776 } 3777 #endif 3778
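/* Per-thread umtx state is allocated by umtx_thread_init(), reset by umtx_thread_alloc() when a new thread is created, and torn down through the exec and thread-exit hooks below. */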
3779 void 3780 umtx_thread_init(struct thread *td) 3781 { 3782 td->td_umtxq = umtxq_alloc(); 3783 td->td_umtxq->uq_thread = td; 3784 } 3785 3786 void 3787 umtx_thread_fini(struct thread *td) 3788 { 3789 umtxq_free(td->td_umtxq); 3790 } 3791 3792 /* 3793 * Called when a new thread is created, e.g. via fork(). 3794 */ 3795 void 3796 umtx_thread_alloc(struct thread *td) 3797 { 3798 struct umtx_q *uq; 3799 3800 uq = td->td_umtxq; 3801 uq->uq_inherited_pri = PRI_MAX; 3802 3803 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 3804 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 3805 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 3806 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 3807 } 3808 3809 /* 3810 * exec() hook. 3811 */ 3812 static void 3813 umtx_exec_hook(void *arg __unused, struct proc *p __unused, 3814 struct image_params *imgp __unused) 3815 { 3816 umtx_thread_cleanup(curthread); 3817 } 3818 3819 /* 3820 * thread_exit() hook. 3821 */ 3822 void 3823 umtx_thread_exit(struct thread *td) 3824 { 3825 umtx_thread_cleanup(td); 3826 } 3827 3828 /* 3829 * Clean up umtx data. 3830 */ 3831 static void 3832 umtx_thread_cleanup(struct thread *td) 3833 { 3834 struct umtx_q *uq; 3835 struct umtx_pi *pi; 3836 3837 if ((uq = td->td_umtxq) == NULL) 3838 return; 3839 3840 mtx_lock(&umtx_lock); 3841 uq->uq_inherited_pri = PRI_MAX; 3842 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 3843 pi->pi_owner = NULL; 3844 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 3845 } 3846 mtx_unlock(&umtx_lock); 3847 thread_lock(td); 3848 sched_lend_user_prio(td, PRI_MAX); 3849 thread_unlock(td); 3850 } 3851
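/* * Usage sketch (illustrative only, not part of this file): the dispatch * tables above make _umtx_op(2) usable from userland as a futex-style * primitive. UMTX_OP_WAIT_UINT_PRIVATE sleeps while the word still holds * the expected value, and UMTX_OP_WAKE_PRIVATE wakes up to 'val' waiters; * a NULL uaddr2 means no timeout, as handled by __umtx_op_wait_uint_private * above. The helper names below are hypothetical. * * #include <sys/types.h> * #include <sys/umtx.h> * * static void * futex_wait(u_int *w, u_int expected) * { * (void)_umtx_op(w, UMTX_OP_WAIT_UINT_PRIVATE, expected, NULL, NULL); * } * * static void * futex_wake_one(u_int *w) * { * (void)_umtx_op(w, UMTX_OP_WAKE_PRIVATE, 1, NULL, NULL); * } */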