/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. Reads may use either the chain lock
	 * or umtx_lock; writes must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Threads contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority.  There is a security reason:
 * a user can simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would in turn boost A's priority via
 * priority propagation, and A's priority would never be lowered even
 * if it were using 100% CPU.  This would be unfair to other processes.
 */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
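
/*
 * Illustrative sketch (not in the original source): GET_SHARE() maps the
 * user-supplied flag word to a key scope.  A pthread object initialized
 * as PTHREAD_PROCESS_SHARED carries USYNC_PROCESS_SHARED in its flags:
 *
 *	GET_SHARE(USYNC_PROCESS_SHARED)	-> PROCESS_SHARE
 *	GET_SHARE(0)			-> THREAD_SHARE
 */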
#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
#endif

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}
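
/*
 * Usage note (not in the original source): with "options UMTX_PROFILING"
 * compiled in, the statistics above are readable from userland with
 * sysctl(8), e.g.:
 *
 *	sysctl debug.umtx.chains.peaks		# top-5 longest chains
 *	sysctl debug.umtx.chains.clear=1	# reset the length counters
 */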
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
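
/*
 * Illustrative sketch (not in the original source): uc_busy acts as a
 * sleepable lock layered over the chain mutex, taken around any step
 * that may fault on userland memory.  The typical pattern in the
 * lock/unlock paths below is:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);		(may drop/retake the chain lock)
 *	umtxq_unlock(&key);
 *	... access pageable userland memory ...
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);
 *	umtxq_unlock(&key);
 */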
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters; returns the count.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
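
/*
 * Design note (sketch, not in the original source): every umtx_q owns a
 * spare umtxq_queue.  The first waiter on a key donates its spare as the
 * per-key queue head; later waiters park their spares on uc_spare_queue.
 * On removal a thread takes back either the emptied head or any spare,
 * so queue heads never need to be allocated while the chain lock is held.
 */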
static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0,
	    &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{
	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}
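
/*
 * Illustrative sketch (not in the original source): every timed wait
 * below follows the same recipe, with relative and absolute timeouts
 * both normalized to an end time on the caller's clock:
 *
 *	struct abs_timeout timo;
 *	if (timeout != NULL)
 *		abs_timeout_init2(&timo, timeout);
 *	...
 *	error = umtxq_sleep(uq, "uwait", timeout == NULL ? NULL : &timo);
 *
 * umtxq_sleep() re-reads the clock after every EWOULDBLOCK wakeup, so a
 * premature hardclock wakeup simply re-arms the sleep.
 */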
/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
			    (vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare the value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
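
/*
 * Usage sketch (not in the original source): do_wait() and
 * kern_umtx_wake() back the futex-style UMTX_OP_WAIT/UMTX_OP_WAKE pair
 * of the _umtx_op(2) system call.  A userland caller would look roughly
 * like:
 *
 *	while (atomic_load_int(&word) == val)
 *		_umtx_op(&word, UMTX_OP_WAIT, val, NULL, NULL);
 *	...
 *	_umtx_op(&word, UMTX_OP_WAKE, 1, NULL, NULL);
 *
 * The kernel re-reads the word after queuing the waiter, so a wakeup
 * racing with the userland check is not lost.
 */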
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}
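
/*
 * State sketch (not in the original source): the m_owner word of a
 * normal umutex encodes the whole protocol.
 *
 *	UMUTEX_UNOWNED (0)       -> free
 *	tid                      -> owned, no kernel waiters known
 *	tid | UMUTEX_CONTESTED   -> owned, unlock must enter the kernel
 *	UMUTEX_CONTESTED         -> free, but waiters may exist
 *
 * Userland handles the UNOWNED <-> tid transitions with a single CAS;
 * do_lock_normal()/do_unlock_normal() take over once the contested bit
 * is involved.
 */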
/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * only for simple mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.  Otherwise,
	 * don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
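
/*
 * Convention note (sketch, not in the original source): FreeBSD kernel
 * priorities are numerically inverted, so "UPRI(a) < UPRI(b)" means a
 * has the *better* priority.  pi_blocked is therefore kept sorted
 * ascending by UPRI(), and TAILQ_FIRST(&pi->pi_blocked) is always the
 * highest-priority waiter used in the lending decisions below.
 */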
/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in its blocked PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
    uint32_t owner, const char *wmesg, struct abs_timeout *timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count for a PI mutex; if the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
			    pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}
/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			rv = casueword32(&m->m_owner,
			    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	error = casueword32(&m->m_owner, owner, &old,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
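
/*
 * Design note (sketch, not in the original source): on PI unlock the
 * owner walks its remaining uq_pi_contested list and recomputes the
 * best lendable priority before waking the top waiter, so any priority
 * it inherited solely from this mutex is shed here rather than
 * lingering until the thread leaves the kernel.
 */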
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
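
/*
 * Worked example (sketch, not in the original source): user ceilings use
 * the POSIX convention where a larger value means a stronger priority,
 * while kernel priorities are inverted.  "ceiling = RTP_PRIO_MAX -
 * ceiling" converts between the two scales, and the unsigned compare
 * that follows rejects out-of-range input.  With RTP_PRIO_MAX == 31, a
 * user ceiling of 31 maps to PRI_MIN_REALTIME + 0, the strongest slot a
 * PP mutex can confer.
 */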
/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED so that userland always enters the kernel
	 * to lock the mutex.  This is necessary because thread priority
	 * has to be adjusted for such a mutex.
	 */
	error = suword32(&m->m_owner, UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error, rv;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner,
		    UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}
		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}
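
/*
 * Semantics note (sketch, not in the original source): untimed lock
 * requests convert EINTR to ERESTART so the system call is transparently
 * restarted after a signal, while timed requests convert ERESTART back
 * to EINTR and return to userland, which can recompute the remaining
 * time before retrying.
 */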

static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* Only the predefined clock ids will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing the user mutex, but
	 * avoid dirtying the cache line when it is already set.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0),
		    timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal, or
		 * a spurious wakeup; clear the c_has_waiters flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
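
/*
 * Illustrative userland sketch (not part of this file): the wait side of
 * the condvar protocol implemented by do_cv_wait().  The caller holds
 * the struct umutex; the kernel publishes c_has_waiters and then
 * releases the mutex on the caller's behalf.  Note that UMTX_OP_CV_WAIT
 * takes a plain struct timespec rather than a struct _umtx_time.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <time.h>
 *
 *	struct ucond cv;			// zero-initialized
 *	struct umutex mu;			// held by this thread
 *	struct timespec ts = { .tv_sec = 1 };	// relative, CLOCK_REALTIME
 *
 *	_umtx_op(&cv, UMTX_OP_CV_WAIT, 0, &mu, &ts);
 */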

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}
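
/*
 * Illustrative userland sketch (assumed helper, not part of this file):
 * c_has_waiters is what keeps the signal side out of the kernel on the
 * fast path; the functions above clear it once no sleeper can remain.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <machine/atomic.h>
 *
 *	static void
 *	my_cv_signal(struct ucond *cv)		// hypothetical helper
 *	{
 *		if (atomic_load_acq_32(&cv->c_has_waiters) != 0)
 *			_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);
 *	}
 */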

static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* The state changed while we set the flags; restart. */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * The contention bit is set; before sleeping, increase
		 * the read waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers + 1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/*
		 * Decrease the read waiter count, and possibly clear the
		 * read contention bit.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers - 1);
		if (blocked_readers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1)
				error = EFAULT;
			while (error == 0) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error = umtxq_check_susp(td);
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
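
/*
 * Illustrative sketch (not part of this file): rw_state packs the whole
 * lock into one 32-bit word, so a single casueword32() in the loop above
 * can acquire the lock while preserving the waiter bits.  Per
 * <sys/umtx.h>, URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS and
 * URWLOCK_READ_WAITERS occupy the high bits, and the remaining bits hold
 * the reader count, extracted with URWLOCK_READER_COUNT().
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <stdio.h>
 *
 *	static void
 *	rw_state_decode(int32_t state)		// hypothetical debug helper
 *	{
 *		printf("owner=%d wwait=%d rwait=%d readers=%d\n",
 *		    (state & URWLOCK_WRITE_OWNER) != 0,
 *		    (state & URWLOCK_WRITE_WAITERS) != 0,
 *		    (state & URWLOCK_READ_WAITERS) != 0,
 *		    URWLOCK_READER_COUNT(state));
 *	}
 */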

static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock,
    struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		while (!(state & URWLOCK_WRITE_OWNER) &&
		    URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			if (!(state & (URWLOCK_WRITE_OWNER |
			    URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX,
				    UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		if (!(state & URWLOCK_WRITE_OWNER) &&
		    URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

		while ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers - 1);
		if (blocked_writers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error = umtxq_check_susp(td);
				/*
				 * We are leaving URWLOCK_WRITE_WAITERS
				 * set behind, but this should not harm
				 * correctness.
				 */
				if (error != 0)
					break;
			}
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
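
/*
 * Illustrative userland sketch (not part of this file): a timed write
 * lock request.  Timed _umtx_op calls pass the structure size through
 * uaddr1 and the structure itself through uaddr2, which is how
 * __umtx_op_rw_wrlock() below recovers the timeout.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <string.h>
 *	#include <time.h>
 *
 *	struct urwlock rw;
 *	struct _umtx_time to;
 *
 *	memset(&rw, 0, sizeof(rw));
 *	memset(&to, 0, sizeof(to));
 *	to._timeout.tv_sec = 2;			// give up after two seconds
 *	to._clockid = CLOCK_MONOTONIC;		// relative, since _flags == 0
 *	_umtx_op(&rw, UMTX_OP_RW_WRLOCK, 0, (void *)sizeof(to), &to);
 */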

static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
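
/*
 * Illustrative userland sketch (not part of this file): which queue
 * do_rw_unlock() wakes first is decided by URWLOCK_PREFER_READER, a
 * per-lock choice userland makes once, in rw_flags, at init time.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <string.h>
 *
 *	struct urwlock rw;
 *
 *	memset(&rw, 0, sizeof(rw));
 *	rw.rw_flags = URWLOCK_PREFER_READER;	// wake all readers first
 */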

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv = fueword32(&sem->_count, &count);
	if (rv == -1 || count != 0) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (rv == -1 ? EFAULT : 0);
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		umtxq_signal(&key, 1);
		/*
		 * Since the count was greater than 0, the memory is still
		 * being referenced by user code, so we can safely update
		 * the _has_waiters flag.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
#endif

static int
do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t count, flags;
	int error, rv;

	uq = td->td_umtxq;
	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = fueword32(&sem->_count, &count);
	if (rv == -1) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (EFAULT);
	}
	for (;;) {
		if (USEM_COUNT(count) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (0);
		}
		if (count == USEM_HAS_WAITERS)
			break;
		rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (count == 0)
			break;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		umtxq_signal(&key, 1);

		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			rv = fueword32(&sem->_count, &count);
			while (rv != -1 && count & USEM_HAS_WAITERS)
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
			if (rv == -1)
				error = EFAULT;
			umtxq_lock(&key);
		}
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
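
/*
 * Illustrative userland sketch (assumed helper, not part of this file):
 * the post side of the _usem2 protocol.  _count carries both the value
 * and the USEM_HAS_WAITERS bit, so the poster bumps the count with a CAS
 * and only enters the kernel when the waiters bit was set.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <machine/atomic.h>
 *
 *	static void
 *	my_sem_post(struct _usem2 *sem)		// hypothetical helper
 *	{
 *		uint32_t c;
 *
 *		do {
 *			c = sem->_count;
 *		} while (!atomic_cmpset_32(&sem->_count, c, c + 1));
 *		if ((c & USEM_HAS_WAITERS) != 0)
 *			_umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL);
 *	}
 */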

inline int
umtx_copyin_timeout(const void *addr, struct timespec *tsp)
{
	int error;

	error = copyin(addr, tsp, sizeof(struct timespec));
	if (error == 0) {
		if (tsp->tv_sec < 0 ||
		    tsp->tv_nsec >= 1000000000 ||
		    tsp->tv_nsec < 0)
			error = EINVAL;
	}
	return (error);
}

static inline int
umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
{
	int error;

	if (size <= sizeof(struct timespec)) {
		tp->_clockid = CLOCK_REALTIME;
		tp->_flags = 0;
		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
	} else
		error = copyin(addr, tp, sizeof(struct _umtx_time));
	if (error != 0)
		return (error);
	if (tp->_timeout.tv_sec < 0 ||
	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
		return (EINVAL);
	return (0);
}
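
/*
 * Illustrative userland sketch (not part of this file): callers may pass
 * either a bare struct timespec (legacy: relative, CLOCK_REALTIME) or a
 * full struct _umtx_time; umtx_copyin_umtx_time() above tells them apart
 * by the size that the syscall wrappers forward in uaddr1.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <string.h>
 *	#include <time.h>
 *
 *	uint32_t word = 0;
 *	struct _umtx_time to;
 *
 *	memset(&to, 0, sizeof(to));
 *	to._timeout.tv_sec = 5;			// relative five seconds
 *	to._clockid = CLOCK_MONOTONIC;
 *	_umtx_op(&word, UMTX_OP_WAIT_UINT_PRIVATE, 0,
 *	    (void *)sizeof(to), &to);		// the size goes in uaddr1
 */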

static int
__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap)
{

	return (EOPNOTSUPP);
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0));
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

#define BATCH_SIZE	128
static int
__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	void *uaddrs[BATCH_SIZE];
	char **upp = (char **)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}
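
/*
 * Illustrative userland sketch (not part of this file): NWAKE_PRIVATE
 * lets a broadcast that must wake sleepers on many separate words cost
 * one syscall instead of one per address; __umtx_op_nwake_private()
 * above drains the pointer array in BATCH_SIZE chunks.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *
 *	uint32_t a, b, c;
 *	void *addrs[3] = { &a, &b, &c };
 *
 *	_umtx_op(addrs, UMTX_OP_NWAKE_PRIVATE, 3, NULL, NULL);
 */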

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
}

static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_wake_umutex(td, uap->obj));
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_unlock_umutex(td, uap->obj));
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}

static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_cv_signal(td, uap->obj));
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_cv_broadcast(td, uap->obj));
}

static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);

		error = do_rw_wrlock(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_rw_unlock(td, uap->obj));
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_sem_wake(td, uap->obj));
}
#endif

static int
__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_wake2_umutex(td, uap->obj, uap->val));
}

static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem2_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_sem2_wake(td, uap->obj));
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static _umtx_op_func op_table[] = {
	__umtx_op_unimpl,		/* UMTX_OP_RESERVED0 */
	__umtx_op_unimpl,		/* UMTX_OP_RESERVED1 */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_MUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_MUTEX_WAKE */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
#else
	__umtx_op_unimpl,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_unimpl,		/* UMTX_OP_SEM_WAKE */
#endif
	__umtx_op_nwake_private,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex,		/* UMTX_OP_MUTEX_WAKE2 */
	__umtx_op_sem2_wait,		/* UMTX_OP_SEM2_WAIT */
	__umtx_op_sem2_wake,		/* UMTX_OP_SEM2_WAKE */
};

int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{

	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table[uap->op])(td, uap);
	return (EINVAL);
}

#ifdef COMPAT_FREEBSD32

struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};

struct umtx_time32 {
	struct timespec32	timeout;
	uint32_t		flags;
	uint32_t		clockid;
};

static inline int
umtx_copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			tsp->tv_sec = ts32.tv_sec;
			tsp->tv_nsec = ts32.tv_nsec;
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
{
	struct umtx_time32 t32;
	int error;

	t32.clockid = CLOCK_REALTIME;
	t32.flags = 0;
	if (size <= sizeof(struct timespec32))
		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
	else
		error = copyin(addr, &t32, sizeof(struct umtx_time32));
	if (error != 0)
		return (error);
	if (t32.timeout.tv_sec < 0 ||
	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
		return (EINVAL);
	tp->_timeout.tv_sec = t32.timeout.tv_sec;
	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
	tp->_flags = t32.flags;
	tp->_clockid = t32.clockid;
	return (0);
}
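
/*
 * Illustrative sketch (not part of this file): on LP64 kernels a native
 * struct timespec is 16 bytes (64-bit tv_sec), while a 32-bit process
 * submits the 8-byte layout above, so the compat copyin helpers above
 * must widen the fields rather than copyin() the native type directly.
 *
 *	struct timespec32 ts32 = { .tv_sec = 1, .tv_nsec = 500000000 };
 *	struct timespec ts;
 *
 *	ts.tv_sec = ts32.tv_sec;	// sign-extends 32 -> 64 bits
 *	ts.tv_nsec = ts32.tv_nsec;
 */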

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_wrlock(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_wait_uint_private_compat32(struct thread *td,
    struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}
#endif

static int
__umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem2_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	uint32_t uaddrs[BATCH_SIZE];
	uint32_t **upp = (uint32_t **)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
			    INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static _umtx_op_func op_table_compat32[] = {
	__umtx_op_unimpl,		/* UMTX_OP_RESERVED0 */
	__umtx_op_unimpl,		/* UMTX_OP_RESERVED1 */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32,	/* UMTX_OP_MUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_MUTEX_WAKE */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
#else
	__umtx_op_unimpl,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_unimpl,		/* UMTX_OP_SEM_WAKE */
#endif
	__umtx_op_nwake_private32,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex,		/* UMTX_OP_MUTEX_WAKE2 */
	__umtx_op_sem2_wait_compat32,	/* UMTX_OP_SEM2_WAIT */
	__umtx_op_sem2_wake,		/* UMTX_OP_SEM2_WAKE */
};

int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{

	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap);
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused)
{

	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}