1 /*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_compat.h" 32 #include "opt_umtx_profiling.h" 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mutex.h> 40 #include <sys/priv.h> 41 #include <sys/proc.h> 42 #include <sys/sbuf.h> 43 #include <sys/sched.h> 44 #include <sys/smp.h> 45 #include <sys/sysctl.h> 46 #include <sys/sysent.h> 47 #include <sys/systm.h> 48 #include <sys/sysproto.h> 49 #include <sys/syscallsubr.h> 50 #include <sys/eventhandler.h> 51 #include <sys/umtx.h> 52 53 #include <vm/vm.h> 54 #include <vm/vm_param.h> 55 #include <vm/pmap.h> 56 #include <vm/vm_map.h> 57 #include <vm/vm_object.h> 58 59 #include <machine/cpu.h> 60 61 #ifdef COMPAT_FREEBSD32 62 #include <compat/freebsd32/freebsd32_proto.h> 63 #endif 64 65 #define _UMUTEX_TRY 1 66 #define _UMUTEX_WAIT 2 67 68 #ifdef UMTX_PROFILING 69 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 70 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 71 #endif 72 73 /* Priority inheritance mutex info. */ 74 struct umtx_pi { 75 /* Owner thread */ 76 struct thread *pi_owner; 77 78 /* Reference count */ 79 int pi_refcount; 80 81 /* List entry to link umtx holding by thread */ 82 TAILQ_ENTRY(umtx_pi) pi_link; 83 84 /* List entry in hash */ 85 TAILQ_ENTRY(umtx_pi) pi_hashlink; 86 87 /* List for waiters */ 88 TAILQ_HEAD(,umtx_q) pi_blocked; 89 90 /* Identify a userland lock object */ 91 struct umtx_key pi_key; 92 }; 93 94 /* A userland synchronous object user. */ 95 struct umtx_q { 96 /* Linked list for the hash. */ 97 TAILQ_ENTRY(umtx_q) uq_link; 98 99 /* Umtx key. */ 100 struct umtx_key uq_key; 101 102 /* Umtx flags. */ 103 int uq_flags; 104 #define UQF_UMTXQ 0x0001 105 106 /* The thread waits on. */ 107 struct thread *uq_thread; 108 109 /* 110 * Blocked on PI mutex. read can use chain lock 111 * or umtx_lock, write must have both chain lock and 112 * umtx_lock being hold. 113 */ 114 struct umtx_pi *uq_pi_blocked; 115 116 /* On blocked list */ 117 TAILQ_ENTRY(umtx_q) uq_lockq; 118 119 /* Thread contending with us */ 120 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 121 122 /* Inherited priority from PP mutex */ 123 u_char uq_inherited_pri; 124 125 /* Spare queue ready to be reused */ 126 struct umtxq_queue *uq_spare_queue; 127 128 /* The queue we on */ 129 struct umtxq_queue *uq_cur_queue; 130 }; 131 132 TAILQ_HEAD(umtxq_head, umtx_q); 133 134 /* Per-key wait-queue */ 135 struct umtxq_queue { 136 struct umtxq_head head; 137 struct umtx_key key; 138 LIST_ENTRY(umtxq_queue) link; 139 int length; 140 }; 141 142 LIST_HEAD(umtxq_list, umtxq_queue); 143 144 /* Userland lock object's wait-queue chain */ 145 struct umtxq_chain { 146 /* Lock for this chain. */ 147 struct mtx uc_lock; 148 149 /* List of sleep queues. */ 150 struct umtxq_list uc_queue[2]; 151 #define UMTX_SHARED_QUEUE 0 152 #define UMTX_EXCLUSIVE_QUEUE 1 153 154 LIST_HEAD(, umtxq_queue) uc_spare_queue; 155 156 /* Busy flag */ 157 char uc_busy; 158 159 /* Chain lock waiters */ 160 int uc_waiters; 161 162 /* All PI in the list */ 163 TAILQ_HEAD(,umtx_pi) uc_pi_list; 164 165 #ifdef UMTX_PROFILING 166 u_int length; 167 u_int max_length; 168 #endif 169 }; 170 171 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 172 #define UMTXQ_BUSY_ASSERT(uc) KASSERT(&(uc)->uc_busy, ("umtx chain is not busy")) 173 174 /* 175 * Don't propagate time-sharing priority, there is a security reason, 176 * a user can simply introduce PI-mutex, let thread A lock the mutex, 177 * and let another thread B block on the mutex, because B is 178 * sleeping, its priority will be boosted, this causes A's priority to 179 * be boosted via priority propagating too and will never be lowered even 180 * if it is using 100%CPU, this is unfair to other processes. 181 */ 182 183 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 184 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 185 PRI_MAX_TIMESHARE : (td)->td_user_pri) 186 187 #define GOLDEN_RATIO_PRIME 2654404609U 188 #define UMTX_CHAINS 512 189 #define UMTX_SHIFTS (__WORD_BIT - 9) 190 191 #define GET_SHARE(flags) \ 192 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 193 194 #define BUSY_SPINS 200 195 196 struct abs_timeout { 197 int clockid; 198 struct timespec cur; 199 struct timespec end; 200 }; 201 202 static uma_zone_t umtx_pi_zone; 203 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 204 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 205 static int umtx_pi_allocated; 206 207 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 208 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 209 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 210 211 #ifdef UMTX_PROFILING 212 static long max_length; 213 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 214 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats"); 215 #endif 216 217 static void umtxq_sysinit(void *); 218 static void umtxq_hash(struct umtx_key *key); 219 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 220 static void umtxq_lock(struct umtx_key *key); 221 static void umtxq_unlock(struct umtx_key *key); 222 static void umtxq_busy(struct umtx_key *key); 223 static void umtxq_unbusy(struct umtx_key *key); 224 static void umtxq_insert_queue(struct umtx_q *uq, int q); 225 static void umtxq_remove_queue(struct umtx_q *uq, int q); 226 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); 227 static int umtxq_count(struct umtx_key *key); 228 static struct umtx_pi *umtx_pi_alloc(int); 229 static void umtx_pi_free(struct umtx_pi *pi); 230 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); 231 static void umtx_thread_cleanup(struct thread *td); 232 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 233 struct image_params *imgp __unused); 234 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 235 236 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 237 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 238 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 239 240 static struct mtx umtx_lock; 241 242 #ifdef UMTX_PROFILING 243 static void 244 umtx_init_profiling(void) 245 { 246 struct sysctl_oid *chain_oid; 247 char chain_name[10]; 248 int i; 249 250 for (i = 0; i < UMTX_CHAINS; ++i) { 251 snprintf(chain_name, sizeof(chain_name), "%d", i); 252 chain_oid = SYSCTL_ADD_NODE(NULL, 253 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 254 chain_name, CTLFLAG_RD, NULL, "umtx hash stats"); 255 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 256 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 257 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 258 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 259 } 260 } 261 262 static int 263 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 264 { 265 char buf[512]; 266 struct sbuf sb; 267 struct umtxq_chain *uc; 268 u_int fract, i, j, tot, whole; 269 u_int sf0, sf1, sf2, sf3, sf4; 270 u_int si0, si1, si2, si3, si4; 271 u_int sw0, sw1, sw2, sw3, sw4; 272 273 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 274 for (i = 0; i < 2; i++) { 275 tot = 0; 276 for (j = 0; j < UMTX_CHAINS; ++j) { 277 uc = &umtxq_chains[i][j]; 278 mtx_lock(&uc->uc_lock); 279 tot += uc->max_length; 280 mtx_unlock(&uc->uc_lock); 281 } 282 if (tot == 0) 283 sbuf_printf(&sb, "%u) Empty ", i); 284 else { 285 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 286 si0 = si1 = si2 = si3 = si4 = 0; 287 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 288 for (j = 0; j < UMTX_CHAINS; j++) { 289 uc = &umtxq_chains[i][j]; 290 mtx_lock(&uc->uc_lock); 291 whole = uc->max_length * 100; 292 mtx_unlock(&uc->uc_lock); 293 fract = (whole % tot) * 100; 294 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 295 sf0 = fract; 296 si0 = j; 297 sw0 = whole; 298 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 299 sf1)) { 300 sf1 = fract; 301 si1 = j; 302 sw1 = whole; 303 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 304 sf2)) { 305 sf2 = fract; 306 si2 = j; 307 sw2 = whole; 308 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 309 sf3)) { 310 sf3 = fract; 311 si3 = j; 312 sw3 = whole; 313 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 314 sf4)) { 315 sf4 = fract; 316 si4 = j; 317 sw4 = whole; 318 } 319 } 320 sbuf_printf(&sb, "queue %u:\n", i); 321 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 322 sf0 / tot, si0); 323 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 324 sf1 / tot, si1); 325 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 326 sf2 / tot, si2); 327 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 328 sf3 / tot, si3); 329 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 330 sf4 / tot, si4); 331 } 332 } 333 sbuf_trim(&sb); 334 sbuf_finish(&sb); 335 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 336 sbuf_delete(&sb); 337 return (0); 338 } 339 340 static int 341 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 342 { 343 struct umtxq_chain *uc; 344 u_int i, j; 345 int clear, error; 346 347 clear = 0; 348 error = sysctl_handle_int(oidp, &clear, 0, req); 349 if (error != 0 || req->newptr == NULL) 350 return (error); 351 352 if (clear != 0) { 353 for (i = 0; i < 2; ++i) { 354 for (j = 0; j < UMTX_CHAINS; ++j) { 355 uc = &umtxq_chains[i][j]; 356 mtx_lock(&uc->uc_lock); 357 uc->length = 0; 358 uc->max_length = 0; 359 mtx_unlock(&uc->uc_lock); 360 } 361 } 362 } 363 return (0); 364 } 365 366 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 367 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 368 sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics"); 369 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 370 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 371 sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length"); 372 #endif 373 374 static void 375 umtxq_sysinit(void *arg __unused) 376 { 377 int i, j; 378 379 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 380 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 381 for (i = 0; i < 2; ++i) { 382 for (j = 0; j < UMTX_CHAINS; ++j) { 383 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 384 MTX_DEF | MTX_DUPOK); 385 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 386 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 387 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 388 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 389 umtxq_chains[i][j].uc_busy = 0; 390 umtxq_chains[i][j].uc_waiters = 0; 391 #ifdef UMTX_PROFILING 392 umtxq_chains[i][j].length = 0; 393 umtxq_chains[i][j].max_length = 0; 394 #endif 395 } 396 } 397 #ifdef UMTX_PROFILING 398 umtx_init_profiling(); 399 #endif 400 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); 401 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 402 EVENTHANDLER_PRI_ANY); 403 } 404 405 struct umtx_q * 406 umtxq_alloc(void) 407 { 408 struct umtx_q *uq; 409 410 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 411 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); 412 TAILQ_INIT(&uq->uq_spare_queue->head); 413 TAILQ_INIT(&uq->uq_pi_contested); 414 uq->uq_inherited_pri = PRI_MAX; 415 return (uq); 416 } 417 418 void 419 umtxq_free(struct umtx_q *uq) 420 { 421 MPASS(uq->uq_spare_queue != NULL); 422 free(uq->uq_spare_queue, M_UMTX); 423 free(uq, M_UMTX); 424 } 425 426 static inline void 427 umtxq_hash(struct umtx_key *key) 428 { 429 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; 430 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 431 } 432 433 static inline struct umtxq_chain * 434 umtxq_getchain(struct umtx_key *key) 435 { 436 if (key->type <= TYPE_SEM) 437 return (&umtxq_chains[1][key->hash]); 438 return (&umtxq_chains[0][key->hash]); 439 } 440 441 /* 442 * Lock a chain. 443 */ 444 static inline void 445 umtxq_lock(struct umtx_key *key) 446 { 447 struct umtxq_chain *uc; 448 449 uc = umtxq_getchain(key); 450 mtx_lock(&uc->uc_lock); 451 } 452 453 /* 454 * Unlock a chain. 455 */ 456 static inline void 457 umtxq_unlock(struct umtx_key *key) 458 { 459 struct umtxq_chain *uc; 460 461 uc = umtxq_getchain(key); 462 mtx_unlock(&uc->uc_lock); 463 } 464 465 /* 466 * Set chain to busy state when following operation 467 * may be blocked (kernel mutex can not be used). 468 */ 469 static inline void 470 umtxq_busy(struct umtx_key *key) 471 { 472 struct umtxq_chain *uc; 473 474 uc = umtxq_getchain(key); 475 mtx_assert(&uc->uc_lock, MA_OWNED); 476 if (uc->uc_busy) { 477 #ifdef SMP 478 if (smp_cpus > 1) { 479 int count = BUSY_SPINS; 480 if (count > 0) { 481 umtxq_unlock(key); 482 while (uc->uc_busy && --count > 0) 483 cpu_spinwait(); 484 umtxq_lock(key); 485 } 486 } 487 #endif 488 while (uc->uc_busy) { 489 uc->uc_waiters++; 490 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 491 uc->uc_waiters--; 492 } 493 } 494 uc->uc_busy = 1; 495 } 496 497 /* 498 * Unbusy a chain. 499 */ 500 static inline void 501 umtxq_unbusy(struct umtx_key *key) 502 { 503 struct umtxq_chain *uc; 504 505 uc = umtxq_getchain(key); 506 mtx_assert(&uc->uc_lock, MA_OWNED); 507 KASSERT(uc->uc_busy != 0, ("not busy")); 508 uc->uc_busy = 0; 509 if (uc->uc_waiters) 510 wakeup_one(uc); 511 } 512 513 static inline void 514 umtxq_unbusy_unlocked(struct umtx_key *key) 515 { 516 517 umtxq_lock(key); 518 umtxq_unbusy(key); 519 umtxq_unlock(key); 520 } 521 522 static struct umtxq_queue * 523 umtxq_queue_lookup(struct umtx_key *key, int q) 524 { 525 struct umtxq_queue *uh; 526 struct umtxq_chain *uc; 527 528 uc = umtxq_getchain(key); 529 UMTXQ_LOCKED_ASSERT(uc); 530 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 531 if (umtx_key_match(&uh->key, key)) 532 return (uh); 533 } 534 535 return (NULL); 536 } 537 538 static inline void 539 umtxq_insert_queue(struct umtx_q *uq, int q) 540 { 541 struct umtxq_queue *uh; 542 struct umtxq_chain *uc; 543 544 uc = umtxq_getchain(&uq->uq_key); 545 UMTXQ_LOCKED_ASSERT(uc); 546 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 547 uh = umtxq_queue_lookup(&uq->uq_key, q); 548 if (uh != NULL) { 549 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 550 } else { 551 uh = uq->uq_spare_queue; 552 uh->key = uq->uq_key; 553 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 554 #ifdef UMTX_PROFILING 555 uc->length++; 556 if (uc->length > uc->max_length) { 557 uc->max_length = uc->length; 558 if (uc->max_length > max_length) 559 max_length = uc->max_length; 560 } 561 #endif 562 } 563 uq->uq_spare_queue = NULL; 564 565 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 566 uh->length++; 567 uq->uq_flags |= UQF_UMTXQ; 568 uq->uq_cur_queue = uh; 569 return; 570 } 571 572 static inline void 573 umtxq_remove_queue(struct umtx_q *uq, int q) 574 { 575 struct umtxq_chain *uc; 576 struct umtxq_queue *uh; 577 578 uc = umtxq_getchain(&uq->uq_key); 579 UMTXQ_LOCKED_ASSERT(uc); 580 if (uq->uq_flags & UQF_UMTXQ) { 581 uh = uq->uq_cur_queue; 582 TAILQ_REMOVE(&uh->head, uq, uq_link); 583 uh->length--; 584 uq->uq_flags &= ~UQF_UMTXQ; 585 if (TAILQ_EMPTY(&uh->head)) { 586 KASSERT(uh->length == 0, 587 ("inconsistent umtxq_queue length")); 588 #ifdef UMTX_PROFILING 589 uc->length--; 590 #endif 591 LIST_REMOVE(uh, link); 592 } else { 593 uh = LIST_FIRST(&uc->uc_spare_queue); 594 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 595 LIST_REMOVE(uh, link); 596 } 597 uq->uq_spare_queue = uh; 598 uq->uq_cur_queue = NULL; 599 } 600 } 601 602 /* 603 * Check if there are multiple waiters 604 */ 605 static int 606 umtxq_count(struct umtx_key *key) 607 { 608 struct umtxq_chain *uc; 609 struct umtxq_queue *uh; 610 611 uc = umtxq_getchain(key); 612 UMTXQ_LOCKED_ASSERT(uc); 613 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 614 if (uh != NULL) 615 return (uh->length); 616 return (0); 617 } 618 619 /* 620 * Check if there are multiple PI waiters and returns first 621 * waiter. 622 */ 623 static int 624 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 625 { 626 struct umtxq_chain *uc; 627 struct umtxq_queue *uh; 628 629 *first = NULL; 630 uc = umtxq_getchain(key); 631 UMTXQ_LOCKED_ASSERT(uc); 632 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 633 if (uh != NULL) { 634 *first = TAILQ_FIRST(&uh->head); 635 return (uh->length); 636 } 637 return (0); 638 } 639 640 static int 641 umtxq_check_susp(struct thread *td) 642 { 643 struct proc *p; 644 int error; 645 646 /* 647 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to 648 * eventually break the lockstep loop. 649 */ 650 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) 651 return (0); 652 error = 0; 653 p = td->td_proc; 654 PROC_LOCK(p); 655 if (P_SHOULDSTOP(p) || 656 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) { 657 if (p->p_flag & P_SINGLE_EXIT) 658 error = EINTR; 659 else 660 error = ERESTART; 661 } 662 PROC_UNLOCK(p); 663 return (error); 664 } 665 666 /* 667 * Wake up threads waiting on an userland object. 668 */ 669 670 static int 671 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 672 { 673 struct umtxq_chain *uc; 674 struct umtxq_queue *uh; 675 struct umtx_q *uq; 676 int ret; 677 678 ret = 0; 679 uc = umtxq_getchain(key); 680 UMTXQ_LOCKED_ASSERT(uc); 681 uh = umtxq_queue_lookup(key, q); 682 if (uh != NULL) { 683 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 684 umtxq_remove_queue(uq, q); 685 wakeup(uq); 686 if (++ret >= n_wake) 687 return (ret); 688 } 689 } 690 return (ret); 691 } 692 693 694 /* 695 * Wake up specified thread. 696 */ 697 static inline void 698 umtxq_signal_thread(struct umtx_q *uq) 699 { 700 struct umtxq_chain *uc; 701 702 uc = umtxq_getchain(&uq->uq_key); 703 UMTXQ_LOCKED_ASSERT(uc); 704 umtxq_remove(uq); 705 wakeup(uq); 706 } 707 708 static inline int 709 tstohz(const struct timespec *tsp) 710 { 711 struct timeval tv; 712 713 TIMESPEC_TO_TIMEVAL(&tv, tsp); 714 return tvtohz(&tv); 715 } 716 717 static void 718 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, 719 const struct timespec *timeout) 720 { 721 722 timo->clockid = clockid; 723 if (!absolute) { 724 kern_clock_gettime(curthread, clockid, &timo->end); 725 timo->cur = timo->end; 726 timespecadd(&timo->end, timeout); 727 } else { 728 timo->end = *timeout; 729 kern_clock_gettime(curthread, clockid, &timo->cur); 730 } 731 } 732 733 static void 734 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) 735 { 736 737 abs_timeout_init(timo, umtxtime->_clockid, 738 (umtxtime->_flags & UMTX_ABSTIME) != 0, 739 &umtxtime->_timeout); 740 } 741 742 static inline void 743 abs_timeout_update(struct abs_timeout *timo) 744 { 745 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 746 } 747 748 static int 749 abs_timeout_gethz(struct abs_timeout *timo) 750 { 751 struct timespec tts; 752 753 if (timespeccmp(&timo->end, &timo->cur, <=)) 754 return (-1); 755 tts = timo->end; 756 timespecsub(&tts, &timo->cur); 757 return (tstohz(&tts)); 758 } 759 760 /* 761 * Put thread into sleep state, before sleeping, check if 762 * thread was removed from umtx queue. 763 */ 764 static inline int 765 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) 766 { 767 struct umtxq_chain *uc; 768 int error, timo; 769 770 uc = umtxq_getchain(&uq->uq_key); 771 UMTXQ_LOCKED_ASSERT(uc); 772 for (;;) { 773 if (!(uq->uq_flags & UQF_UMTXQ)) 774 return (0); 775 if (abstime != NULL) { 776 timo = abs_timeout_gethz(abstime); 777 if (timo < 0) 778 return (ETIMEDOUT); 779 } else 780 timo = 0; 781 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 782 if (error != EWOULDBLOCK) { 783 umtxq_lock(&uq->uq_key); 784 break; 785 } 786 if (abstime != NULL) 787 abs_timeout_update(abstime); 788 umtxq_lock(&uq->uq_key); 789 } 790 return (error); 791 } 792 793 /* 794 * Convert userspace address into unique logical address. 795 */ 796 int 797 umtx_key_get(void *addr, int type, int share, struct umtx_key *key) 798 { 799 struct thread *td = curthread; 800 vm_map_t map; 801 vm_map_entry_t entry; 802 vm_pindex_t pindex; 803 vm_prot_t prot; 804 boolean_t wired; 805 806 key->type = type; 807 if (share == THREAD_SHARE) { 808 key->shared = 0; 809 key->info.private.vs = td->td_proc->p_vmspace; 810 key->info.private.addr = (uintptr_t)addr; 811 } else { 812 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 813 map = &td->td_proc->p_vmspace->vm_map; 814 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 815 &entry, &key->info.shared.object, &pindex, &prot, 816 &wired) != KERN_SUCCESS) { 817 return EFAULT; 818 } 819 820 if ((share == PROCESS_SHARE) || 821 (share == AUTO_SHARE && 822 VM_INHERIT_SHARE == entry->inheritance)) { 823 key->shared = 1; 824 key->info.shared.offset = entry->offset + entry->start - 825 (vm_offset_t)addr; 826 vm_object_reference(key->info.shared.object); 827 } else { 828 key->shared = 0; 829 key->info.private.vs = td->td_proc->p_vmspace; 830 key->info.private.addr = (uintptr_t)addr; 831 } 832 vm_map_lookup_done(map, entry); 833 } 834 835 umtxq_hash(key); 836 return (0); 837 } 838 839 /* 840 * Release key. 841 */ 842 void 843 umtx_key_release(struct umtx_key *key) 844 { 845 if (key->shared) 846 vm_object_deallocate(key->info.shared.object); 847 } 848 849 /* 850 * Fetch and compare value, sleep on the address if value is not changed. 851 */ 852 static int 853 do_wait(struct thread *td, void *addr, u_long id, 854 struct _umtx_time *timeout, int compat32, int is_private) 855 { 856 struct abs_timeout timo; 857 struct umtx_q *uq; 858 u_long tmp; 859 uint32_t tmp32; 860 int error = 0; 861 862 uq = td->td_umtxq; 863 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 864 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 865 return (error); 866 867 if (timeout != NULL) 868 abs_timeout_init2(&timo, timeout); 869 870 umtxq_lock(&uq->uq_key); 871 umtxq_insert(uq); 872 umtxq_unlock(&uq->uq_key); 873 if (compat32 == 0) { 874 error = fueword(addr, &tmp); 875 if (error != 0) 876 error = EFAULT; 877 } else { 878 error = fueword32(addr, &tmp32); 879 if (error == 0) 880 tmp = tmp32; 881 else 882 error = EFAULT; 883 } 884 umtxq_lock(&uq->uq_key); 885 if (error == 0) { 886 if (tmp == id) 887 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 888 NULL : &timo); 889 if ((uq->uq_flags & UQF_UMTXQ) == 0) 890 error = 0; 891 else 892 umtxq_remove(uq); 893 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 894 umtxq_remove(uq); 895 } 896 umtxq_unlock(&uq->uq_key); 897 umtx_key_release(&uq->uq_key); 898 if (error == ERESTART) 899 error = EINTR; 900 return (error); 901 } 902 903 /* 904 * Wake up threads sleeping on the specified address. 905 */ 906 int 907 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 908 { 909 struct umtx_key key; 910 int ret; 911 912 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 913 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 914 return (ret); 915 umtxq_lock(&key); 916 ret = umtxq_signal(&key, n_wake); 917 umtxq_unlock(&key); 918 umtx_key_release(&key); 919 return (0); 920 } 921 922 /* 923 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 924 */ 925 static int 926 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 927 struct _umtx_time *timeout, int mode) 928 { 929 struct abs_timeout timo; 930 struct umtx_q *uq; 931 uint32_t owner, old, id; 932 int error, rv; 933 934 id = td->td_tid; 935 uq = td->td_umtxq; 936 error = 0; 937 if (timeout != NULL) 938 abs_timeout_init2(&timo, timeout); 939 940 /* 941 * Care must be exercised when dealing with umtx structure. It 942 * can fault on any access. 943 */ 944 for (;;) { 945 rv = fueword32(__DEVOLATILE(void *, &m->m_owner), &owner); 946 if (rv == -1) 947 return (EFAULT); 948 if (mode == _UMUTEX_WAIT) { 949 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) 950 return (0); 951 } else { 952 /* 953 * Try the uncontested case. This should be done in userland. 954 */ 955 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 956 &owner, id); 957 /* The address was invalid. */ 958 if (rv == -1) 959 return (EFAULT); 960 961 /* The acquire succeeded. */ 962 if (owner == UMUTEX_UNOWNED) 963 return (0); 964 965 /* If no one owns it but it is contested try to acquire it. */ 966 if (owner == UMUTEX_CONTESTED) { 967 rv = casueword32(&m->m_owner, 968 UMUTEX_CONTESTED, &owner, 969 id | UMUTEX_CONTESTED); 970 /* The address was invalid. */ 971 if (rv == -1) 972 return (EFAULT); 973 974 if (owner == UMUTEX_CONTESTED) 975 return (0); 976 977 rv = umtxq_check_susp(td); 978 if (rv != 0) 979 return (rv); 980 981 /* If this failed the lock has changed, restart. */ 982 continue; 983 } 984 } 985 986 if (mode == _UMUTEX_TRY) 987 return (EBUSY); 988 989 /* 990 * If we caught a signal, we have retried and now 991 * exit immediately. 992 */ 993 if (error != 0) 994 return (error); 995 996 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 997 GET_SHARE(flags), &uq->uq_key)) != 0) 998 return (error); 999 1000 umtxq_lock(&uq->uq_key); 1001 umtxq_busy(&uq->uq_key); 1002 umtxq_insert(uq); 1003 umtxq_unlock(&uq->uq_key); 1004 1005 /* 1006 * Set the contested bit so that a release in user space 1007 * knows to use the system call for unlock. If this fails 1008 * either some one else has acquired the lock or it has been 1009 * released. 1010 */ 1011 rv = casueword32(&m->m_owner, owner, &old, 1012 owner | UMUTEX_CONTESTED); 1013 1014 /* The address was invalid. */ 1015 if (rv == -1) { 1016 umtxq_lock(&uq->uq_key); 1017 umtxq_remove(uq); 1018 umtxq_unbusy(&uq->uq_key); 1019 umtxq_unlock(&uq->uq_key); 1020 umtx_key_release(&uq->uq_key); 1021 return (EFAULT); 1022 } 1023 1024 /* 1025 * We set the contested bit, sleep. Otherwise the lock changed 1026 * and we need to retry or we lost a race to the thread 1027 * unlocking the umtx. 1028 */ 1029 umtxq_lock(&uq->uq_key); 1030 umtxq_unbusy(&uq->uq_key); 1031 if (old == owner) 1032 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1033 NULL : &timo); 1034 umtxq_remove(uq); 1035 umtxq_unlock(&uq->uq_key); 1036 umtx_key_release(&uq->uq_key); 1037 1038 if (error == 0) 1039 error = umtxq_check_susp(td); 1040 } 1041 1042 return (0); 1043 } 1044 1045 /* 1046 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1047 */ 1048 static int 1049 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) 1050 { 1051 struct umtx_key key; 1052 uint32_t owner, old, id; 1053 int error; 1054 int count; 1055 1056 id = td->td_tid; 1057 /* 1058 * Make sure we own this mtx. 1059 */ 1060 error = fueword32(__DEVOLATILE(uint32_t *, &m->m_owner), &owner); 1061 if (error == -1) 1062 return (EFAULT); 1063 1064 if ((owner & ~UMUTEX_CONTESTED) != id) 1065 return (EPERM); 1066 1067 if ((owner & UMUTEX_CONTESTED) == 0) { 1068 error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED); 1069 if (error == -1) 1070 return (EFAULT); 1071 if (old == owner) 1072 return (0); 1073 owner = old; 1074 } 1075 1076 /* We should only ever be in here for contested locks */ 1077 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1078 &key)) != 0) 1079 return (error); 1080 1081 umtxq_lock(&key); 1082 umtxq_busy(&key); 1083 count = umtxq_count(&key); 1084 umtxq_unlock(&key); 1085 1086 /* 1087 * When unlocking the umtx, it must be marked as unowned if 1088 * there is zero or one thread only waiting for it. 1089 * Otherwise, it must be marked as contested. 1090 */ 1091 error = casueword32(&m->m_owner, owner, &old, 1092 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1093 umtxq_lock(&key); 1094 umtxq_signal(&key,1); 1095 umtxq_unbusy(&key); 1096 umtxq_unlock(&key); 1097 umtx_key_release(&key); 1098 if (error == -1) 1099 return (EFAULT); 1100 if (old != owner) 1101 return (EINVAL); 1102 return (0); 1103 } 1104 1105 /* 1106 * Check if the mutex is available and wake up a waiter, 1107 * only for simple mutex. 1108 */ 1109 static int 1110 do_wake_umutex(struct thread *td, struct umutex *m) 1111 { 1112 struct umtx_key key; 1113 uint32_t owner; 1114 uint32_t flags; 1115 int error; 1116 int count; 1117 1118 error = fueword32(__DEVOLATILE(uint32_t *, &m->m_owner), &owner); 1119 if (error == -1) 1120 return (EFAULT); 1121 1122 if ((owner & ~UMUTEX_CONTESTED) != 0) 1123 return (0); 1124 1125 error = fueword32(&m->m_flags, &flags); 1126 if (error == -1) 1127 return (EFAULT); 1128 1129 /* We should only ever be in here for contested locks */ 1130 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1131 &key)) != 0) 1132 return (error); 1133 1134 umtxq_lock(&key); 1135 umtxq_busy(&key); 1136 count = umtxq_count(&key); 1137 umtxq_unlock(&key); 1138 1139 if (count <= 1) { 1140 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1141 UMUTEX_UNOWNED); 1142 if (error == -1) 1143 error = EFAULT; 1144 } 1145 1146 umtxq_lock(&key); 1147 if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) 1148 umtxq_signal(&key, 1); 1149 umtxq_unbusy(&key); 1150 umtxq_unlock(&key); 1151 umtx_key_release(&key); 1152 return (error); 1153 } 1154 1155 /* 1156 * Check if the mutex has waiters and tries to fix contention bit. 1157 */ 1158 static int 1159 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1160 { 1161 struct umtx_key key; 1162 uint32_t owner, old; 1163 int type; 1164 int error; 1165 int count; 1166 1167 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 1168 case 0: 1169 type = TYPE_NORMAL_UMUTEX; 1170 break; 1171 case UMUTEX_PRIO_INHERIT: 1172 type = TYPE_PI_UMUTEX; 1173 break; 1174 case UMUTEX_PRIO_PROTECT: 1175 type = TYPE_PP_UMUTEX; 1176 break; 1177 default: 1178 return (EINVAL); 1179 } 1180 if ((error = umtx_key_get(m, type, GET_SHARE(flags), 1181 &key)) != 0) 1182 return (error); 1183 1184 owner = 0; 1185 umtxq_lock(&key); 1186 umtxq_busy(&key); 1187 count = umtxq_count(&key); 1188 umtxq_unlock(&key); 1189 /* 1190 * Only repair contention bit if there is a waiter, this means the mutex 1191 * is still being referenced by userland code, otherwise don't update 1192 * any memory. 1193 */ 1194 if (count > 1) { 1195 error = fueword32(__DEVOLATILE(uint32_t *, &m->m_owner), 1196 &owner); 1197 if (error == -1) 1198 error = EFAULT; 1199 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) { 1200 error = casueword32(&m->m_owner, owner, &old, 1201 owner | UMUTEX_CONTESTED); 1202 if (error == -1) { 1203 error = EFAULT; 1204 break; 1205 } 1206 if (old == owner) 1207 break; 1208 owner = old; 1209 error = umtxq_check_susp(td); 1210 if (error != 0) 1211 break; 1212 } 1213 } else if (count == 1) { 1214 error = fueword32(__DEVOLATILE(uint32_t *, &m->m_owner), 1215 &owner); 1216 if (error == -1) 1217 error = EFAULT; 1218 while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 && 1219 (owner & UMUTEX_CONTESTED) == 0) { 1220 error = casueword32(&m->m_owner, owner, &old, 1221 owner | UMUTEX_CONTESTED); 1222 if (error == -1) { 1223 error = EFAULT; 1224 break; 1225 } 1226 if (old == owner) 1227 break; 1228 owner = old; 1229 error = umtxq_check_susp(td); 1230 if (error != 0) 1231 break; 1232 } 1233 } 1234 umtxq_lock(&key); 1235 if (error == EFAULT) { 1236 umtxq_signal(&key, INT_MAX); 1237 } else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) 1238 umtxq_signal(&key, 1); 1239 umtxq_unbusy(&key); 1240 umtxq_unlock(&key); 1241 umtx_key_release(&key); 1242 return (error); 1243 } 1244 1245 static inline struct umtx_pi * 1246 umtx_pi_alloc(int flags) 1247 { 1248 struct umtx_pi *pi; 1249 1250 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1251 TAILQ_INIT(&pi->pi_blocked); 1252 atomic_add_int(&umtx_pi_allocated, 1); 1253 return (pi); 1254 } 1255 1256 static inline void 1257 umtx_pi_free(struct umtx_pi *pi) 1258 { 1259 uma_zfree(umtx_pi_zone, pi); 1260 atomic_add_int(&umtx_pi_allocated, -1); 1261 } 1262 1263 /* 1264 * Adjust the thread's position on a pi_state after its priority has been 1265 * changed. 1266 */ 1267 static int 1268 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1269 { 1270 struct umtx_q *uq, *uq1, *uq2; 1271 struct thread *td1; 1272 1273 mtx_assert(&umtx_lock, MA_OWNED); 1274 if (pi == NULL) 1275 return (0); 1276 1277 uq = td->td_umtxq; 1278 1279 /* 1280 * Check if the thread needs to be moved on the blocked chain. 1281 * It needs to be moved if either its priority is lower than 1282 * the previous thread or higher than the next thread. 1283 */ 1284 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1285 uq2 = TAILQ_NEXT(uq, uq_lockq); 1286 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1287 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1288 /* 1289 * Remove thread from blocked chain and determine where 1290 * it should be moved to. 1291 */ 1292 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1293 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1294 td1 = uq1->uq_thread; 1295 MPASS(td1->td_proc->p_magic == P_MAGIC); 1296 if (UPRI(td1) > UPRI(td)) 1297 break; 1298 } 1299 1300 if (uq1 == NULL) 1301 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1302 else 1303 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1304 } 1305 return (1); 1306 } 1307 1308 /* 1309 * Propagate priority when a thread is blocked on POSIX 1310 * PI mutex. 1311 */ 1312 static void 1313 umtx_propagate_priority(struct thread *td) 1314 { 1315 struct umtx_q *uq; 1316 struct umtx_pi *pi; 1317 int pri; 1318 1319 mtx_assert(&umtx_lock, MA_OWNED); 1320 pri = UPRI(td); 1321 uq = td->td_umtxq; 1322 pi = uq->uq_pi_blocked; 1323 if (pi == NULL) 1324 return; 1325 1326 for (;;) { 1327 td = pi->pi_owner; 1328 if (td == NULL || td == curthread) 1329 return; 1330 1331 MPASS(td->td_proc != NULL); 1332 MPASS(td->td_proc->p_magic == P_MAGIC); 1333 1334 thread_lock(td); 1335 if (td->td_lend_user_pri > pri) 1336 sched_lend_user_prio(td, pri); 1337 else { 1338 thread_unlock(td); 1339 break; 1340 } 1341 thread_unlock(td); 1342 1343 /* 1344 * Pick up the lock that td is blocked on. 1345 */ 1346 uq = td->td_umtxq; 1347 pi = uq->uq_pi_blocked; 1348 if (pi == NULL) 1349 break; 1350 /* Resort td on the list if needed. */ 1351 umtx_pi_adjust_thread(pi, td); 1352 } 1353 } 1354 1355 /* 1356 * Unpropagate priority for a PI mutex when a thread blocked on 1357 * it is interrupted by signal or resumed by others. 1358 */ 1359 static void 1360 umtx_repropagate_priority(struct umtx_pi *pi) 1361 { 1362 struct umtx_q *uq, *uq_owner; 1363 struct umtx_pi *pi2; 1364 int pri; 1365 1366 mtx_assert(&umtx_lock, MA_OWNED); 1367 1368 while (pi != NULL && pi->pi_owner != NULL) { 1369 pri = PRI_MAX; 1370 uq_owner = pi->pi_owner->td_umtxq; 1371 1372 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1373 uq = TAILQ_FIRST(&pi2->pi_blocked); 1374 if (uq != NULL) { 1375 if (pri > UPRI(uq->uq_thread)) 1376 pri = UPRI(uq->uq_thread); 1377 } 1378 } 1379 1380 if (pri > uq_owner->uq_inherited_pri) 1381 pri = uq_owner->uq_inherited_pri; 1382 thread_lock(pi->pi_owner); 1383 sched_lend_user_prio(pi->pi_owner, pri); 1384 thread_unlock(pi->pi_owner); 1385 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1386 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1387 } 1388 } 1389 1390 /* 1391 * Insert a PI mutex into owned list. 1392 */ 1393 static void 1394 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1395 { 1396 struct umtx_q *uq_owner; 1397 1398 uq_owner = owner->td_umtxq; 1399 mtx_assert(&umtx_lock, MA_OWNED); 1400 if (pi->pi_owner != NULL) 1401 panic("pi_ower != NULL"); 1402 pi->pi_owner = owner; 1403 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1404 } 1405 1406 /* 1407 * Claim ownership of a PI mutex. 1408 */ 1409 static int 1410 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1411 { 1412 struct umtx_q *uq, *uq_owner; 1413 1414 uq_owner = owner->td_umtxq; 1415 mtx_lock_spin(&umtx_lock); 1416 if (pi->pi_owner == owner) { 1417 mtx_unlock_spin(&umtx_lock); 1418 return (0); 1419 } 1420 1421 if (pi->pi_owner != NULL) { 1422 /* 1423 * userland may have already messed the mutex, sigh. 1424 */ 1425 mtx_unlock_spin(&umtx_lock); 1426 return (EPERM); 1427 } 1428 umtx_pi_setowner(pi, owner); 1429 uq = TAILQ_FIRST(&pi->pi_blocked); 1430 if (uq != NULL) { 1431 int pri; 1432 1433 pri = UPRI(uq->uq_thread); 1434 thread_lock(owner); 1435 if (pri < UPRI(owner)) 1436 sched_lend_user_prio(owner, pri); 1437 thread_unlock(owner); 1438 } 1439 mtx_unlock_spin(&umtx_lock); 1440 return (0); 1441 } 1442 1443 /* 1444 * Adjust a thread's order position in its blocked PI mutex, 1445 * this may result new priority propagating process. 1446 */ 1447 void 1448 umtx_pi_adjust(struct thread *td, u_char oldpri) 1449 { 1450 struct umtx_q *uq; 1451 struct umtx_pi *pi; 1452 1453 uq = td->td_umtxq; 1454 mtx_lock_spin(&umtx_lock); 1455 /* 1456 * Pick up the lock that td is blocked on. 1457 */ 1458 pi = uq->uq_pi_blocked; 1459 if (pi != NULL) { 1460 umtx_pi_adjust_thread(pi, td); 1461 umtx_repropagate_priority(pi); 1462 } 1463 mtx_unlock_spin(&umtx_lock); 1464 } 1465 1466 /* 1467 * Sleep on a PI mutex. 1468 */ 1469 static int 1470 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1471 uint32_t owner, const char *wmesg, struct abs_timeout *timo) 1472 { 1473 struct umtxq_chain *uc; 1474 struct thread *td, *td1; 1475 struct umtx_q *uq1; 1476 int pri; 1477 int error = 0; 1478 1479 td = uq->uq_thread; 1480 KASSERT(td == curthread, ("inconsistent uq_thread")); 1481 uc = umtxq_getchain(&uq->uq_key); 1482 UMTXQ_LOCKED_ASSERT(uc); 1483 UMTXQ_BUSY_ASSERT(uc); 1484 umtxq_insert(uq); 1485 mtx_lock_spin(&umtx_lock); 1486 if (pi->pi_owner == NULL) { 1487 mtx_unlock_spin(&umtx_lock); 1488 /* XXX Only look up thread in current process. */ 1489 td1 = tdfind(owner, curproc->p_pid); 1490 mtx_lock_spin(&umtx_lock); 1491 if (td1 != NULL) { 1492 if (pi->pi_owner == NULL) 1493 umtx_pi_setowner(pi, td1); 1494 PROC_UNLOCK(td1->td_proc); 1495 } 1496 } 1497 1498 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1499 pri = UPRI(uq1->uq_thread); 1500 if (pri > UPRI(td)) 1501 break; 1502 } 1503 1504 if (uq1 != NULL) 1505 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1506 else 1507 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1508 1509 uq->uq_pi_blocked = pi; 1510 thread_lock(td); 1511 td->td_flags |= TDF_UPIBLOCKED; 1512 thread_unlock(td); 1513 umtx_propagate_priority(td); 1514 mtx_unlock_spin(&umtx_lock); 1515 umtxq_unbusy(&uq->uq_key); 1516 1517 error = umtxq_sleep(uq, wmesg, timo); 1518 umtxq_remove(uq); 1519 1520 mtx_lock_spin(&umtx_lock); 1521 uq->uq_pi_blocked = NULL; 1522 thread_lock(td); 1523 td->td_flags &= ~TDF_UPIBLOCKED; 1524 thread_unlock(td); 1525 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1526 umtx_repropagate_priority(pi); 1527 mtx_unlock_spin(&umtx_lock); 1528 umtxq_unlock(&uq->uq_key); 1529 1530 return (error); 1531 } 1532 1533 /* 1534 * Add reference count for a PI mutex. 1535 */ 1536 static void 1537 umtx_pi_ref(struct umtx_pi *pi) 1538 { 1539 struct umtxq_chain *uc; 1540 1541 uc = umtxq_getchain(&pi->pi_key); 1542 UMTXQ_LOCKED_ASSERT(uc); 1543 pi->pi_refcount++; 1544 } 1545 1546 /* 1547 * Decrease reference count for a PI mutex, if the counter 1548 * is decreased to zero, its memory space is freed. 1549 */ 1550 static void 1551 umtx_pi_unref(struct umtx_pi *pi) 1552 { 1553 struct umtxq_chain *uc; 1554 1555 uc = umtxq_getchain(&pi->pi_key); 1556 UMTXQ_LOCKED_ASSERT(uc); 1557 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1558 if (--pi->pi_refcount == 0) { 1559 mtx_lock_spin(&umtx_lock); 1560 if (pi->pi_owner != NULL) { 1561 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, 1562 pi, pi_link); 1563 pi->pi_owner = NULL; 1564 } 1565 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1566 ("blocked queue not empty")); 1567 mtx_unlock_spin(&umtx_lock); 1568 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1569 umtx_pi_free(pi); 1570 } 1571 } 1572 1573 /* 1574 * Find a PI mutex in hash table. 1575 */ 1576 static struct umtx_pi * 1577 umtx_pi_lookup(struct umtx_key *key) 1578 { 1579 struct umtxq_chain *uc; 1580 struct umtx_pi *pi; 1581 1582 uc = umtxq_getchain(key); 1583 UMTXQ_LOCKED_ASSERT(uc); 1584 1585 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1586 if (umtx_key_match(&pi->pi_key, key)) { 1587 return (pi); 1588 } 1589 } 1590 return (NULL); 1591 } 1592 1593 /* 1594 * Insert a PI mutex into hash table. 1595 */ 1596 static inline void 1597 umtx_pi_insert(struct umtx_pi *pi) 1598 { 1599 struct umtxq_chain *uc; 1600 1601 uc = umtxq_getchain(&pi->pi_key); 1602 UMTXQ_LOCKED_ASSERT(uc); 1603 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1604 } 1605 1606 /* 1607 * Lock a PI mutex. 1608 */ 1609 static int 1610 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 1611 struct _umtx_time *timeout, int try) 1612 { 1613 struct abs_timeout timo; 1614 struct umtx_q *uq; 1615 struct umtx_pi *pi, *new_pi; 1616 uint32_t id, owner, old; 1617 int error, rv; 1618 1619 id = td->td_tid; 1620 uq = td->td_umtxq; 1621 1622 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1623 &uq->uq_key)) != 0) 1624 return (error); 1625 1626 if (timeout != NULL) 1627 abs_timeout_init2(&timo, timeout); 1628 1629 umtxq_lock(&uq->uq_key); 1630 pi = umtx_pi_lookup(&uq->uq_key); 1631 if (pi == NULL) { 1632 new_pi = umtx_pi_alloc(M_NOWAIT); 1633 if (new_pi == NULL) { 1634 umtxq_unlock(&uq->uq_key); 1635 new_pi = umtx_pi_alloc(M_WAITOK); 1636 umtxq_lock(&uq->uq_key); 1637 pi = umtx_pi_lookup(&uq->uq_key); 1638 if (pi != NULL) { 1639 umtx_pi_free(new_pi); 1640 new_pi = NULL; 1641 } 1642 } 1643 if (new_pi != NULL) { 1644 new_pi->pi_key = uq->uq_key; 1645 umtx_pi_insert(new_pi); 1646 pi = new_pi; 1647 } 1648 } 1649 umtx_pi_ref(pi); 1650 umtxq_unlock(&uq->uq_key); 1651 1652 /* 1653 * Care must be exercised when dealing with umtx structure. It 1654 * can fault on any access. 1655 */ 1656 for (;;) { 1657 /* 1658 * Try the uncontested case. This should be done in userland. 1659 */ 1660 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 1661 /* The address was invalid. */ 1662 if (rv == -1) { 1663 error = EFAULT; 1664 break; 1665 } 1666 1667 /* The acquire succeeded. */ 1668 if (owner == UMUTEX_UNOWNED) { 1669 error = 0; 1670 break; 1671 } 1672 1673 /* If no one owns it but it is contested try to acquire it. */ 1674 if (owner == UMUTEX_CONTESTED) { 1675 rv = casueword32(&m->m_owner, 1676 UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); 1677 /* The address was invalid. */ 1678 if (rv == -1) { 1679 error = EFAULT; 1680 break; 1681 } 1682 1683 if (owner == UMUTEX_CONTESTED) { 1684 umtxq_lock(&uq->uq_key); 1685 umtxq_busy(&uq->uq_key); 1686 error = umtx_pi_claim(pi, td); 1687 umtxq_unbusy(&uq->uq_key); 1688 umtxq_unlock(&uq->uq_key); 1689 break; 1690 } 1691 1692 error = umtxq_check_susp(td); 1693 if (error != 0) 1694 break; 1695 1696 /* If this failed the lock has changed, restart. */ 1697 continue; 1698 } 1699 1700 if (try != 0) { 1701 error = EBUSY; 1702 break; 1703 } 1704 1705 /* 1706 * If we caught a signal, we have retried and now 1707 * exit immediately. 1708 */ 1709 if (error != 0) 1710 break; 1711 1712 umtxq_lock(&uq->uq_key); 1713 umtxq_busy(&uq->uq_key); 1714 umtxq_unlock(&uq->uq_key); 1715 1716 /* 1717 * Set the contested bit so that a release in user space 1718 * knows to use the system call for unlock. If this fails 1719 * either some one else has acquired the lock or it has been 1720 * released. 1721 */ 1722 rv = casueword32(&m->m_owner, owner, &old, 1723 owner | UMUTEX_CONTESTED); 1724 1725 /* The address was invalid. */ 1726 if (rv == -1) { 1727 umtxq_unbusy_unlocked(&uq->uq_key); 1728 error = EFAULT; 1729 break; 1730 } 1731 1732 umtxq_lock(&uq->uq_key); 1733 /* 1734 * We set the contested bit, sleep. Otherwise the lock changed 1735 * and we need to retry or we lost a race to the thread 1736 * unlocking the umtx. 1737 */ 1738 if (old == owner) { 1739 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 1740 "umtxpi", timeout == NULL ? NULL : &timo); 1741 if (error != 0) 1742 continue; 1743 } else { 1744 umtxq_unbusy(&uq->uq_key); 1745 umtxq_unlock(&uq->uq_key); 1746 } 1747 1748 error = umtxq_check_susp(td); 1749 if (error != 0) 1750 break; 1751 } 1752 1753 umtxq_lock(&uq->uq_key); 1754 umtx_pi_unref(pi); 1755 umtxq_unlock(&uq->uq_key); 1756 1757 umtx_key_release(&uq->uq_key); 1758 return (error); 1759 } 1760 1761 /* 1762 * Unlock a PI mutex. 1763 */ 1764 static int 1765 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) 1766 { 1767 struct umtx_key key; 1768 struct umtx_q *uq_first, *uq_first2, *uq_me; 1769 struct umtx_pi *pi, *pi2; 1770 uint32_t owner, old, id; 1771 int error; 1772 int count; 1773 int pri; 1774 1775 id = td->td_tid; 1776 /* 1777 * Make sure we own this mtx. 1778 */ 1779 error = fueword32(__DEVOLATILE(uint32_t *, &m->m_owner), &owner); 1780 if (error == -1) 1781 return (EFAULT); 1782 1783 if ((owner & ~UMUTEX_CONTESTED) != id) 1784 return (EPERM); 1785 1786 /* This should be done in userland */ 1787 if ((owner & UMUTEX_CONTESTED) == 0) { 1788 error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED); 1789 if (error == -1) 1790 return (EFAULT); 1791 if (old == owner) 1792 return (0); 1793 owner = old; 1794 } 1795 1796 /* We should only ever be in here for contested locks */ 1797 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1798 &key)) != 0) 1799 return (error); 1800 1801 umtxq_lock(&key); 1802 umtxq_busy(&key); 1803 count = umtxq_count_pi(&key, &uq_first); 1804 if (uq_first != NULL) { 1805 mtx_lock_spin(&umtx_lock); 1806 pi = uq_first->uq_pi_blocked; 1807 KASSERT(pi != NULL, ("pi == NULL?")); 1808 if (pi->pi_owner != curthread) { 1809 mtx_unlock_spin(&umtx_lock); 1810 umtxq_unbusy(&key); 1811 umtxq_unlock(&key); 1812 umtx_key_release(&key); 1813 /* userland messed the mutex */ 1814 return (EPERM); 1815 } 1816 uq_me = curthread->td_umtxq; 1817 pi->pi_owner = NULL; 1818 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); 1819 /* get highest priority thread which is still sleeping. */ 1820 uq_first = TAILQ_FIRST(&pi->pi_blocked); 1821 while (uq_first != NULL && 1822 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 1823 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 1824 } 1825 pri = PRI_MAX; 1826 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 1827 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 1828 if (uq_first2 != NULL) { 1829 if (pri > UPRI(uq_first2->uq_thread)) 1830 pri = UPRI(uq_first2->uq_thread); 1831 } 1832 } 1833 thread_lock(curthread); 1834 sched_lend_user_prio(curthread, pri); 1835 thread_unlock(curthread); 1836 mtx_unlock_spin(&umtx_lock); 1837 if (uq_first) 1838 umtxq_signal_thread(uq_first); 1839 } 1840 umtxq_unlock(&key); 1841 1842 /* 1843 * When unlocking the umtx, it must be marked as unowned if 1844 * there is zero or one thread only waiting for it. 1845 * Otherwise, it must be marked as contested. 1846 */ 1847 error = casueword32(&m->m_owner, owner, &old, 1848 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1849 1850 umtxq_unbusy_unlocked(&key); 1851 umtx_key_release(&key); 1852 if (error == -1) 1853 return (EFAULT); 1854 if (old != owner) 1855 return (EINVAL); 1856 return (0); 1857 } 1858 1859 /* 1860 * Lock a PP mutex. 1861 */ 1862 static int 1863 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 1864 struct _umtx_time *timeout, int try) 1865 { 1866 struct abs_timeout timo; 1867 struct umtx_q *uq, *uq2; 1868 struct umtx_pi *pi; 1869 uint32_t ceiling; 1870 uint32_t owner, id; 1871 int error, pri, old_inherited_pri, su, rv; 1872 1873 id = td->td_tid; 1874 uq = td->td_umtxq; 1875 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 1876 &uq->uq_key)) != 0) 1877 return (error); 1878 1879 if (timeout != NULL) 1880 abs_timeout_init2(&timo, timeout); 1881 1882 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1883 for (;;) { 1884 old_inherited_pri = uq->uq_inherited_pri; 1885 umtxq_lock(&uq->uq_key); 1886 umtxq_busy(&uq->uq_key); 1887 umtxq_unlock(&uq->uq_key); 1888 1889 rv = fueword32(&m->m_ceilings[0], &ceiling); 1890 if (rv == -1) { 1891 error = EFAULT; 1892 goto out; 1893 } 1894 ceiling = RTP_PRIO_MAX - ceiling; 1895 if (ceiling > RTP_PRIO_MAX) { 1896 error = EINVAL; 1897 goto out; 1898 } 1899 1900 mtx_lock_spin(&umtx_lock); 1901 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 1902 mtx_unlock_spin(&umtx_lock); 1903 error = EINVAL; 1904 goto out; 1905 } 1906 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 1907 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 1908 thread_lock(td); 1909 if (uq->uq_inherited_pri < UPRI(td)) 1910 sched_lend_user_prio(td, uq->uq_inherited_pri); 1911 thread_unlock(td); 1912 } 1913 mtx_unlock_spin(&umtx_lock); 1914 1915 rv = casueword32(&m->m_owner, 1916 UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); 1917 /* The address was invalid. */ 1918 if (rv == -1) { 1919 error = EFAULT; 1920 break; 1921 } 1922 1923 if (owner == UMUTEX_CONTESTED) { 1924 error = 0; 1925 break; 1926 } 1927 1928 if (try != 0) { 1929 error = EBUSY; 1930 break; 1931 } 1932 1933 /* 1934 * If we caught a signal, we have retried and now 1935 * exit immediately. 1936 */ 1937 if (error != 0) 1938 break; 1939 1940 umtxq_lock(&uq->uq_key); 1941 umtxq_insert(uq); 1942 umtxq_unbusy(&uq->uq_key); 1943 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 1944 NULL : &timo); 1945 umtxq_remove(uq); 1946 umtxq_unlock(&uq->uq_key); 1947 1948 mtx_lock_spin(&umtx_lock); 1949 uq->uq_inherited_pri = old_inherited_pri; 1950 pri = PRI_MAX; 1951 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1952 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1953 if (uq2 != NULL) { 1954 if (pri > UPRI(uq2->uq_thread)) 1955 pri = UPRI(uq2->uq_thread); 1956 } 1957 } 1958 if (pri > uq->uq_inherited_pri) 1959 pri = uq->uq_inherited_pri; 1960 thread_lock(td); 1961 sched_lend_user_prio(td, pri); 1962 thread_unlock(td); 1963 mtx_unlock_spin(&umtx_lock); 1964 } 1965 1966 if (error != 0) { 1967 mtx_lock_spin(&umtx_lock); 1968 uq->uq_inherited_pri = old_inherited_pri; 1969 pri = PRI_MAX; 1970 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1971 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1972 if (uq2 != NULL) { 1973 if (pri > UPRI(uq2->uq_thread)) 1974 pri = UPRI(uq2->uq_thread); 1975 } 1976 } 1977 if (pri > uq->uq_inherited_pri) 1978 pri = uq->uq_inherited_pri; 1979 thread_lock(td); 1980 sched_lend_user_prio(td, pri); 1981 thread_unlock(td); 1982 mtx_unlock_spin(&umtx_lock); 1983 } 1984 1985 out: 1986 umtxq_unbusy_unlocked(&uq->uq_key); 1987 umtx_key_release(&uq->uq_key); 1988 return (error); 1989 } 1990 1991 /* 1992 * Unlock a PP mutex. 1993 */ 1994 static int 1995 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 1996 { 1997 struct umtx_key key; 1998 struct umtx_q *uq, *uq2; 1999 struct umtx_pi *pi; 2000 uint32_t owner, id; 2001 uint32_t rceiling; 2002 int error, pri, new_inherited_pri, su; 2003 2004 id = td->td_tid; 2005 uq = td->td_umtxq; 2006 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2007 2008 /* 2009 * Make sure we own this mtx. 2010 */ 2011 error = fueword32(__DEVOLATILE(uint32_t *, &m->m_owner), &owner); 2012 if (error == -1) 2013 return (EFAULT); 2014 2015 if ((owner & ~UMUTEX_CONTESTED) != id) 2016 return (EPERM); 2017 2018 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2019 if (error != 0) 2020 return (error); 2021 2022 if (rceiling == -1) 2023 new_inherited_pri = PRI_MAX; 2024 else { 2025 rceiling = RTP_PRIO_MAX - rceiling; 2026 if (rceiling > RTP_PRIO_MAX) 2027 return (EINVAL); 2028 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2029 } 2030 2031 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2032 &key)) != 0) 2033 return (error); 2034 umtxq_lock(&key); 2035 umtxq_busy(&key); 2036 umtxq_unlock(&key); 2037 /* 2038 * For priority protected mutex, always set unlocked state 2039 * to UMUTEX_CONTESTED, so that userland always enters kernel 2040 * to lock the mutex, it is necessary because thread priority 2041 * has to be adjusted for such mutex. 2042 */ 2043 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2044 UMUTEX_CONTESTED); 2045 2046 umtxq_lock(&key); 2047 if (error == 0) 2048 umtxq_signal(&key, 1); 2049 umtxq_unbusy(&key); 2050 umtxq_unlock(&key); 2051 2052 if (error == -1) 2053 error = EFAULT; 2054 else { 2055 mtx_lock_spin(&umtx_lock); 2056 if (su != 0) 2057 uq->uq_inherited_pri = new_inherited_pri; 2058 pri = PRI_MAX; 2059 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2060 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2061 if (uq2 != NULL) { 2062 if (pri > UPRI(uq2->uq_thread)) 2063 pri = UPRI(uq2->uq_thread); 2064 } 2065 } 2066 if (pri > uq->uq_inherited_pri) 2067 pri = uq->uq_inherited_pri; 2068 thread_lock(td); 2069 sched_lend_user_prio(td, pri); 2070 thread_unlock(td); 2071 mtx_unlock_spin(&umtx_lock); 2072 } 2073 umtx_key_release(&key); 2074 return (error); 2075 } 2076 2077 static int 2078 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2079 uint32_t *old_ceiling) 2080 { 2081 struct umtx_q *uq; 2082 uint32_t save_ceiling; 2083 uint32_t owner, id; 2084 uint32_t flags; 2085 int error, rv; 2086 2087 error = fueword32(&m->m_flags, &flags); 2088 if (error == -1) 2089 return (EFAULT); 2090 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2091 return (EINVAL); 2092 if (ceiling > RTP_PRIO_MAX) 2093 return (EINVAL); 2094 id = td->td_tid; 2095 uq = td->td_umtxq; 2096 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2097 &uq->uq_key)) != 0) 2098 return (error); 2099 for (;;) { 2100 umtxq_lock(&uq->uq_key); 2101 umtxq_busy(&uq->uq_key); 2102 umtxq_unlock(&uq->uq_key); 2103 2104 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2105 if (rv == -1) { 2106 error = EFAULT; 2107 break; 2108 } 2109 2110 rv = casueword32(&m->m_owner, 2111 UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED); 2112 if (rv == -1) { 2113 error = EFAULT; 2114 break; 2115 } 2116 2117 if (owner == UMUTEX_CONTESTED) { 2118 suword32(&m->m_ceilings[0], ceiling); 2119 suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2120 UMUTEX_CONTESTED); 2121 error = 0; 2122 break; 2123 } 2124 2125 if ((owner & ~UMUTEX_CONTESTED) == id) { 2126 suword32(&m->m_ceilings[0], ceiling); 2127 error = 0; 2128 break; 2129 } 2130 2131 /* 2132 * If we caught a signal, we have retried and now 2133 * exit immediately. 2134 */ 2135 if (error != 0) 2136 break; 2137 2138 /* 2139 * We set the contested bit, sleep. Otherwise the lock changed 2140 * and we need to retry or we lost a race to the thread 2141 * unlocking the umtx. 2142 */ 2143 umtxq_lock(&uq->uq_key); 2144 umtxq_insert(uq); 2145 umtxq_unbusy(&uq->uq_key); 2146 error = umtxq_sleep(uq, "umtxpp", NULL); 2147 umtxq_remove(uq); 2148 umtxq_unlock(&uq->uq_key); 2149 } 2150 umtxq_lock(&uq->uq_key); 2151 if (error == 0) 2152 umtxq_signal(&uq->uq_key, INT_MAX); 2153 umtxq_unbusy(&uq->uq_key); 2154 umtxq_unlock(&uq->uq_key); 2155 umtx_key_release(&uq->uq_key); 2156 if (error == 0 && old_ceiling != NULL) 2157 suword32(old_ceiling, save_ceiling); 2158 return (error); 2159 } 2160 2161 /* 2162 * Lock a userland POSIX mutex. 2163 */ 2164 static int 2165 do_lock_umutex(struct thread *td, struct umutex *m, 2166 struct _umtx_time *timeout, int mode) 2167 { 2168 uint32_t flags; 2169 int error; 2170 2171 error = fueword32(&m->m_flags, &flags); 2172 if (error == -1) 2173 return (EFAULT); 2174 2175 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2176 case 0: 2177 error = do_lock_normal(td, m, flags, timeout, mode); 2178 break; 2179 case UMUTEX_PRIO_INHERIT: 2180 error = do_lock_pi(td, m, flags, timeout, mode); 2181 break; 2182 case UMUTEX_PRIO_PROTECT: 2183 error = do_lock_pp(td, m, flags, timeout, mode); 2184 break; 2185 default: 2186 return (EINVAL); 2187 } 2188 if (timeout == NULL) { 2189 if (error == EINTR && mode != _UMUTEX_WAIT) 2190 error = ERESTART; 2191 } else { 2192 /* Timed-locking is not restarted. */ 2193 if (error == ERESTART) 2194 error = EINTR; 2195 } 2196 return (error); 2197 } 2198 2199 /* 2200 * Unlock a userland POSIX mutex. 2201 */ 2202 static int 2203 do_unlock_umutex(struct thread *td, struct umutex *m) 2204 { 2205 uint32_t flags; 2206 int error; 2207 2208 error = fueword32(&m->m_flags, &flags); 2209 if (error == -1) 2210 return (EFAULT); 2211 2212 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2213 case 0: 2214 return (do_unlock_normal(td, m, flags)); 2215 case UMUTEX_PRIO_INHERIT: 2216 return (do_unlock_pi(td, m, flags)); 2217 case UMUTEX_PRIO_PROTECT: 2218 return (do_unlock_pp(td, m, flags)); 2219 } 2220 2221 return (EINVAL); 2222 } 2223 2224 static int 2225 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2226 struct timespec *timeout, u_long wflags) 2227 { 2228 struct abs_timeout timo; 2229 struct umtx_q *uq; 2230 uint32_t flags, clockid, hasw; 2231 int error; 2232 2233 uq = td->td_umtxq; 2234 error = fueword32(&cv->c_flags, &flags); 2235 if (error == -1) 2236 return (EFAULT); 2237 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2238 if (error != 0) 2239 return (error); 2240 2241 if ((wflags & CVWAIT_CLOCKID) != 0) { 2242 error = fueword32(&cv->c_clockid, &clockid); 2243 if (error == -1) { 2244 umtx_key_release(&uq->uq_key); 2245 return (EFAULT); 2246 } 2247 if (clockid < CLOCK_REALTIME || 2248 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2249 /* hmm, only HW clock id will work. */ 2250 umtx_key_release(&uq->uq_key); 2251 return (EINVAL); 2252 } 2253 } else { 2254 clockid = CLOCK_REALTIME; 2255 } 2256 2257 umtxq_lock(&uq->uq_key); 2258 umtxq_busy(&uq->uq_key); 2259 umtxq_insert(uq); 2260 umtxq_unlock(&uq->uq_key); 2261 2262 /* 2263 * Set c_has_waiters to 1 before releasing user mutex, also 2264 * don't modify cache line when unnecessary. 2265 */ 2266 error = fueword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 2267 &hasw); 2268 if (error == 0 && hasw == 0) 2269 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1); 2270 2271 umtxq_unbusy_unlocked(&uq->uq_key); 2272 2273 error = do_unlock_umutex(td, m); 2274 2275 if (timeout != NULL) 2276 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), 2277 timeout); 2278 2279 umtxq_lock(&uq->uq_key); 2280 if (error == 0) { 2281 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2282 NULL : &timo); 2283 } 2284 2285 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2286 error = 0; 2287 else { 2288 /* 2289 * This must be timeout,interrupted by signal or 2290 * surprious wakeup, clear c_has_waiter flag when 2291 * necessary. 2292 */ 2293 umtxq_busy(&uq->uq_key); 2294 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2295 int oldlen = uq->uq_cur_queue->length; 2296 umtxq_remove(uq); 2297 if (oldlen == 1) { 2298 umtxq_unlock(&uq->uq_key); 2299 suword32( 2300 __DEVOLATILE(uint32_t *, 2301 &cv->c_has_waiters), 0); 2302 umtxq_lock(&uq->uq_key); 2303 } 2304 } 2305 umtxq_unbusy(&uq->uq_key); 2306 if (error == ERESTART) 2307 error = EINTR; 2308 } 2309 2310 umtxq_unlock(&uq->uq_key); 2311 umtx_key_release(&uq->uq_key); 2312 return (error); 2313 } 2314 2315 /* 2316 * Signal a userland condition variable. 2317 */ 2318 static int 2319 do_cv_signal(struct thread *td, struct ucond *cv) 2320 { 2321 struct umtx_key key; 2322 int error, cnt, nwake; 2323 uint32_t flags; 2324 2325 error = fueword32(&cv->c_flags, &flags); 2326 if (error == -1) 2327 return (EFAULT); 2328 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2329 return (error); 2330 umtxq_lock(&key); 2331 umtxq_busy(&key); 2332 cnt = umtxq_count(&key); 2333 nwake = umtxq_signal(&key, 1); 2334 if (cnt <= nwake) { 2335 umtxq_unlock(&key); 2336 error = suword32( 2337 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2338 if (error == -1) 2339 error = EFAULT; 2340 umtxq_lock(&key); 2341 } 2342 umtxq_unbusy(&key); 2343 umtxq_unlock(&key); 2344 umtx_key_release(&key); 2345 return (error); 2346 } 2347 2348 static int 2349 do_cv_broadcast(struct thread *td, struct ucond *cv) 2350 { 2351 struct umtx_key key; 2352 int error; 2353 uint32_t flags; 2354 2355 error = fueword32(&cv->c_flags, &flags); 2356 if (error == -1) 2357 return (EFAULT); 2358 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2359 return (error); 2360 2361 umtxq_lock(&key); 2362 umtxq_busy(&key); 2363 umtxq_signal(&key, INT_MAX); 2364 umtxq_unlock(&key); 2365 2366 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2367 if (error == -1) 2368 error = EFAULT; 2369 2370 umtxq_unbusy_unlocked(&key); 2371 2372 umtx_key_release(&key); 2373 return (error); 2374 } 2375 2376 static int 2377 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2378 { 2379 struct abs_timeout timo; 2380 struct umtx_q *uq; 2381 uint32_t flags, wrflags; 2382 int32_t state, oldstate; 2383 int32_t blocked_readers; 2384 int error, rv; 2385 2386 uq = td->td_umtxq; 2387 error = fueword32(&rwlock->rw_flags, &flags); 2388 if (error == -1) 2389 return (EFAULT); 2390 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2391 if (error != 0) 2392 return (error); 2393 2394 if (timeout != NULL) 2395 abs_timeout_init2(&timo, timeout); 2396 2397 wrflags = URWLOCK_WRITE_OWNER; 2398 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2399 wrflags |= URWLOCK_WRITE_WAITERS; 2400 2401 for (;;) { 2402 rv = fueword32(__DEVOLATILE(int32_t *, &rwlock->rw_state), 2403 &state); 2404 if (rv == -1) { 2405 umtx_key_release(&uq->uq_key); 2406 return (EFAULT); 2407 } 2408 2409 /* try to lock it */ 2410 while (!(state & wrflags)) { 2411 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2412 umtx_key_release(&uq->uq_key); 2413 return (EAGAIN); 2414 } 2415 rv = casueword32(&rwlock->rw_state, state, 2416 &oldstate, state + 1); 2417 if (rv == -1) { 2418 umtx_key_release(&uq->uq_key); 2419 return (EFAULT); 2420 } 2421 if (oldstate == state) { 2422 umtx_key_release(&uq->uq_key); 2423 return (0); 2424 } 2425 error = umtxq_check_susp(td); 2426 if (error != 0) 2427 break; 2428 state = oldstate; 2429 } 2430 2431 if (error) 2432 break; 2433 2434 /* grab monitor lock */ 2435 umtxq_lock(&uq->uq_key); 2436 umtxq_busy(&uq->uq_key); 2437 umtxq_unlock(&uq->uq_key); 2438 2439 /* 2440 * re-read the state, in case it changed between the try-lock above 2441 * and the check below 2442 */ 2443 rv = fueword32(__DEVOLATILE(int32_t *, &rwlock->rw_state), 2444 &state); 2445 if (rv == -1) 2446 error = EFAULT; 2447 2448 /* set read contention bit */ 2449 while (error == 0 && (state & wrflags) && 2450 !(state & URWLOCK_READ_WAITERS)) { 2451 rv = casueword32(&rwlock->rw_state, state, 2452 &oldstate, state | URWLOCK_READ_WAITERS); 2453 if (rv == -1) { 2454 error = EFAULT; 2455 break; 2456 } 2457 if (oldstate == state) 2458 goto sleep; 2459 state = oldstate; 2460 error = umtxq_check_susp(td); 2461 if (error != 0) 2462 break; 2463 } 2464 if (error != 0) { 2465 umtxq_unbusy_unlocked(&uq->uq_key); 2466 break; 2467 } 2468 2469 /* state is changed while setting flags, restart */ 2470 if (!(state & wrflags)) { 2471 umtxq_unbusy_unlocked(&uq->uq_key); 2472 error = umtxq_check_susp(td); 2473 if (error != 0) 2474 break; 2475 continue; 2476 } 2477 2478 sleep: 2479 /* contention bit is set, before sleeping, increase read waiter count */ 2480 rv = fueword32(&rwlock->rw_blocked_readers, 2481 &blocked_readers); 2482 if (rv == -1) { 2483 umtxq_unbusy_unlocked(&uq->uq_key); 2484 error = EFAULT; 2485 break; 2486 } 2487 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2488 2489 while (state & wrflags) { 2490 umtxq_lock(&uq->uq_key); 2491 umtxq_insert(uq); 2492 umtxq_unbusy(&uq->uq_key); 2493 2494 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2495 NULL : &timo); 2496 2497 umtxq_busy(&uq->uq_key); 2498 umtxq_remove(uq); 2499 umtxq_unlock(&uq->uq_key); 2500 if (error) 2501 break; 2502 rv = fueword32(__DEVOLATILE(int32_t *, 2503 &rwlock->rw_state), &state); 2504 if (rv == -1) { 2505 error = EFAULT; 2506 break; 2507 } 2508 } 2509 2510 /* decrease read waiter count, and may clear read contention bit */ 2511 rv = fueword32(&rwlock->rw_blocked_readers, 2512 &blocked_readers); 2513 if (rv == -1) { 2514 umtxq_unbusy_unlocked(&uq->uq_key); 2515 error = EFAULT; 2516 break; 2517 } 2518 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2519 if (blocked_readers == 1) { 2520 rv = fueword32(__DEVOLATILE(int32_t *, 2521 &rwlock->rw_state), &state); 2522 if (rv == -1) 2523 error = EFAULT; 2524 while (error == 0) { 2525 rv = casueword32(&rwlock->rw_state, state, 2526 &oldstate, state & ~URWLOCK_READ_WAITERS); 2527 if (rv == -1) { 2528 error = EFAULT; 2529 break; 2530 } 2531 if (oldstate == state) 2532 break; 2533 state = oldstate; 2534 error = umtxq_check_susp(td); 2535 } 2536 } 2537 2538 umtxq_unbusy_unlocked(&uq->uq_key); 2539 if (error != 0) 2540 break; 2541 } 2542 umtx_key_release(&uq->uq_key); 2543 if (error == ERESTART) 2544 error = EINTR; 2545 return (error); 2546 } 2547 2548 static int 2549 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2550 { 2551 struct abs_timeout timo; 2552 struct umtx_q *uq; 2553 uint32_t flags; 2554 int32_t state, oldstate; 2555 int32_t blocked_writers; 2556 int32_t blocked_readers; 2557 int error, rv; 2558 2559 uq = td->td_umtxq; 2560 error = fueword32(&rwlock->rw_flags, &flags); 2561 if (error == -1) 2562 return (EFAULT); 2563 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2564 if (error != 0) 2565 return (error); 2566 2567 if (timeout != NULL) 2568 abs_timeout_init2(&timo, timeout); 2569 2570 blocked_readers = 0; 2571 for (;;) { 2572 rv = fueword32(__DEVOLATILE(int32_t *, &rwlock->rw_state), 2573 &state); 2574 if (rv == -1) { 2575 umtx_key_release(&uq->uq_key); 2576 return (EFAULT); 2577 } 2578 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2579 rv = casueword32(&rwlock->rw_state, state, 2580 &oldstate, state | URWLOCK_WRITE_OWNER); 2581 if (rv == -1) { 2582 umtx_key_release(&uq->uq_key); 2583 return (EFAULT); 2584 } 2585 if (oldstate == state) { 2586 umtx_key_release(&uq->uq_key); 2587 return (0); 2588 } 2589 state = oldstate; 2590 error = umtxq_check_susp(td); 2591 if (error != 0) 2592 break; 2593 } 2594 2595 if (error) { 2596 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2597 blocked_readers != 0) { 2598 umtxq_lock(&uq->uq_key); 2599 umtxq_busy(&uq->uq_key); 2600 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2601 umtxq_unbusy(&uq->uq_key); 2602 umtxq_unlock(&uq->uq_key); 2603 } 2604 2605 break; 2606 } 2607 2608 /* grab monitor lock */ 2609 umtxq_lock(&uq->uq_key); 2610 umtxq_busy(&uq->uq_key); 2611 umtxq_unlock(&uq->uq_key); 2612 2613 /* 2614 * re-read the state, in case it changed between the try-lock above 2615 * and the check below 2616 */ 2617 rv = fueword32(__DEVOLATILE(int32_t *, &rwlock->rw_state), 2618 &state); 2619 if (rv == -1) 2620 error = EFAULT; 2621 2622 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2623 URWLOCK_READER_COUNT(state) != 0) && 2624 (state & URWLOCK_WRITE_WAITERS) == 0) { 2625 rv = casueword32(&rwlock->rw_state, state, 2626 &oldstate, state | URWLOCK_WRITE_WAITERS); 2627 if (rv == -1) { 2628 error = EFAULT; 2629 break; 2630 } 2631 if (oldstate == state) 2632 goto sleep; 2633 state = oldstate; 2634 error = umtxq_check_susp(td); 2635 if (error != 0) 2636 break; 2637 } 2638 if (error != 0) { 2639 umtxq_unbusy_unlocked(&uq->uq_key); 2640 break; 2641 } 2642 2643 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2644 umtxq_unbusy_unlocked(&uq->uq_key); 2645 error = umtxq_check_susp(td); 2646 if (error != 0) 2647 break; 2648 continue; 2649 } 2650 sleep: 2651 rv = fueword32(&rwlock->rw_blocked_writers, 2652 &blocked_writers); 2653 if (rv == -1) { 2654 umtxq_unbusy_unlocked(&uq->uq_key); 2655 error = EFAULT; 2656 break; 2657 } 2658 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2659 2660 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2661 umtxq_lock(&uq->uq_key); 2662 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2663 umtxq_unbusy(&uq->uq_key); 2664 2665 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2666 NULL : &timo); 2667 2668 umtxq_busy(&uq->uq_key); 2669 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2670 umtxq_unlock(&uq->uq_key); 2671 if (error) 2672 break; 2673 rv = fueword32(__DEVOLATILE(int32_t *, 2674 &rwlock->rw_state), &state); 2675 if (rv == -1) { 2676 error = EFAULT; 2677 break; 2678 } 2679 } 2680 2681 rv = fueword32(&rwlock->rw_blocked_writers, 2682 &blocked_writers); 2683 if (rv == -1) { 2684 umtxq_unbusy_unlocked(&uq->uq_key); 2685 error = EFAULT; 2686 break; 2687 } 2688 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2689 if (blocked_writers == 1) { 2690 rv = fueword32(__DEVOLATILE(int32_t *, 2691 &rwlock->rw_state), &state); 2692 if (rv == -1) { 2693 umtxq_unbusy_unlocked(&uq->uq_key); 2694 error = EFAULT; 2695 break; 2696 } 2697 for (;;) { 2698 rv = casueword32(&rwlock->rw_state, state, 2699 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2700 if (rv == -1) { 2701 error = EFAULT; 2702 break; 2703 } 2704 if (oldstate == state) 2705 break; 2706 state = oldstate; 2707 error = umtxq_check_susp(td); 2708 /* 2709 * We are leaving the URWLOCK_WRITE_WAITERS 2710 * behind, but this should not harm the 2711 * correctness. 2712 */ 2713 if (error != 0) 2714 break; 2715 } 2716 rv = fueword32(&rwlock->rw_blocked_readers, 2717 &blocked_readers); 2718 if (rv == -1) { 2719 umtxq_unbusy_unlocked(&uq->uq_key); 2720 error = EFAULT; 2721 break; 2722 } 2723 } else 2724 blocked_readers = 0; 2725 2726 umtxq_unbusy_unlocked(&uq->uq_key); 2727 } 2728 2729 umtx_key_release(&uq->uq_key); 2730 if (error == ERESTART) 2731 error = EINTR; 2732 return (error); 2733 } 2734 2735 static int 2736 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2737 { 2738 struct umtx_q *uq; 2739 uint32_t flags; 2740 int32_t state, oldstate; 2741 int error, rv, q, count; 2742 2743 uq = td->td_umtxq; 2744 error = fueword32(&rwlock->rw_flags, &flags); 2745 if (error == -1) 2746 return (EFAULT); 2747 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2748 if (error != 0) 2749 return (error); 2750 2751 error = fueword32(__DEVOLATILE(int32_t *, &rwlock->rw_state), &state); 2752 if (error == -1) { 2753 error = EFAULT; 2754 goto out; 2755 } 2756 if (state & URWLOCK_WRITE_OWNER) { 2757 for (;;) { 2758 rv = casueword32(&rwlock->rw_state, state, 2759 &oldstate, state & ~URWLOCK_WRITE_OWNER); 2760 if (rv == -1) { 2761 error = EFAULT; 2762 goto out; 2763 } 2764 if (oldstate != state) { 2765 state = oldstate; 2766 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 2767 error = EPERM; 2768 goto out; 2769 } 2770 error = umtxq_check_susp(td); 2771 if (error != 0) 2772 goto out; 2773 } else 2774 break; 2775 } 2776 } else if (URWLOCK_READER_COUNT(state) != 0) { 2777 for (;;) { 2778 rv = casueword32(&rwlock->rw_state, state, 2779 &oldstate, state - 1); 2780 if (rv == -1) { 2781 error = EFAULT; 2782 goto out; 2783 } 2784 if (oldstate != state) { 2785 state = oldstate; 2786 if (URWLOCK_READER_COUNT(oldstate) == 0) { 2787 error = EPERM; 2788 goto out; 2789 } 2790 error = umtxq_check_susp(td); 2791 if (error != 0) 2792 goto out; 2793 } else 2794 break; 2795 } 2796 } else { 2797 error = EPERM; 2798 goto out; 2799 } 2800 2801 count = 0; 2802 2803 if (!(flags & URWLOCK_PREFER_READER)) { 2804 if (state & URWLOCK_WRITE_WAITERS) { 2805 count = 1; 2806 q = UMTX_EXCLUSIVE_QUEUE; 2807 } else if (state & URWLOCK_READ_WAITERS) { 2808 count = INT_MAX; 2809 q = UMTX_SHARED_QUEUE; 2810 } 2811 } else { 2812 if (state & URWLOCK_READ_WAITERS) { 2813 count = INT_MAX; 2814 q = UMTX_SHARED_QUEUE; 2815 } else if (state & URWLOCK_WRITE_WAITERS) { 2816 count = 1; 2817 q = UMTX_EXCLUSIVE_QUEUE; 2818 } 2819 } 2820 2821 if (count) { 2822 umtxq_lock(&uq->uq_key); 2823 umtxq_busy(&uq->uq_key); 2824 umtxq_signal_queue(&uq->uq_key, count, q); 2825 umtxq_unbusy(&uq->uq_key); 2826 umtxq_unlock(&uq->uq_key); 2827 } 2828 out: 2829 umtx_key_release(&uq->uq_key); 2830 return (error); 2831 } 2832 2833 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 2834 static int 2835 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 2836 { 2837 struct abs_timeout timo; 2838 struct umtx_q *uq; 2839 uint32_t flags, count, count1; 2840 int error, rv; 2841 2842 uq = td->td_umtxq; 2843 error = fueword32(&sem->_flags, &flags); 2844 if (error == -1) 2845 return (EFAULT); 2846 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 2847 if (error != 0) 2848 return (error); 2849 2850 if (timeout != NULL) 2851 abs_timeout_init2(&timo, timeout); 2852 2853 umtxq_lock(&uq->uq_key); 2854 umtxq_busy(&uq->uq_key); 2855 umtxq_insert(uq); 2856 umtxq_unlock(&uq->uq_key); 2857 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 2858 if (rv == 0) 2859 rv = fueword32(__DEVOLATILE(uint32_t *, &sem->_count), &count); 2860 if (rv == -1 || count != 0) { 2861 umtxq_lock(&uq->uq_key); 2862 umtxq_unbusy(&uq->uq_key); 2863 umtxq_remove(uq); 2864 umtxq_unlock(&uq->uq_key); 2865 umtx_key_release(&uq->uq_key); 2866 return (rv == -1 ? EFAULT : 0); 2867 } 2868 umtxq_lock(&uq->uq_key); 2869 umtxq_unbusy(&uq->uq_key); 2870 2871 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 2872 2873 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2874 error = 0; 2875 else { 2876 umtxq_remove(uq); 2877 /* A relative timeout cannot be restarted. */ 2878 if (error == ERESTART && timeout != NULL && 2879 (timeout->_flags & UMTX_ABSTIME) == 0) 2880 error = EINTR; 2881 } 2882 umtxq_unlock(&uq->uq_key); 2883 umtx_key_release(&uq->uq_key); 2884 return (error); 2885 } 2886 2887 /* 2888 * Signal a userland semaphore. 2889 */ 2890 static int 2891 do_sem_wake(struct thread *td, struct _usem *sem) 2892 { 2893 struct umtx_key key; 2894 int error, cnt; 2895 uint32_t flags; 2896 2897 error = fueword32(&sem->_flags, &flags); 2898 if (error == -1) 2899 return (EFAULT); 2900 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 2901 return (error); 2902 umtxq_lock(&key); 2903 umtxq_busy(&key); 2904 cnt = umtxq_count(&key); 2905 if (cnt > 0) { 2906 umtxq_signal(&key, 1); 2907 /* 2908 * Check if count is greater than 0, this means the memory is 2909 * still being referenced by user code, so we can safely 2910 * update _has_waiters flag. 2911 */ 2912 if (cnt == 1) { 2913 umtxq_unlock(&key); 2914 error = suword32( 2915 __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0); 2916 umtxq_lock(&key); 2917 if (error == -1) 2918 error = EFAULT; 2919 } 2920 } 2921 umtxq_unbusy(&key); 2922 umtxq_unlock(&key); 2923 umtx_key_release(&key); 2924 return (error); 2925 } 2926 #endif 2927 2928 static int 2929 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 2930 { 2931 struct abs_timeout timo; 2932 struct umtx_q *uq; 2933 uint32_t count, flags; 2934 int error, rv; 2935 2936 uq = td->td_umtxq; 2937 flags = fuword32(&sem->_flags); 2938 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 2939 if (error != 0) 2940 return (error); 2941 2942 if (timeout != NULL) 2943 abs_timeout_init2(&timo, timeout); 2944 2945 umtxq_lock(&uq->uq_key); 2946 umtxq_busy(&uq->uq_key); 2947 umtxq_insert(uq); 2948 umtxq_unlock(&uq->uq_key); 2949 rv = fueword32(__DEVOLATILE(uint32_t *, &sem->_count), &count); 2950 if (rv == -1) { 2951 umtxq_lock(&uq->uq_key); 2952 umtxq_unbusy(&uq->uq_key); 2953 umtxq_remove(uq); 2954 umtxq_unlock(&uq->uq_key); 2955 umtx_key_release(&uq->uq_key); 2956 return (EFAULT); 2957 } 2958 for (;;) { 2959 if (USEM_COUNT(count) != 0) { 2960 umtxq_lock(&uq->uq_key); 2961 umtxq_unbusy(&uq->uq_key); 2962 umtxq_remove(uq); 2963 umtxq_unlock(&uq->uq_key); 2964 umtx_key_release(&uq->uq_key); 2965 return (0); 2966 } 2967 if (count == USEM_HAS_WAITERS) 2968 break; 2969 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 2970 if (rv == -1) { 2971 umtxq_lock(&uq->uq_key); 2972 umtxq_unbusy(&uq->uq_key); 2973 umtxq_remove(uq); 2974 umtxq_unlock(&uq->uq_key); 2975 umtx_key_release(&uq->uq_key); 2976 return (EFAULT); 2977 } 2978 if (count == 0) 2979 break; 2980 } 2981 umtxq_lock(&uq->uq_key); 2982 umtxq_unbusy(&uq->uq_key); 2983 2984 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 2985 2986 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2987 error = 0; 2988 else { 2989 umtxq_remove(uq); 2990 /* A relative timeout cannot be restarted. */ 2991 if (error == ERESTART && timeout != NULL && 2992 (timeout->_flags & UMTX_ABSTIME) == 0) 2993 error = EINTR; 2994 } 2995 umtxq_unlock(&uq->uq_key); 2996 umtx_key_release(&uq->uq_key); 2997 return (error); 2998 } 2999 3000 /* 3001 * Signal a userland semaphore. 3002 */ 3003 static int 3004 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3005 { 3006 struct umtx_key key; 3007 int error, cnt, rv; 3008 uint32_t count, flags; 3009 3010 rv = fueword32(&sem->_flags, &flags); 3011 if (rv == -1) 3012 return (EFAULT); 3013 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3014 return (error); 3015 umtxq_lock(&key); 3016 umtxq_busy(&key); 3017 cnt = umtxq_count(&key); 3018 if (cnt > 0) { 3019 umtxq_signal(&key, 1); 3020 3021 /* 3022 * If this was the last sleeping thread, clear the waiters 3023 * flag in _count. 3024 */ 3025 if (cnt == 1) { 3026 umtxq_unlock(&key); 3027 rv = fueword32(__DEVOLATILE(uint32_t *, &sem->_count), 3028 &count); 3029 while (rv != -1 && count & USEM_HAS_WAITERS) 3030 rv = casueword32(&sem->_count, count, &count, 3031 count & ~USEM_HAS_WAITERS); 3032 if (rv == -1) 3033 error = EFAULT; 3034 umtxq_lock(&key); 3035 } 3036 } 3037 umtxq_unbusy(&key); 3038 umtxq_unlock(&key); 3039 umtx_key_release(&key); 3040 return (error); 3041 } 3042 3043 inline int 3044 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3045 { 3046 int error; 3047 3048 error = copyin(addr, tsp, sizeof(struct timespec)); 3049 if (error == 0) { 3050 if (tsp->tv_sec < 0 || 3051 tsp->tv_nsec >= 1000000000 || 3052 tsp->tv_nsec < 0) 3053 error = EINVAL; 3054 } 3055 return (error); 3056 } 3057 3058 static inline int 3059 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3060 { 3061 int error; 3062 3063 if (size <= sizeof(struct timespec)) { 3064 tp->_clockid = CLOCK_REALTIME; 3065 tp->_flags = 0; 3066 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3067 } else 3068 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3069 if (error != 0) 3070 return (error); 3071 if (tp->_timeout.tv_sec < 0 || 3072 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3073 return (EINVAL); 3074 return (0); 3075 } 3076 3077 static int 3078 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3079 { 3080 3081 return (EOPNOTSUPP); 3082 } 3083 3084 static int 3085 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3086 { 3087 struct _umtx_time timeout, *tm_p; 3088 int error; 3089 3090 if (uap->uaddr2 == NULL) 3091 tm_p = NULL; 3092 else { 3093 error = umtx_copyin_umtx_time( 3094 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3095 if (error != 0) 3096 return (error); 3097 tm_p = &timeout; 3098 } 3099 return do_wait(td, uap->obj, uap->val, tm_p, 0, 0); 3100 } 3101 3102 static int 3103 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3104 { 3105 struct _umtx_time timeout, *tm_p; 3106 int error; 3107 3108 if (uap->uaddr2 == NULL) 3109 tm_p = NULL; 3110 else { 3111 error = umtx_copyin_umtx_time( 3112 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3113 if (error != 0) 3114 return (error); 3115 tm_p = &timeout; 3116 } 3117 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3118 } 3119 3120 static int 3121 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3122 { 3123 struct _umtx_time *tm_p, timeout; 3124 int error; 3125 3126 if (uap->uaddr2 == NULL) 3127 tm_p = NULL; 3128 else { 3129 error = umtx_copyin_umtx_time( 3130 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3131 if (error != 0) 3132 return (error); 3133 tm_p = &timeout; 3134 } 3135 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3136 } 3137 3138 static int 3139 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3140 { 3141 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3142 } 3143 3144 #define BATCH_SIZE 128 3145 static int 3146 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3147 { 3148 int count = uap->val; 3149 void *uaddrs[BATCH_SIZE]; 3150 char **upp = (char **)uap->obj; 3151 int tocopy; 3152 int error = 0; 3153 int i, pos = 0; 3154 3155 while (count > 0) { 3156 tocopy = count; 3157 if (tocopy > BATCH_SIZE) 3158 tocopy = BATCH_SIZE; 3159 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *)); 3160 if (error != 0) 3161 break; 3162 for (i = 0; i < tocopy; ++i) 3163 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3164 count -= tocopy; 3165 pos += tocopy; 3166 } 3167 return (error); 3168 } 3169 3170 static int 3171 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3172 { 3173 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3174 } 3175 3176 static int 3177 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3178 { 3179 struct _umtx_time *tm_p, timeout; 3180 int error; 3181 3182 /* Allow a null timespec (wait forever). */ 3183 if (uap->uaddr2 == NULL) 3184 tm_p = NULL; 3185 else { 3186 error = umtx_copyin_umtx_time( 3187 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3188 if (error != 0) 3189 return (error); 3190 tm_p = &timeout; 3191 } 3192 return do_lock_umutex(td, uap->obj, tm_p, 0); 3193 } 3194 3195 static int 3196 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3197 { 3198 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); 3199 } 3200 3201 static int 3202 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3203 { 3204 struct _umtx_time *tm_p, timeout; 3205 int error; 3206 3207 /* Allow a null timespec (wait forever). */ 3208 if (uap->uaddr2 == NULL) 3209 tm_p = NULL; 3210 else { 3211 error = umtx_copyin_umtx_time( 3212 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3213 if (error != 0) 3214 return (error); 3215 tm_p = &timeout; 3216 } 3217 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3218 } 3219 3220 static int 3221 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3222 { 3223 return do_wake_umutex(td, uap->obj); 3224 } 3225 3226 static int 3227 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3228 { 3229 return do_unlock_umutex(td, uap->obj); 3230 } 3231 3232 static int 3233 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3234 { 3235 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 3236 } 3237 3238 static int 3239 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3240 { 3241 struct timespec *ts, timeout; 3242 int error; 3243 3244 /* Allow a null timespec (wait forever). */ 3245 if (uap->uaddr2 == NULL) 3246 ts = NULL; 3247 else { 3248 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3249 if (error != 0) 3250 return (error); 3251 ts = &timeout; 3252 } 3253 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3254 } 3255 3256 static int 3257 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3258 { 3259 return do_cv_signal(td, uap->obj); 3260 } 3261 3262 static int 3263 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3264 { 3265 return do_cv_broadcast(td, uap->obj); 3266 } 3267 3268 static int 3269 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3270 { 3271 struct _umtx_time timeout; 3272 int error; 3273 3274 /* Allow a null timespec (wait forever). */ 3275 if (uap->uaddr2 == NULL) { 3276 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3277 } else { 3278 error = umtx_copyin_umtx_time(uap->uaddr2, 3279 (size_t)uap->uaddr1, &timeout); 3280 if (error != 0) 3281 return (error); 3282 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3283 } 3284 return (error); 3285 } 3286 3287 static int 3288 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3289 { 3290 struct _umtx_time timeout; 3291 int error; 3292 3293 /* Allow a null timespec (wait forever). */ 3294 if (uap->uaddr2 == NULL) { 3295 error = do_rw_wrlock(td, uap->obj, 0); 3296 } else { 3297 error = umtx_copyin_umtx_time(uap->uaddr2, 3298 (size_t)uap->uaddr1, &timeout); 3299 if (error != 0) 3300 return (error); 3301 3302 error = do_rw_wrlock(td, uap->obj, &timeout); 3303 } 3304 return (error); 3305 } 3306 3307 static int 3308 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3309 { 3310 return do_rw_unlock(td, uap->obj); 3311 } 3312 3313 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3314 static int 3315 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3316 { 3317 struct _umtx_time *tm_p, timeout; 3318 int error; 3319 3320 /* Allow a null timespec (wait forever). */ 3321 if (uap->uaddr2 == NULL) 3322 tm_p = NULL; 3323 else { 3324 error = umtx_copyin_umtx_time( 3325 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3326 if (error != 0) 3327 return (error); 3328 tm_p = &timeout; 3329 } 3330 return (do_sem_wait(td, uap->obj, tm_p)); 3331 } 3332 3333 static int 3334 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3335 { 3336 return do_sem_wake(td, uap->obj); 3337 } 3338 #endif 3339 3340 static int 3341 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3342 { 3343 return do_wake2_umutex(td, uap->obj, uap->val); 3344 } 3345 3346 static int 3347 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3348 { 3349 struct _umtx_time *tm_p, timeout; 3350 int error; 3351 3352 /* Allow a null timespec (wait forever). */ 3353 if (uap->uaddr2 == NULL) 3354 tm_p = NULL; 3355 else { 3356 error = umtx_copyin_umtx_time( 3357 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3358 if (error != 0) 3359 return (error); 3360 tm_p = &timeout; 3361 } 3362 return (do_sem2_wait(td, uap->obj, tm_p)); 3363 } 3364 3365 static int 3366 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3367 { 3368 return do_sem2_wake(td, uap->obj); 3369 } 3370 3371 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3372 3373 static _umtx_op_func op_table[] = { 3374 __umtx_op_unimpl, /* UMTX_OP_RESERVED0 */ 3375 __umtx_op_unimpl, /* UMTX_OP_RESERVED1 */ 3376 __umtx_op_wait, /* UMTX_OP_WAIT */ 3377 __umtx_op_wake, /* UMTX_OP_WAKE */ 3378 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3379 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3380 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3381 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3382 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 3383 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3384 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3385 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 3386 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 3387 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 3388 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3389 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3390 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3391 __umtx_op_wait_umutex, /* UMTX_OP_MUTEX_WAIT */ 3392 __umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */ 3393 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3394 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ 3395 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3396 #else 3397 __umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */ 3398 __umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */ 3399 #endif 3400 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */ 3401 __umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */ 3402 __umtx_op_sem2_wait, /* UMTX_OP_SEM2_WAIT */ 3403 __umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */ 3404 }; 3405 3406 int 3407 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3408 { 3409 if ((unsigned)uap->op < UMTX_OP_MAX) 3410 return (*op_table[uap->op])(td, uap); 3411 return (EINVAL); 3412 } 3413 3414 #ifdef COMPAT_FREEBSD32 3415 3416 struct timespec32 { 3417 int32_t tv_sec; 3418 int32_t tv_nsec; 3419 }; 3420 3421 struct umtx_time32 { 3422 struct timespec32 timeout; 3423 uint32_t flags; 3424 uint32_t clockid; 3425 }; 3426 3427 static inline int 3428 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 3429 { 3430 struct timespec32 ts32; 3431 int error; 3432 3433 error = copyin(addr, &ts32, sizeof(struct timespec32)); 3434 if (error == 0) { 3435 if (ts32.tv_sec < 0 || 3436 ts32.tv_nsec >= 1000000000 || 3437 ts32.tv_nsec < 0) 3438 error = EINVAL; 3439 else { 3440 tsp->tv_sec = ts32.tv_sec; 3441 tsp->tv_nsec = ts32.tv_nsec; 3442 } 3443 } 3444 return (error); 3445 } 3446 3447 static inline int 3448 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 3449 { 3450 struct umtx_time32 t32; 3451 int error; 3452 3453 t32.clockid = CLOCK_REALTIME; 3454 t32.flags = 0; 3455 if (size <= sizeof(struct timespec32)) 3456 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 3457 else 3458 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 3459 if (error != 0) 3460 return (error); 3461 if (t32.timeout.tv_sec < 0 || 3462 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 3463 return (EINVAL); 3464 tp->_timeout.tv_sec = t32.timeout.tv_sec; 3465 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 3466 tp->_flags = t32.flags; 3467 tp->_clockid = t32.clockid; 3468 return (0); 3469 } 3470 3471 static int 3472 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3473 { 3474 struct _umtx_time *tm_p, timeout; 3475 int error; 3476 3477 if (uap->uaddr2 == NULL) 3478 tm_p = NULL; 3479 else { 3480 error = umtx_copyin_umtx_time32(uap->uaddr2, 3481 (size_t)uap->uaddr1, &timeout); 3482 if (error != 0) 3483 return (error); 3484 tm_p = &timeout; 3485 } 3486 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3487 } 3488 3489 static int 3490 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3491 { 3492 struct _umtx_time *tm_p, timeout; 3493 int error; 3494 3495 /* Allow a null timespec (wait forever). */ 3496 if (uap->uaddr2 == NULL) 3497 tm_p = NULL; 3498 else { 3499 error = umtx_copyin_umtx_time(uap->uaddr2, 3500 (size_t)uap->uaddr1, &timeout); 3501 if (error != 0) 3502 return (error); 3503 tm_p = &timeout; 3504 } 3505 return do_lock_umutex(td, uap->obj, tm_p, 0); 3506 } 3507 3508 static int 3509 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 3510 { 3511 struct _umtx_time *tm_p, timeout; 3512 int error; 3513 3514 /* Allow a null timespec (wait forever). */ 3515 if (uap->uaddr2 == NULL) 3516 tm_p = NULL; 3517 else { 3518 error = umtx_copyin_umtx_time32(uap->uaddr2, 3519 (size_t)uap->uaddr1, &timeout); 3520 if (error != 0) 3521 return (error); 3522 tm_p = &timeout; 3523 } 3524 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3525 } 3526 3527 static int 3528 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3529 { 3530 struct timespec *ts, timeout; 3531 int error; 3532 3533 /* Allow a null timespec (wait forever). */ 3534 if (uap->uaddr2 == NULL) 3535 ts = NULL; 3536 else { 3537 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 3538 if (error != 0) 3539 return (error); 3540 ts = &timeout; 3541 } 3542 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3543 } 3544 3545 static int 3546 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3547 { 3548 struct _umtx_time timeout; 3549 int error; 3550 3551 /* Allow a null timespec (wait forever). */ 3552 if (uap->uaddr2 == NULL) { 3553 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3554 } else { 3555 error = umtx_copyin_umtx_time32(uap->uaddr2, 3556 (size_t)uap->uaddr1, &timeout); 3557 if (error != 0) 3558 return (error); 3559 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3560 } 3561 return (error); 3562 } 3563 3564 static int 3565 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3566 { 3567 struct _umtx_time timeout; 3568 int error; 3569 3570 /* Allow a null timespec (wait forever). */ 3571 if (uap->uaddr2 == NULL) { 3572 error = do_rw_wrlock(td, uap->obj, 0); 3573 } else { 3574 error = umtx_copyin_umtx_time32(uap->uaddr2, 3575 (size_t)uap->uaddr1, &timeout); 3576 if (error != 0) 3577 return (error); 3578 error = do_rw_wrlock(td, uap->obj, &timeout); 3579 } 3580 return (error); 3581 } 3582 3583 static int 3584 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3585 { 3586 struct _umtx_time *tm_p, timeout; 3587 int error; 3588 3589 if (uap->uaddr2 == NULL) 3590 tm_p = NULL; 3591 else { 3592 error = umtx_copyin_umtx_time32( 3593 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 3594 if (error != 0) 3595 return (error); 3596 tm_p = &timeout; 3597 } 3598 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3599 } 3600 3601 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3602 static int 3603 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3604 { 3605 struct _umtx_time *tm_p, timeout; 3606 int error; 3607 3608 /* Allow a null timespec (wait forever). */ 3609 if (uap->uaddr2 == NULL) 3610 tm_p = NULL; 3611 else { 3612 error = umtx_copyin_umtx_time32(uap->uaddr2, 3613 (size_t)uap->uaddr1, &timeout); 3614 if (error != 0) 3615 return (error); 3616 tm_p = &timeout; 3617 } 3618 return (do_sem_wait(td, uap->obj, tm_p)); 3619 } 3620 #endif 3621 3622 static int 3623 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3624 { 3625 struct _umtx_time *tm_p, timeout; 3626 int error; 3627 3628 /* Allow a null timespec (wait forever). */ 3629 if (uap->uaddr2 == NULL) 3630 tm_p = NULL; 3631 else { 3632 error = umtx_copyin_umtx_time32(uap->uaddr2, 3633 (size_t)uap->uaddr1, &timeout); 3634 if (error != 0) 3635 return (error); 3636 tm_p = &timeout; 3637 } 3638 return (do_sem2_wait(td, uap->obj, tm_p)); 3639 } 3640 3641 static int 3642 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 3643 { 3644 int count = uap->val; 3645 uint32_t uaddrs[BATCH_SIZE]; 3646 uint32_t **upp = (uint32_t **)uap->obj; 3647 int tocopy; 3648 int error = 0; 3649 int i, pos = 0; 3650 3651 while (count > 0) { 3652 tocopy = count; 3653 if (tocopy > BATCH_SIZE) 3654 tocopy = BATCH_SIZE; 3655 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t)); 3656 if (error != 0) 3657 break; 3658 for (i = 0; i < tocopy; ++i) 3659 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 3660 INT_MAX, 1); 3661 count -= tocopy; 3662 pos += tocopy; 3663 } 3664 return (error); 3665 } 3666 3667 static _umtx_op_func op_table_compat32[] = { 3668 __umtx_op_unimpl, /* UMTX_OP_RESERVED0 */ 3669 __umtx_op_unimpl, /* UMTX_OP_RESERVED1 */ 3670 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ 3671 __umtx_op_wake, /* UMTX_OP_WAKE */ 3672 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3673 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_TRYLOCK */ 3674 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3675 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3676 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ 3677 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3678 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3679 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ 3680 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ 3681 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ 3682 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3683 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3684 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3685 __umtx_op_wait_umutex_compat32, /* UMTX_OP_MUTEX_WAIT */ 3686 __umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */ 3687 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3688 __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */ 3689 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3690 #else 3691 __umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */ 3692 __umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */ 3693 #endif 3694 __umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */ 3695 __umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */ 3696 __umtx_op_sem2_wait_compat32, /* UMTX_OP_SEM2_WAIT */ 3697 __umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */ 3698 }; 3699 3700 int 3701 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 3702 { 3703 if ((unsigned)uap->op < UMTX_OP_MAX) 3704 return (*op_table_compat32[uap->op])(td, 3705 (struct _umtx_op_args *)uap); 3706 return (EINVAL); 3707 } 3708 #endif 3709 3710 void 3711 umtx_thread_init(struct thread *td) 3712 { 3713 td->td_umtxq = umtxq_alloc(); 3714 td->td_umtxq->uq_thread = td; 3715 } 3716 3717 void 3718 umtx_thread_fini(struct thread *td) 3719 { 3720 umtxq_free(td->td_umtxq); 3721 } 3722 3723 /* 3724 * It will be called when new thread is created, e.g fork(). 3725 */ 3726 void 3727 umtx_thread_alloc(struct thread *td) 3728 { 3729 struct umtx_q *uq; 3730 3731 uq = td->td_umtxq; 3732 uq->uq_inherited_pri = PRI_MAX; 3733 3734 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 3735 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 3736 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 3737 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 3738 } 3739 3740 /* 3741 * exec() hook. 3742 */ 3743 static void 3744 umtx_exec_hook(void *arg __unused, struct proc *p __unused, 3745 struct image_params *imgp __unused) 3746 { 3747 umtx_thread_cleanup(curthread); 3748 } 3749 3750 /* 3751 * thread_exit() hook. 3752 */ 3753 void 3754 umtx_thread_exit(struct thread *td) 3755 { 3756 umtx_thread_cleanup(td); 3757 } 3758 3759 /* 3760 * clean up umtx data. 3761 */ 3762 static void 3763 umtx_thread_cleanup(struct thread *td) 3764 { 3765 struct umtx_q *uq; 3766 struct umtx_pi *pi; 3767 3768 if ((uq = td->td_umtxq) == NULL) 3769 return; 3770 3771 mtx_lock_spin(&umtx_lock); 3772 uq->uq_inherited_pri = PRI_MAX; 3773 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 3774 pi->pi_owner = NULL; 3775 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 3776 } 3777 mtx_unlock_spin(&umtx_lock); 3778 thread_lock(td); 3779 sched_lend_user_prio(td, PRI_MAX); 3780 thread_unlock(td); 3781 } 3782