1 /*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_compat.h" 32 #include "opt_umtx_profiling.h" 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mutex.h> 40 #include <sys/priv.h> 41 #include <sys/proc.h> 42 #include <sys/sbuf.h> 43 #include <sys/sched.h> 44 #include <sys/smp.h> 45 #include <sys/sysctl.h> 46 #include <sys/sysent.h> 47 #include <sys/systm.h> 48 #include <sys/sysproto.h> 49 #include <sys/syscallsubr.h> 50 #include <sys/eventhandler.h> 51 #include <sys/umtx.h> 52 53 #include <vm/vm.h> 54 #include <vm/vm_param.h> 55 #include <vm/pmap.h> 56 #include <vm/vm_map.h> 57 #include <vm/vm_object.h> 58 59 #include <machine/cpu.h> 60 61 #ifdef COMPAT_FREEBSD32 62 #include <compat/freebsd32/freebsd32_proto.h> 63 #endif 64 65 #define _UMUTEX_TRY 1 66 #define _UMUTEX_WAIT 2 67 68 #ifdef UMTX_PROFILING 69 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 70 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 71 #endif 72 73 /* Priority inheritance mutex info. */ 74 struct umtx_pi { 75 /* Owner thread */ 76 struct thread *pi_owner; 77 78 /* Reference count */ 79 int pi_refcount; 80 81 /* List entry to link umtx objects held by a thread */ 82 TAILQ_ENTRY(umtx_pi) pi_link; 83 84 /* List entry in hash */ 85 TAILQ_ENTRY(umtx_pi) pi_hashlink; 86 87 /* List for waiters */ 88 TAILQ_HEAD(,umtx_q) pi_blocked; 89 90 /* Identify a userland lock object */ 91 struct umtx_key pi_key; 92 }; 93 94 /* A userland synchronization object user. */ 95 struct umtx_q { 96 /* Linked list for the hash. */ 97 TAILQ_ENTRY(umtx_q) uq_link; 98 99 /* Umtx key. */ 100 struct umtx_key uq_key; 101 102 /* Umtx flags. */ 103 int uq_flags; 104 #define UQF_UMTXQ 0x0001 105 106 /* The thread that is waiting. */ 107 struct thread *uq_thread; 108 109 /* 110 * Blocked on a PI mutex. Reads can use either the chain lock 111 * or umtx_lock; writes must hold both the chain lock and 112 * umtx_lock.
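 *
 * For example (an illustrative sketch, not code copied from this
 * file's callers), a writer such as umtxq_sleep_pi() below does:
 *
 *	umtxq_lock(&uq->uq_key);	- the chain lock
 *	mtx_lock_spin(&umtx_lock);
 *	uq->uq_pi_blocked = pi;
 *	mtx_unlock_spin(&umtx_lock);
 *
 * while a reader such as umtx_propagate_priority() only asserts
 * that umtx_lock is owned.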
113 */ 114 struct umtx_pi *uq_pi_blocked; 115 116 /* On blocked list */ 117 TAILQ_ENTRY(umtx_q) uq_lockq; 118 119 /* List of PI mutexes we own that are contested */ 120 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 121 122 /* Inherited priority from PP mutex */ 123 u_char uq_inherited_pri; 124 125 /* Spare queue ready to be reused */ 126 struct umtxq_queue *uq_spare_queue; 127 128 /* The queue we are on */ 129 struct umtxq_queue *uq_cur_queue; 130 }; 131 132 TAILQ_HEAD(umtxq_head, umtx_q); 133 134 /* Per-key wait-queue */ 135 struct umtxq_queue { 136 struct umtxq_head head; 137 struct umtx_key key; 138 LIST_ENTRY(umtxq_queue) link; 139 int length; 140 }; 141 142 LIST_HEAD(umtxq_list, umtxq_queue); 143 144 /* Userland lock object's wait-queue chain */ 145 struct umtxq_chain { 146 /* Lock for this chain. */ 147 struct mtx uc_lock; 148 149 /* List of sleep queues. */ 150 struct umtxq_list uc_queue[2]; 151 #define UMTX_SHARED_QUEUE 0 152 #define UMTX_EXCLUSIVE_QUEUE 1 153 154 LIST_HEAD(, umtxq_queue) uc_spare_queue; 155 156 /* Busy flag */ 157 char uc_busy; 158 159 /* Chain lock waiters */ 160 int uc_waiters; 161 162 /* All PI mutexes in the list */ 163 TAILQ_HEAD(,umtx_pi) uc_pi_list; 164 165 #ifdef UMTX_PROFILING 166 u_int length; 167 u_int max_length; 168 #endif 169 }; 170 171 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 172 #define UMTXQ_BUSY_ASSERT(uc) KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy")) 173 174 /* 175 * Don't propagate time-sharing priority; there is a security reason: 176 * a user can simply create a PI mutex, let thread A lock it, and let 177 * another thread B block on it. Because B is sleeping, its priority 178 * will be boosted; that would boost A's priority via priority 179 * propagation as well, and A's priority would never be lowered even 180 * if it were using 100% CPU, which is unfair to other processes. 181 */ 182 183 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 184 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 185 PRI_MAX_TIMESHARE : (td)->td_user_pri) 186 187 #define GOLDEN_RATIO_PRIME 2654404609U 188 #define UMTX_CHAINS 512 189 #define UMTX_SHIFTS (__WORD_BIT - 9) 190 191 #define GET_SHARE(flags) \ 192 (((flags) & USYNC_PROCESS_SHARED) == 0 ?
THREAD_SHARE : PROCESS_SHARE) 193 194 #define BUSY_SPINS 200 195 196 struct abs_timeout { 197 int clockid; 198 struct timespec cur; 199 struct timespec end; 200 }; 201 202 static uma_zone_t umtx_pi_zone; 203 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 204 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 205 static int umtx_pi_allocated; 206 207 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 208 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 209 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 210 211 #ifdef UMTX_PROFILING 212 static long max_length; 213 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 214 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats"); 215 #endif 216 217 static void umtxq_sysinit(void *); 218 static void umtxq_hash(struct umtx_key *key); 219 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 220 static void umtxq_lock(struct umtx_key *key); 221 static void umtxq_unlock(struct umtx_key *key); 222 static void umtxq_busy(struct umtx_key *key); 223 static void umtxq_unbusy(struct umtx_key *key); 224 static void umtxq_insert_queue(struct umtx_q *uq, int q); 225 static void umtxq_remove_queue(struct umtx_q *uq, int q); 226 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); 227 static int umtxq_count(struct umtx_key *key); 228 static struct umtx_pi *umtx_pi_alloc(int); 229 static void umtx_pi_free(struct umtx_pi *pi); 230 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); 231 static void umtx_thread_cleanup(struct thread *td); 232 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 233 struct image_params *imgp __unused); 234 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 235 236 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 237 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 238 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 239 240 static struct mtx umtx_lock; 241 242 #ifdef UMTX_PROFILING 243 static void 244 umtx_init_profiling(void) 245 { 246 struct sysctl_oid *chain_oid; 247 char chain_name[10]; 248 int i; 249 250 for (i = 0; i < UMTX_CHAINS; ++i) { 251 snprintf(chain_name, sizeof(chain_name), "%d", i); 252 chain_oid = SYSCTL_ADD_NODE(NULL, 253 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 254 chain_name, CTLFLAG_RD, NULL, "umtx hash stats"); 255 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 256 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 257 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 258 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 259 } 260 } 261 262 static int 263 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 264 { 265 char buf[512]; 266 struct sbuf sb; 267 struct umtxq_chain *uc; 268 u_int fract, i, j, tot, whole; 269 u_int sf0, sf1, sf2, sf3, sf4; 270 u_int si0, si1, si2, si3, si4; 271 u_int sw0, sw1, sw2, sw3, sw4; 272 273 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 274 for (i = 0; i < 2; i++) { 275 tot = 0; 276 for (j = 0; j < UMTX_CHAINS; ++j) { 277 uc = &umtxq_chains[i][j]; 278 mtx_lock(&uc->uc_lock); 279 tot += uc->max_length; 280 mtx_unlock(&uc->uc_lock); 281 } 282 if (tot == 0) 283 sbuf_printf(&sb, "%u) Empty ", i); 284 else { 285 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 286 si0 = si1 = si2 = si3 = si4 = 0; 287 sw0 = sw1 = sw2 = sw3 = 
sw4 = 0; 288 for (j = 0; j < UMTX_CHAINS; j++) { 289 uc = &umtxq_chains[i][j]; 290 mtx_lock(&uc->uc_lock); 291 whole = uc->max_length * 100; 292 mtx_unlock(&uc->uc_lock); 293 fract = (whole % tot) * 100; 294 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 295 sf0 = fract; 296 si0 = j; 297 sw0 = whole; 298 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 299 sf1)) { 300 sf1 = fract; 301 si1 = j; 302 sw1 = whole; 303 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 304 sf2)) { 305 sf2 = fract; 306 si2 = j; 307 sw2 = whole; 308 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 309 sf3)) { 310 sf3 = fract; 311 si3 = j; 312 sw3 = whole; 313 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 314 sf4)) { 315 sf4 = fract; 316 si4 = j; 317 sw4 = whole; 318 } 319 } 320 sbuf_printf(&sb, "queue %u:\n", i); 321 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 322 sf0 / tot, si0); 323 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 324 sf1 / tot, si1); 325 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 326 sf2 / tot, si2); 327 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 328 sf3 / tot, si3); 329 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 330 sf4 / tot, si4); 331 } 332 } 333 sbuf_trim(&sb); 334 sbuf_finish(&sb); 335 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 336 sbuf_delete(&sb); 337 return (0); 338 } 339 340 static int 341 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 342 { 343 struct umtxq_chain *uc; 344 u_int i, j; 345 int clear, error; 346 347 clear = 0; 348 error = sysctl_handle_int(oidp, &clear, 0, req); 349 if (error != 0 || req->newptr == NULL) 350 return (error); 351 352 if (clear != 0) { 353 for (i = 0; i < 2; ++i) { 354 for (j = 0; j < UMTX_CHAINS; ++j) { 355 uc = &umtxq_chains[i][j]; 356 mtx_lock(&uc->uc_lock); 357 uc->length = 0; 358 uc->max_length = 0; 359 mtx_unlock(&uc->uc_lock); 360 } 361 } 362 } 363 return (0); 364 } 365 366 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 367 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 368 sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics"); 369 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 370 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 371 sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length"); 372 #endif 373 374 static void 375 umtxq_sysinit(void *arg __unused) 376 { 377 int i, j; 378 379 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 380 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 381 for (i = 0; i < 2; ++i) { 382 for (j = 0; j < UMTX_CHAINS; ++j) { 383 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 384 MTX_DEF | MTX_DUPOK); 385 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 386 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 387 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 388 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 389 umtxq_chains[i][j].uc_busy = 0; 390 umtxq_chains[i][j].uc_waiters = 0; 391 #ifdef UMTX_PROFILING 392 umtxq_chains[i][j].length = 0; 393 umtxq_chains[i][j].max_length = 0; 394 #endif 395 } 396 } 397 #ifdef UMTX_PROFILING 398 umtx_init_profiling(); 399 #endif 400 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); 401 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 402 EVENTHANDLER_PRI_ANY); 403 } 404 405 struct umtx_q * 406 umtxq_alloc(void) 407 { 408 struct umtx_q *uq; 409 410 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 411 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); 412 TAILQ_INIT(&uq->uq_spare_queue->head); 413 
TAILQ_INIT(&uq->uq_pi_contested); 414 uq->uq_inherited_pri = PRI_MAX; 415 return (uq); 416 } 417 418 void 419 umtxq_free(struct umtx_q *uq) 420 { 421 MPASS(uq->uq_spare_queue != NULL); 422 free(uq->uq_spare_queue, M_UMTX); 423 free(uq, M_UMTX); 424 } 425 426 static inline void 427 umtxq_hash(struct umtx_key *key) 428 { 429 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; 430 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 431 } 432 433 static inline struct umtxq_chain * 434 umtxq_getchain(struct umtx_key *key) 435 { 436 if (key->type <= TYPE_SEM) 437 return (&umtxq_chains[1][key->hash]); 438 return (&umtxq_chains[0][key->hash]); 439 } 440 441 /* 442 * Lock a chain. 443 */ 444 static inline void 445 umtxq_lock(struct umtx_key *key) 446 { 447 struct umtxq_chain *uc; 448 449 uc = umtxq_getchain(key); 450 mtx_lock(&uc->uc_lock); 451 } 452 453 /* 454 * Unlock a chain. 455 */ 456 static inline void 457 umtxq_unlock(struct umtx_key *key) 458 { 459 struct umtxq_chain *uc; 460 461 uc = umtxq_getchain(key); 462 mtx_unlock(&uc->uc_lock); 463 } 464 465 /* 466 * Set the chain to the busy state when the following operation 467 * may block (a kernel mutex cannot be used). 468 */ 469 static inline void 470 umtxq_busy(struct umtx_key *key) 471 { 472 struct umtxq_chain *uc; 473 474 uc = umtxq_getchain(key); 475 mtx_assert(&uc->uc_lock, MA_OWNED); 476 if (uc->uc_busy) { 477 #ifdef SMP 478 if (smp_cpus > 1) { 479 int count = BUSY_SPINS; 480 if (count > 0) { 481 umtxq_unlock(key); 482 while (uc->uc_busy && --count > 0) 483 cpu_spinwait(); 484 umtxq_lock(key); 485 } 486 } 487 #endif 488 while (uc->uc_busy) { 489 uc->uc_waiters++; 490 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 491 uc->uc_waiters--; 492 } 493 } 494 uc->uc_busy = 1; 495 } 496 497 /* 498 * Unbusy a chain.
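 *
 * A sketch of the canonical busy/unbusy pattern (matching what
 * do_unlock_umtx() below actually does): the busy flag, not the
 * chain mutex, protects the section that touches userland memory,
 * because that access may fault or block:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);	- may sleep until the chain is unbusied
 *	umtxq_unlock(&key);	- drop the mutex before faulting
 *	old = casuword(...);	- userland access, may fault or block
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);	- wakes a waiter sleeping in umtxq_busy()
 *	umtxq_unlock(&key);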
499 */ 500 static inline void 501 umtxq_unbusy(struct umtx_key *key) 502 { 503 struct umtxq_chain *uc; 504 505 uc = umtxq_getchain(key); 506 mtx_assert(&uc->uc_lock, MA_OWNED); 507 KASSERT(uc->uc_busy != 0, ("not busy")); 508 uc->uc_busy = 0; 509 if (uc->uc_waiters) 510 wakeup_one(uc); 511 } 512 513 static struct umtxq_queue * 514 umtxq_queue_lookup(struct umtx_key *key, int q) 515 { 516 struct umtxq_queue *uh; 517 struct umtxq_chain *uc; 518 519 uc = umtxq_getchain(key); 520 UMTXQ_LOCKED_ASSERT(uc); 521 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 522 if (umtx_key_match(&uh->key, key)) 523 return (uh); 524 } 525 526 return (NULL); 527 } 528 529 static inline void 530 umtxq_insert_queue(struct umtx_q *uq, int q) 531 { 532 struct umtxq_queue *uh; 533 struct umtxq_chain *uc; 534 535 uc = umtxq_getchain(&uq->uq_key); 536 UMTXQ_LOCKED_ASSERT(uc); 537 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 538 uh = umtxq_queue_lookup(&uq->uq_key, q); 539 if (uh != NULL) { 540 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 541 } else { 542 uh = uq->uq_spare_queue; 543 uh->key = uq->uq_key; 544 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 545 #ifdef UMTX_PROFILING 546 uc->length++; 547 if (uc->length > uc->max_length) { 548 uc->max_length = uc->length; 549 if (uc->max_length > max_length) 550 max_length = uc->max_length; 551 } 552 #endif 553 } 554 uq->uq_spare_queue = NULL; 555 556 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 557 uh->length++; 558 uq->uq_flags |= UQF_UMTXQ; 559 uq->uq_cur_queue = uh; 560 return; 561 } 562 563 static inline void 564 umtxq_remove_queue(struct umtx_q *uq, int q) 565 { 566 struct umtxq_chain *uc; 567 struct umtxq_queue *uh; 568 569 uc = umtxq_getchain(&uq->uq_key); 570 UMTXQ_LOCKED_ASSERT(uc); 571 if (uq->uq_flags & UQF_UMTXQ) { 572 uh = uq->uq_cur_queue; 573 TAILQ_REMOVE(&uh->head, uq, uq_link); 574 uh->length--; 575 uq->uq_flags &= ~UQF_UMTXQ; 576 if (TAILQ_EMPTY(&uh->head)) { 577 KASSERT(uh->length == 0, 578 ("inconsistent umtxq_queue length")); 579 #ifdef UMTX_PROFILING 580 uc->length--; 581 #endif 582 LIST_REMOVE(uh, link); 583 } else { 584 uh = LIST_FIRST(&uc->uc_spare_queue); 585 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 586 LIST_REMOVE(uh, link); 587 } 588 uq->uq_spare_queue = uh; 589 uq->uq_cur_queue = NULL; 590 } 591 } 592 593 /* 594 * Check if there are multiple waiters 595 */ 596 static int 597 umtxq_count(struct umtx_key *key) 598 { 599 struct umtxq_chain *uc; 600 struct umtxq_queue *uh; 601 602 uc = umtxq_getchain(key); 603 UMTXQ_LOCKED_ASSERT(uc); 604 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 605 if (uh != NULL) 606 return (uh->length); 607 return (0); 608 } 609 610 /* 611 * Check if there are multiple PI waiters and returns first 612 * waiter. 613 */ 614 static int 615 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 616 { 617 struct umtxq_chain *uc; 618 struct umtxq_queue *uh; 619 620 *first = NULL; 621 uc = umtxq_getchain(key); 622 UMTXQ_LOCKED_ASSERT(uc); 623 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 624 if (uh != NULL) { 625 *first = TAILQ_FIRST(&uh->head); 626 return (uh->length); 627 } 628 return (0); 629 } 630 631 static int 632 umtxq_check_susp(struct thread *td) 633 { 634 struct proc *p; 635 int error; 636 637 /* 638 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to 639 * eventually break the lockstep loop. 
640 */ 641 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) 642 return (0); 643 error = 0; 644 p = td->td_proc; 645 PROC_LOCK(p); 646 if (P_SHOULDSTOP(p) || 647 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) { 648 if (p->p_flag & P_SINGLE_EXIT) 649 error = EINTR; 650 else 651 error = ERESTART; 652 } 653 PROC_UNLOCK(p); 654 return (error); 655 } 656 657 /* 658 * Wake up threads waiting on an userland object. 659 */ 660 661 static int 662 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 663 { 664 struct umtxq_chain *uc; 665 struct umtxq_queue *uh; 666 struct umtx_q *uq; 667 int ret; 668 669 ret = 0; 670 uc = umtxq_getchain(key); 671 UMTXQ_LOCKED_ASSERT(uc); 672 uh = umtxq_queue_lookup(key, q); 673 if (uh != NULL) { 674 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 675 umtxq_remove_queue(uq, q); 676 wakeup(uq); 677 if (++ret >= n_wake) 678 return (ret); 679 } 680 } 681 return (ret); 682 } 683 684 685 /* 686 * Wake up specified thread. 687 */ 688 static inline void 689 umtxq_signal_thread(struct umtx_q *uq) 690 { 691 struct umtxq_chain *uc; 692 693 uc = umtxq_getchain(&uq->uq_key); 694 UMTXQ_LOCKED_ASSERT(uc); 695 umtxq_remove(uq); 696 wakeup(uq); 697 } 698 699 static inline int 700 tstohz(const struct timespec *tsp) 701 { 702 struct timeval tv; 703 704 TIMESPEC_TO_TIMEVAL(&tv, tsp); 705 return tvtohz(&tv); 706 } 707 708 static void 709 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, 710 const struct timespec *timeout) 711 { 712 713 timo->clockid = clockid; 714 if (!absolute) { 715 kern_clock_gettime(curthread, clockid, &timo->end); 716 timo->cur = timo->end; 717 timespecadd(&timo->end, timeout); 718 } else { 719 timo->end = *timeout; 720 kern_clock_gettime(curthread, clockid, &timo->cur); 721 } 722 } 723 724 static void 725 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) 726 { 727 728 abs_timeout_init(timo, umtxtime->_clockid, 729 (umtxtime->_flags & UMTX_ABSTIME) != 0, 730 &umtxtime->_timeout); 731 } 732 733 static inline void 734 abs_timeout_update(struct abs_timeout *timo) 735 { 736 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 737 } 738 739 static int 740 abs_timeout_gethz(struct abs_timeout *timo) 741 { 742 struct timespec tts; 743 744 if (timespeccmp(&timo->end, &timo->cur, <=)) 745 return (-1); 746 tts = timo->end; 747 timespecsub(&tts, &timo->cur); 748 return (tstohz(&tts)); 749 } 750 751 /* 752 * Put thread into sleep state, before sleeping, check if 753 * thread was removed from umtx queue. 754 */ 755 static inline int 756 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) 757 { 758 struct umtxq_chain *uc; 759 int error, timo; 760 761 uc = umtxq_getchain(&uq->uq_key); 762 UMTXQ_LOCKED_ASSERT(uc); 763 for (;;) { 764 if (!(uq->uq_flags & UQF_UMTXQ)) 765 return (0); 766 if (abstime != NULL) { 767 timo = abs_timeout_gethz(abstime); 768 if (timo < 0) 769 return (ETIMEDOUT); 770 } else 771 timo = 0; 772 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 773 if (error != EWOULDBLOCK) { 774 umtxq_lock(&uq->uq_key); 775 break; 776 } 777 if (abstime != NULL) 778 abs_timeout_update(abstime); 779 umtxq_lock(&uq->uq_key); 780 } 781 return (error); 782 } 783 784 /* 785 * Convert userspace address into unique logical address. 
786 */ 787 int 788 umtx_key_get(void *addr, int type, int share, struct umtx_key *key) 789 { 790 struct thread *td = curthread; 791 vm_map_t map; 792 vm_map_entry_t entry; 793 vm_pindex_t pindex; 794 vm_prot_t prot; 795 boolean_t wired; 796 797 key->type = type; 798 if (share == THREAD_SHARE) { 799 key->shared = 0; 800 key->info.private.vs = td->td_proc->p_vmspace; 801 key->info.private.addr = (uintptr_t)addr; 802 } else { 803 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 804 map = &td->td_proc->p_vmspace->vm_map; 805 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 806 &entry, &key->info.shared.object, &pindex, &prot, 807 &wired) != KERN_SUCCESS) { 808 return EFAULT; 809 } 810 811 if ((share == PROCESS_SHARE) || 812 (share == AUTO_SHARE && 813 VM_INHERIT_SHARE == entry->inheritance)) { 814 key->shared = 1; 815 key->info.shared.offset = entry->offset + entry->start - 816 (vm_offset_t)addr; 817 vm_object_reference(key->info.shared.object); 818 } else { 819 key->shared = 0; 820 key->info.private.vs = td->td_proc->p_vmspace; 821 key->info.private.addr = (uintptr_t)addr; 822 } 823 vm_map_lookup_done(map, entry); 824 } 825 826 umtxq_hash(key); 827 return (0); 828 } 829 830 /* 831 * Release key. 832 */ 833 void 834 umtx_key_release(struct umtx_key *key) 835 { 836 if (key->shared) 837 vm_object_deallocate(key->info.shared.object); 838 } 839 840 /* 841 * Lock a umtx object. 842 */ 843 static int 844 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 845 const struct timespec *timeout) 846 { 847 struct abs_timeout timo; 848 struct umtx_q *uq; 849 u_long owner; 850 u_long old; 851 int error = 0; 852 853 uq = td->td_umtxq; 854 if (timeout != NULL) 855 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 856 857 /* 858 * Care must be exercised when dealing with umtx structure. It 859 * can fault on any access. 860 */ 861 for (;;) { 862 /* 863 * Try the uncontested case. This should be done in userland. 864 */ 865 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 866 867 /* The acquire succeeded. */ 868 if (owner == UMTX_UNOWNED) 869 return (0); 870 871 /* The address was invalid. */ 872 if (owner == -1) 873 return (EFAULT); 874 875 /* If no one owns it but it is contested try to acquire it. */ 876 if (owner == UMTX_CONTESTED) { 877 owner = casuword(&umtx->u_owner, 878 UMTX_CONTESTED, id | UMTX_CONTESTED); 879 880 if (owner == UMTX_CONTESTED) 881 return (0); 882 883 /* The address was invalid. */ 884 if (owner == -1) 885 return (EFAULT); 886 887 error = umtxq_check_susp(td); 888 if (error != 0) 889 break; 890 891 /* If this failed the lock has changed, restart. */ 892 continue; 893 } 894 895 /* 896 * If we caught a signal, we have retried and now 897 * exit immediately. 898 */ 899 if (error != 0) 900 break; 901 902 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 903 AUTO_SHARE, &uq->uq_key)) != 0) 904 return (error); 905 906 umtxq_lock(&uq->uq_key); 907 umtxq_busy(&uq->uq_key); 908 umtxq_insert(uq); 909 umtxq_unbusy(&uq->uq_key); 910 umtxq_unlock(&uq->uq_key); 911 912 /* 913 * Set the contested bit so that a release in user space 914 * knows to use the system call for unlock. If this fails 915 * either some one else has acquired the lock or it has been 916 * released. 917 */ 918 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 919 920 /* The address was invalid. 
*/ 921 if (old == -1) { 922 umtxq_lock(&uq->uq_key); 923 umtxq_remove(uq); 924 umtxq_unlock(&uq->uq_key); 925 umtx_key_release(&uq->uq_key); 926 return (EFAULT); 927 } 928 929 /* 930 * We set the contested bit, sleep. Otherwise the lock changed 931 * and we need to retry or we lost a race to the thread 932 * unlocking the umtx. 933 */ 934 umtxq_lock(&uq->uq_key); 935 if (old == owner) 936 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL : 937 &timo); 938 umtxq_remove(uq); 939 umtxq_unlock(&uq->uq_key); 940 umtx_key_release(&uq->uq_key); 941 942 if (error == 0) 943 error = umtxq_check_susp(td); 944 } 945 946 if (timeout == NULL) { 947 /* Mutex locking is restarted if it is interrupted. */ 948 if (error == EINTR) 949 error = ERESTART; 950 } else { 951 /* Timed-locking is not restarted. */ 952 if (error == ERESTART) 953 error = EINTR; 954 } 955 return (error); 956 } 957 958 /* 959 * Unlock a umtx object. 960 */ 961 static int 962 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 963 { 964 struct umtx_key key; 965 u_long owner; 966 u_long old; 967 int error; 968 int count; 969 970 /* 971 * Make sure we own this mtx. 972 */ 973 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 974 if (owner == -1) 975 return (EFAULT); 976 977 if ((owner & ~UMTX_CONTESTED) != id) 978 return (EPERM); 979 980 /* This should be done in userland */ 981 if ((owner & UMTX_CONTESTED) == 0) { 982 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 983 if (old == -1) 984 return (EFAULT); 985 if (old == owner) 986 return (0); 987 owner = old; 988 } 989 990 /* We should only ever be in here for contested locks */ 991 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 992 &key)) != 0) 993 return (error); 994 995 umtxq_lock(&key); 996 umtxq_busy(&key); 997 count = umtxq_count(&key); 998 umtxq_unlock(&key); 999 1000 /* 1001 * When unlocking the umtx, it must be marked as unowned if 1002 * there is zero or one thread only waiting for it. 1003 * Otherwise, it must be marked as contested. 1004 */ 1005 old = casuword(&umtx->u_owner, owner, 1006 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 1007 umtxq_lock(&key); 1008 umtxq_signal(&key,1); 1009 umtxq_unbusy(&key); 1010 umtxq_unlock(&key); 1011 umtx_key_release(&key); 1012 if (old == -1) 1013 return (EFAULT); 1014 if (old != owner) 1015 return (EINVAL); 1016 return (0); 1017 } 1018 1019 #ifdef COMPAT_FREEBSD32 1020 1021 /* 1022 * Lock a umtx object. 1023 */ 1024 static int 1025 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1026 const struct timespec *timeout) 1027 { 1028 struct abs_timeout timo; 1029 struct umtx_q *uq; 1030 uint32_t owner; 1031 uint32_t old; 1032 int error = 0; 1033 1034 uq = td->td_umtxq; 1035 1036 if (timeout != NULL) 1037 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1038 1039 /* 1040 * Care must be exercised when dealing with umtx structure. It 1041 * can fault on any access. 1042 */ 1043 for (;;) { 1044 /* 1045 * Try the uncontested case. This should be done in userland. 1046 */ 1047 owner = casuword32(m, UMUTEX_UNOWNED, id); 1048 1049 /* The acquire succeeded. */ 1050 if (owner == UMUTEX_UNOWNED) 1051 return (0); 1052 1053 /* The address was invalid. */ 1054 if (owner == -1) 1055 return (EFAULT); 1056 1057 /* If no one owns it but it is contested try to acquire it. */ 1058 if (owner == UMUTEX_CONTESTED) { 1059 owner = casuword32(m, 1060 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1061 if (owner == UMUTEX_CONTESTED) 1062 return (0); 1063 1064 /* The address was invalid. 
*/ 1065 if (owner == -1) 1066 return (EFAULT); 1067 1068 error = umtxq_check_susp(td); 1069 if (error != 0) 1070 break; 1071 1072 /* If this failed the lock has changed, restart. */ 1073 continue; 1074 } 1075 1076 /* 1077 * If we caught a signal, we have retried and now 1078 * exit immediately. 1079 */ 1080 if (error != 0) 1081 return (error); 1082 1083 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1084 AUTO_SHARE, &uq->uq_key)) != 0) 1085 return (error); 1086 1087 umtxq_lock(&uq->uq_key); 1088 umtxq_busy(&uq->uq_key); 1089 umtxq_insert(uq); 1090 umtxq_unbusy(&uq->uq_key); 1091 umtxq_unlock(&uq->uq_key); 1092 1093 /* 1094 * Set the contested bit so that a release in user space 1095 * knows to use the system call for unlock. If this fails 1096 * either some one else has acquired the lock or it has been 1097 * released. 1098 */ 1099 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1100 1101 /* The address was invalid. */ 1102 if (old == -1) { 1103 umtxq_lock(&uq->uq_key); 1104 umtxq_remove(uq); 1105 umtxq_unlock(&uq->uq_key); 1106 umtx_key_release(&uq->uq_key); 1107 return (EFAULT); 1108 } 1109 1110 /* 1111 * We set the contested bit, sleep. Otherwise the lock changed 1112 * and we need to retry or we lost a race to the thread 1113 * unlocking the umtx. 1114 */ 1115 umtxq_lock(&uq->uq_key); 1116 if (old == owner) 1117 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1118 NULL : &timo); 1119 umtxq_remove(uq); 1120 umtxq_unlock(&uq->uq_key); 1121 umtx_key_release(&uq->uq_key); 1122 1123 if (error == 0) 1124 error = umtxq_check_susp(td); 1125 } 1126 1127 if (timeout == NULL) { 1128 /* Mutex locking is restarted if it is interrupted. */ 1129 if (error == EINTR) 1130 error = ERESTART; 1131 } else { 1132 /* Timed-locking is not restarted. */ 1133 if (error == ERESTART) 1134 error = EINTR; 1135 } 1136 return (error); 1137 } 1138 1139 /* 1140 * Unlock a umtx object. 1141 */ 1142 static int 1143 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1144 { 1145 struct umtx_key key; 1146 uint32_t owner; 1147 uint32_t old; 1148 int error; 1149 int count; 1150 1151 /* 1152 * Make sure we own this mtx. 1153 */ 1154 owner = fuword32(m); 1155 if (owner == -1) 1156 return (EFAULT); 1157 1158 if ((owner & ~UMUTEX_CONTESTED) != id) 1159 return (EPERM); 1160 1161 /* This should be done in userland */ 1162 if ((owner & UMUTEX_CONTESTED) == 0) { 1163 old = casuword32(m, owner, UMUTEX_UNOWNED); 1164 if (old == -1) 1165 return (EFAULT); 1166 if (old == owner) 1167 return (0); 1168 owner = old; 1169 } 1170 1171 /* We should only ever be in here for contested locks */ 1172 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1173 &key)) != 0) 1174 return (error); 1175 1176 umtxq_lock(&key); 1177 umtxq_busy(&key); 1178 count = umtxq_count(&key); 1179 umtxq_unlock(&key); 1180 1181 /* 1182 * When unlocking the umtx, it must be marked as unowned if 1183 * there is zero or one thread only waiting for it. 1184 * Otherwise, it must be marked as contested. 1185 */ 1186 old = casuword32(m, owner, 1187 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1188 umtxq_lock(&key); 1189 umtxq_signal(&key,1); 1190 umtxq_unbusy(&key); 1191 umtxq_unlock(&key); 1192 umtx_key_release(&key); 1193 if (old == -1) 1194 return (EFAULT); 1195 if (old != owner) 1196 return (EINVAL); 1197 return (0); 1198 } 1199 #endif 1200 1201 /* 1202 * Fetch and compare value, sleep on the address if value is not changed. 
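 *
 * This is the kernel half of a futex-style wait. A hedged sketch of
 * the intended userland usage (the UMTX_OP_WAIT constant is assumed
 * here for illustration; it is not defined in this file):
 *
 *	while (atomic_load_acq_long(&word) == BUSY)
 *		_umtx_op(&word, UMTX_OP_WAIT, BUSY, NULL, NULL);
 *
 * The value is re-read after the thread is queued (see the fuword()
 * below), so a wakeup racing with the final check cannot be lost.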
1203 */ 1204 static int 1205 do_wait(struct thread *td, void *addr, u_long id, 1206 struct _umtx_time *timeout, int compat32, int is_private) 1207 { 1208 struct abs_timeout timo; 1209 struct umtx_q *uq; 1210 u_long tmp; 1211 int error = 0; 1212 1213 uq = td->td_umtxq; 1214 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1215 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1216 return (error); 1217 1218 if (timeout != NULL) 1219 abs_timeout_init2(&timo, timeout); 1220 1221 umtxq_lock(&uq->uq_key); 1222 umtxq_insert(uq); 1223 umtxq_unlock(&uq->uq_key); 1224 if (compat32 == 0) 1225 tmp = fuword(addr); 1226 else 1227 tmp = (unsigned int)fuword32(addr); 1228 umtxq_lock(&uq->uq_key); 1229 if (tmp == id) 1230 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 1231 NULL : &timo); 1232 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1233 error = 0; 1234 else 1235 umtxq_remove(uq); 1236 umtxq_unlock(&uq->uq_key); 1237 umtx_key_release(&uq->uq_key); 1238 if (error == ERESTART) 1239 error = EINTR; 1240 return (error); 1241 } 1242 1243 /* 1244 * Wake up threads sleeping on the specified address. 1245 */ 1246 int 1247 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1248 { 1249 struct umtx_key key; 1250 int ret; 1251 1252 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1253 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1254 return (ret); 1255 umtxq_lock(&key); 1256 ret = umtxq_signal(&key, n_wake); 1257 umtxq_unlock(&key); 1258 umtx_key_release(&key); 1259 return (0); 1260 } 1261 1262 /* 1263 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1264 */ 1265 static int 1266 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1267 struct _umtx_time *timeout, int mode) 1268 { 1269 struct abs_timeout timo; 1270 struct umtx_q *uq; 1271 uint32_t owner, old, id; 1272 int error = 0; 1273 1274 id = td->td_tid; 1275 uq = td->td_umtxq; 1276 1277 if (timeout != NULL) 1278 abs_timeout_init2(&timo, timeout); 1279 1280 /* 1281 * Care must be exercised when dealing with umtx structure. It 1282 * can fault on any access. 1283 */ 1284 for (;;) { 1285 owner = fuword32(__DEVOLATILE(void *, &m->m_owner)); 1286 if (mode == _UMUTEX_WAIT) { 1287 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) 1288 return (0); 1289 } else { 1290 /* 1291 * Try the uncontested case. This should be done in userland. 1292 */ 1293 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1294 1295 /* The acquire succeeded. */ 1296 if (owner == UMUTEX_UNOWNED) 1297 return (0); 1298 1299 /* The address was invalid. */ 1300 if (owner == -1) 1301 return (EFAULT); 1302 1303 /* If no one owns it but it is contested try to acquire it. */ 1304 if (owner == UMUTEX_CONTESTED) { 1305 owner = casuword32(&m->m_owner, 1306 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1307 1308 if (owner == UMUTEX_CONTESTED) 1309 return (0); 1310 1311 /* The address was invalid. */ 1312 if (owner == -1) 1313 return (EFAULT); 1314 1315 error = umtxq_check_susp(td); 1316 if (error != 0) 1317 return (error); 1318 1319 /* If this failed the lock has changed, restart. */ 1320 continue; 1321 } 1322 } 1323 1324 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1325 (owner & ~UMUTEX_CONTESTED) == id) 1326 return (EDEADLK); 1327 1328 if (mode == _UMUTEX_TRY) 1329 return (EBUSY); 1330 1331 /* 1332 * If we caught a signal, we have retried and now 1333 * exit immediately. 
1334 */ 1335 if (error != 0) 1336 return (error); 1337 1338 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1339 GET_SHARE(flags), &uq->uq_key)) != 0) 1340 return (error); 1341 1342 umtxq_lock(&uq->uq_key); 1343 umtxq_busy(&uq->uq_key); 1344 umtxq_insert(uq); 1345 umtxq_unlock(&uq->uq_key); 1346 1347 /* 1348 * Set the contested bit so that a release in user space 1349 * knows to use the system call for unlock. If this fails 1350 * either some one else has acquired the lock or it has been 1351 * released. 1352 */ 1353 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1354 1355 /* The address was invalid. */ 1356 if (old == -1) { 1357 umtxq_lock(&uq->uq_key); 1358 umtxq_remove(uq); 1359 umtxq_unbusy(&uq->uq_key); 1360 umtxq_unlock(&uq->uq_key); 1361 umtx_key_release(&uq->uq_key); 1362 return (EFAULT); 1363 } 1364 1365 /* 1366 * We set the contested bit, sleep. Otherwise the lock changed 1367 * and we need to retry or we lost a race to the thread 1368 * unlocking the umtx. 1369 */ 1370 umtxq_lock(&uq->uq_key); 1371 umtxq_unbusy(&uq->uq_key); 1372 if (old == owner) 1373 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1374 NULL : &timo); 1375 umtxq_remove(uq); 1376 umtxq_unlock(&uq->uq_key); 1377 umtx_key_release(&uq->uq_key); 1378 1379 if (error == 0) 1380 error = umtxq_check_susp(td); 1381 } 1382 1383 return (0); 1384 } 1385 1386 /* 1387 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1388 */ 1389 static int 1390 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) 1391 { 1392 struct umtx_key key; 1393 uint32_t owner, old, id; 1394 int error; 1395 int count; 1396 1397 id = td->td_tid; 1398 /* 1399 * Make sure we own this mtx. 1400 */ 1401 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1402 if (owner == -1) 1403 return (EFAULT); 1404 1405 if ((owner & ~UMUTEX_CONTESTED) != id) 1406 return (EPERM); 1407 1408 if ((owner & UMUTEX_CONTESTED) == 0) { 1409 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1410 if (old == -1) 1411 return (EFAULT); 1412 if (old == owner) 1413 return (0); 1414 owner = old; 1415 } 1416 1417 /* We should only ever be in here for contested locks */ 1418 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1419 &key)) != 0) 1420 return (error); 1421 1422 umtxq_lock(&key); 1423 umtxq_busy(&key); 1424 count = umtxq_count(&key); 1425 umtxq_unlock(&key); 1426 1427 /* 1428 * When unlocking the umtx, it must be marked as unowned if 1429 * there is zero or one thread only waiting for it. 1430 * Otherwise, it must be marked as contested. 1431 */ 1432 old = casuword32(&m->m_owner, owner, 1433 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1434 umtxq_lock(&key); 1435 umtxq_signal(&key,1); 1436 umtxq_unbusy(&key); 1437 umtxq_unlock(&key); 1438 umtx_key_release(&key); 1439 if (old == -1) 1440 return (EFAULT); 1441 if (old != owner) 1442 return (EINVAL); 1443 return (0); 1444 } 1445 1446 /* 1447 * Check if the mutex is available and wake up a waiter, 1448 * only for simple mutex. 
1449 */ 1450 static int 1451 do_wake_umutex(struct thread *td, struct umutex *m) 1452 { 1453 struct umtx_key key; 1454 uint32_t owner; 1455 uint32_t flags; 1456 int error; 1457 int count; 1458 1459 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1460 if (owner == -1) 1461 return (EFAULT); 1462 1463 if ((owner & ~UMUTEX_CONTESTED) != 0) 1464 return (0); 1465 1466 flags = fuword32(&m->m_flags); 1467 1468 /* We should only ever be in here for contested locks */ 1469 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1470 &key)) != 0) 1471 return (error); 1472 1473 umtxq_lock(&key); 1474 umtxq_busy(&key); 1475 count = umtxq_count(&key); 1476 umtxq_unlock(&key); 1477 1478 if (count <= 1) 1479 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED); 1480 1481 umtxq_lock(&key); 1482 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) 1483 umtxq_signal(&key, 1); 1484 umtxq_unbusy(&key); 1485 umtxq_unlock(&key); 1486 umtx_key_release(&key); 1487 return (0); 1488 } 1489 1490 /* 1491 * Check if the mutex has waiters and tries to fix contention bit. 1492 */ 1493 static int 1494 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1495 { 1496 struct umtx_key key; 1497 uint32_t owner, old; 1498 int type; 1499 int error; 1500 int count; 1501 1502 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 1503 case 0: 1504 type = TYPE_NORMAL_UMUTEX; 1505 break; 1506 case UMUTEX_PRIO_INHERIT: 1507 type = TYPE_PI_UMUTEX; 1508 break; 1509 case UMUTEX_PRIO_PROTECT: 1510 type = TYPE_PP_UMUTEX; 1511 break; 1512 default: 1513 return (EINVAL); 1514 } 1515 if ((error = umtx_key_get(m, type, GET_SHARE(flags), 1516 &key)) != 0) 1517 return (error); 1518 1519 owner = 0; 1520 umtxq_lock(&key); 1521 umtxq_busy(&key); 1522 count = umtxq_count(&key); 1523 umtxq_unlock(&key); 1524 /* 1525 * Only repair contention bit if there is a waiter, this means the mutex 1526 * is still being referenced by userland code, otherwise don't update 1527 * any memory. 
1528 */ 1529 if (count > 1) { 1530 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1531 while ((owner & UMUTEX_CONTESTED) ==0) { 1532 old = casuword32(&m->m_owner, owner, 1533 owner|UMUTEX_CONTESTED); 1534 if (old == owner) 1535 break; 1536 owner = old; 1537 if (old == -1) 1538 break; 1539 error = umtxq_check_susp(td); 1540 if (error != 0) 1541 break; 1542 } 1543 } else if (count == 1) { 1544 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1545 while ((owner & ~UMUTEX_CONTESTED) != 0 && 1546 (owner & UMUTEX_CONTESTED) == 0) { 1547 old = casuword32(&m->m_owner, owner, 1548 owner|UMUTEX_CONTESTED); 1549 if (old == owner) 1550 break; 1551 owner = old; 1552 if (old == -1) 1553 break; 1554 error = umtxq_check_susp(td); 1555 if (error != 0) 1556 break; 1557 } 1558 } 1559 umtxq_lock(&key); 1560 if (owner == -1) { 1561 error = EFAULT; 1562 umtxq_signal(&key, INT_MAX); 1563 } 1564 else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) 1565 umtxq_signal(&key, 1); 1566 umtxq_unbusy(&key); 1567 umtxq_unlock(&key); 1568 umtx_key_release(&key); 1569 return (error); 1570 } 1571 1572 static inline struct umtx_pi * 1573 umtx_pi_alloc(int flags) 1574 { 1575 struct umtx_pi *pi; 1576 1577 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1578 TAILQ_INIT(&pi->pi_blocked); 1579 atomic_add_int(&umtx_pi_allocated, 1); 1580 return (pi); 1581 } 1582 1583 static inline void 1584 umtx_pi_free(struct umtx_pi *pi) 1585 { 1586 uma_zfree(umtx_pi_zone, pi); 1587 atomic_add_int(&umtx_pi_allocated, -1); 1588 } 1589 1590 /* 1591 * Adjust the thread's position on a pi_state after its priority has been 1592 * changed. 1593 */ 1594 static int 1595 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1596 { 1597 struct umtx_q *uq, *uq1, *uq2; 1598 struct thread *td1; 1599 1600 mtx_assert(&umtx_lock, MA_OWNED); 1601 if (pi == NULL) 1602 return (0); 1603 1604 uq = td->td_umtxq; 1605 1606 /* 1607 * Check if the thread needs to be moved on the blocked chain. 1608 * It needs to be moved if either its priority is lower than 1609 * the previous thread or higher than the next thread. 1610 */ 1611 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1612 uq2 = TAILQ_NEXT(uq, uq_lockq); 1613 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1614 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1615 /* 1616 * Remove thread from blocked chain and determine where 1617 * it should be moved to. 1618 */ 1619 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1620 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1621 td1 = uq1->uq_thread; 1622 MPASS(td1->td_proc->p_magic == P_MAGIC); 1623 if (UPRI(td1) > UPRI(td)) 1624 break; 1625 } 1626 1627 if (uq1 == NULL) 1628 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1629 else 1630 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1631 } 1632 return (1); 1633 } 1634 1635 /* 1636 * Propagate priority when a thread is blocked on POSIX 1637 * PI mutex. 
1638 */ 1639 static void 1640 umtx_propagate_priority(struct thread *td) 1641 { 1642 struct umtx_q *uq; 1643 struct umtx_pi *pi; 1644 int pri; 1645 1646 mtx_assert(&umtx_lock, MA_OWNED); 1647 pri = UPRI(td); 1648 uq = td->td_umtxq; 1649 pi = uq->uq_pi_blocked; 1650 if (pi == NULL) 1651 return; 1652 1653 for (;;) { 1654 td = pi->pi_owner; 1655 if (td == NULL || td == curthread) 1656 return; 1657 1658 MPASS(td->td_proc != NULL); 1659 MPASS(td->td_proc->p_magic == P_MAGIC); 1660 1661 thread_lock(td); 1662 if (td->td_lend_user_pri > pri) 1663 sched_lend_user_prio(td, pri); 1664 else { 1665 thread_unlock(td); 1666 break; 1667 } 1668 thread_unlock(td); 1669 1670 /* 1671 * Pick up the lock that td is blocked on. 1672 */ 1673 uq = td->td_umtxq; 1674 pi = uq->uq_pi_blocked; 1675 if (pi == NULL) 1676 break; 1677 /* Resort td on the list if needed. */ 1678 umtx_pi_adjust_thread(pi, td); 1679 } 1680 } 1681 1682 /* 1683 * Unpropagate priority for a PI mutex when a thread blocked on 1684 * it is interrupted by signal or resumed by others. 1685 */ 1686 static void 1687 umtx_repropagate_priority(struct umtx_pi *pi) 1688 { 1689 struct umtx_q *uq, *uq_owner; 1690 struct umtx_pi *pi2; 1691 int pri; 1692 1693 mtx_assert(&umtx_lock, MA_OWNED); 1694 1695 while (pi != NULL && pi->pi_owner != NULL) { 1696 pri = PRI_MAX; 1697 uq_owner = pi->pi_owner->td_umtxq; 1698 1699 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1700 uq = TAILQ_FIRST(&pi2->pi_blocked); 1701 if (uq != NULL) { 1702 if (pri > UPRI(uq->uq_thread)) 1703 pri = UPRI(uq->uq_thread); 1704 } 1705 } 1706 1707 if (pri > uq_owner->uq_inherited_pri) 1708 pri = uq_owner->uq_inherited_pri; 1709 thread_lock(pi->pi_owner); 1710 sched_lend_user_prio(pi->pi_owner, pri); 1711 thread_unlock(pi->pi_owner); 1712 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1713 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1714 } 1715 } 1716 1717 /* 1718 * Insert a PI mutex into owned list. 1719 */ 1720 static void 1721 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1722 { 1723 struct umtx_q *uq_owner; 1724 1725 uq_owner = owner->td_umtxq; 1726 mtx_assert(&umtx_lock, MA_OWNED); 1727 if (pi->pi_owner != NULL) 1728 panic("pi_ower != NULL"); 1729 pi->pi_owner = owner; 1730 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1731 } 1732 1733 /* 1734 * Claim ownership of a PI mutex. 1735 */ 1736 static int 1737 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1738 { 1739 struct umtx_q *uq, *uq_owner; 1740 1741 uq_owner = owner->td_umtxq; 1742 mtx_lock_spin(&umtx_lock); 1743 if (pi->pi_owner == owner) { 1744 mtx_unlock_spin(&umtx_lock); 1745 return (0); 1746 } 1747 1748 if (pi->pi_owner != NULL) { 1749 /* 1750 * userland may have already messed the mutex, sigh. 1751 */ 1752 mtx_unlock_spin(&umtx_lock); 1753 return (EPERM); 1754 } 1755 umtx_pi_setowner(pi, owner); 1756 uq = TAILQ_FIRST(&pi->pi_blocked); 1757 if (uq != NULL) { 1758 int pri; 1759 1760 pri = UPRI(uq->uq_thread); 1761 thread_lock(owner); 1762 if (pri < UPRI(owner)) 1763 sched_lend_user_prio(owner, pri); 1764 thread_unlock(owner); 1765 } 1766 mtx_unlock_spin(&umtx_lock); 1767 return (0); 1768 } 1769 1770 /* 1771 * Adjust a thread's order position in its blocked PI mutex, 1772 * this may result new priority propagating process. 1773 */ 1774 void 1775 umtx_pi_adjust(struct thread *td, u_char oldpri) 1776 { 1777 struct umtx_q *uq; 1778 struct umtx_pi *pi; 1779 1780 uq = td->td_umtxq; 1781 mtx_lock_spin(&umtx_lock); 1782 /* 1783 * Pick up the lock that td is blocked on. 
1784 */ 1785 pi = uq->uq_pi_blocked; 1786 if (pi != NULL) { 1787 umtx_pi_adjust_thread(pi, td); 1788 umtx_repropagate_priority(pi); 1789 } 1790 mtx_unlock_spin(&umtx_lock); 1791 } 1792 1793 /* 1794 * Sleep on a PI mutex. 1795 */ 1796 static int 1797 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1798 uint32_t owner, const char *wmesg, struct abs_timeout *timo) 1799 { 1800 struct umtxq_chain *uc; 1801 struct thread *td, *td1; 1802 struct umtx_q *uq1; 1803 int pri; 1804 int error = 0; 1805 1806 td = uq->uq_thread; 1807 KASSERT(td == curthread, ("inconsistent uq_thread")); 1808 uc = umtxq_getchain(&uq->uq_key); 1809 UMTXQ_LOCKED_ASSERT(uc); 1810 UMTXQ_BUSY_ASSERT(uc); 1811 umtxq_insert(uq); 1812 mtx_lock_spin(&umtx_lock); 1813 if (pi->pi_owner == NULL) { 1814 mtx_unlock_spin(&umtx_lock); 1815 /* XXX Only look up thread in current process. */ 1816 td1 = tdfind(owner, curproc->p_pid); 1817 mtx_lock_spin(&umtx_lock); 1818 if (td1 != NULL) { 1819 if (pi->pi_owner == NULL) 1820 umtx_pi_setowner(pi, td1); 1821 PROC_UNLOCK(td1->td_proc); 1822 } 1823 } 1824 1825 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1826 pri = UPRI(uq1->uq_thread); 1827 if (pri > UPRI(td)) 1828 break; 1829 } 1830 1831 if (uq1 != NULL) 1832 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1833 else 1834 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1835 1836 uq->uq_pi_blocked = pi; 1837 thread_lock(td); 1838 td->td_flags |= TDF_UPIBLOCKED; 1839 thread_unlock(td); 1840 umtx_propagate_priority(td); 1841 mtx_unlock_spin(&umtx_lock); 1842 umtxq_unbusy(&uq->uq_key); 1843 1844 error = umtxq_sleep(uq, wmesg, timo); 1845 umtxq_remove(uq); 1846 1847 mtx_lock_spin(&umtx_lock); 1848 uq->uq_pi_blocked = NULL; 1849 thread_lock(td); 1850 td->td_flags &= ~TDF_UPIBLOCKED; 1851 thread_unlock(td); 1852 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1853 umtx_repropagate_priority(pi); 1854 mtx_unlock_spin(&umtx_lock); 1855 umtxq_unlock(&uq->uq_key); 1856 1857 return (error); 1858 } 1859 1860 /* 1861 * Add reference count for a PI mutex. 1862 */ 1863 static void 1864 umtx_pi_ref(struct umtx_pi *pi) 1865 { 1866 struct umtxq_chain *uc; 1867 1868 uc = umtxq_getchain(&pi->pi_key); 1869 UMTXQ_LOCKED_ASSERT(uc); 1870 pi->pi_refcount++; 1871 } 1872 1873 /* 1874 * Decrease reference count for a PI mutex, if the counter 1875 * is decreased to zero, its memory space is freed. 1876 */ 1877 static void 1878 umtx_pi_unref(struct umtx_pi *pi) 1879 { 1880 struct umtxq_chain *uc; 1881 1882 uc = umtxq_getchain(&pi->pi_key); 1883 UMTXQ_LOCKED_ASSERT(uc); 1884 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1885 if (--pi->pi_refcount == 0) { 1886 mtx_lock_spin(&umtx_lock); 1887 if (pi->pi_owner != NULL) { 1888 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, 1889 pi, pi_link); 1890 pi->pi_owner = NULL; 1891 } 1892 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1893 ("blocked queue not empty")); 1894 mtx_unlock_spin(&umtx_lock); 1895 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1896 umtx_pi_free(pi); 1897 } 1898 } 1899 1900 /* 1901 * Find a PI mutex in hash table. 1902 */ 1903 static struct umtx_pi * 1904 umtx_pi_lookup(struct umtx_key *key) 1905 { 1906 struct umtxq_chain *uc; 1907 struct umtx_pi *pi; 1908 1909 uc = umtxq_getchain(key); 1910 UMTXQ_LOCKED_ASSERT(uc); 1911 1912 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1913 if (umtx_key_match(&pi->pi_key, key)) { 1914 return (pi); 1915 } 1916 } 1917 return (NULL); 1918 } 1919 1920 /* 1921 * Insert a PI mutex into hash table. 
1922 */ 1923 static inline void 1924 umtx_pi_insert(struct umtx_pi *pi) 1925 { 1926 struct umtxq_chain *uc; 1927 1928 uc = umtxq_getchain(&pi->pi_key); 1929 UMTXQ_LOCKED_ASSERT(uc); 1930 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1931 } 1932 1933 /* 1934 * Lock a PI mutex. 1935 */ 1936 static int 1937 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 1938 struct _umtx_time *timeout, int try) 1939 { 1940 struct abs_timeout timo; 1941 struct umtx_q *uq; 1942 struct umtx_pi *pi, *new_pi; 1943 uint32_t id, owner, old; 1944 int error; 1945 1946 id = td->td_tid; 1947 uq = td->td_umtxq; 1948 1949 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1950 &uq->uq_key)) != 0) 1951 return (error); 1952 1953 if (timeout != NULL) 1954 abs_timeout_init2(&timo, timeout); 1955 1956 umtxq_lock(&uq->uq_key); 1957 pi = umtx_pi_lookup(&uq->uq_key); 1958 if (pi == NULL) { 1959 new_pi = umtx_pi_alloc(M_NOWAIT); 1960 if (new_pi == NULL) { 1961 umtxq_unlock(&uq->uq_key); 1962 new_pi = umtx_pi_alloc(M_WAITOK); 1963 umtxq_lock(&uq->uq_key); 1964 pi = umtx_pi_lookup(&uq->uq_key); 1965 if (pi != NULL) { 1966 umtx_pi_free(new_pi); 1967 new_pi = NULL; 1968 } 1969 } 1970 if (new_pi != NULL) { 1971 new_pi->pi_key = uq->uq_key; 1972 umtx_pi_insert(new_pi); 1973 pi = new_pi; 1974 } 1975 } 1976 umtx_pi_ref(pi); 1977 umtxq_unlock(&uq->uq_key); 1978 1979 /* 1980 * Care must be exercised when dealing with umtx structure. It 1981 * can fault on any access. 1982 */ 1983 for (;;) { 1984 /* 1985 * Try the uncontested case. This should be done in userland. 1986 */ 1987 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1988 1989 /* The acquire succeeded. */ 1990 if (owner == UMUTEX_UNOWNED) { 1991 error = 0; 1992 break; 1993 } 1994 1995 /* The address was invalid. */ 1996 if (owner == -1) { 1997 error = EFAULT; 1998 break; 1999 } 2000 2001 /* If no one owns it but it is contested try to acquire it. */ 2002 if (owner == UMUTEX_CONTESTED) { 2003 owner = casuword32(&m->m_owner, 2004 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2005 2006 if (owner == UMUTEX_CONTESTED) { 2007 umtxq_lock(&uq->uq_key); 2008 umtxq_busy(&uq->uq_key); 2009 error = umtx_pi_claim(pi, td); 2010 umtxq_unbusy(&uq->uq_key); 2011 umtxq_unlock(&uq->uq_key); 2012 break; 2013 } 2014 2015 /* The address was invalid. */ 2016 if (owner == -1) { 2017 error = EFAULT; 2018 break; 2019 } 2020 2021 error = umtxq_check_susp(td); 2022 if (error != 0) 2023 break; 2024 2025 /* If this failed the lock has changed, restart. */ 2026 continue; 2027 } 2028 2029 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 2030 (owner & ~UMUTEX_CONTESTED) == id) { 2031 error = EDEADLK; 2032 break; 2033 } 2034 2035 if (try != 0) { 2036 error = EBUSY; 2037 break; 2038 } 2039 2040 /* 2041 * If we caught a signal, we have retried and now 2042 * exit immediately. 2043 */ 2044 if (error != 0) 2045 break; 2046 2047 umtxq_lock(&uq->uq_key); 2048 umtxq_busy(&uq->uq_key); 2049 umtxq_unlock(&uq->uq_key); 2050 2051 /* 2052 * Set the contested bit so that a release in user space 2053 * knows to use the system call for unlock. If this fails 2054 * either some one else has acquired the lock or it has been 2055 * released. 2056 */ 2057 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 2058 2059 /* The address was invalid. */ 2060 if (old == -1) { 2061 umtxq_lock(&uq->uq_key); 2062 umtxq_unbusy(&uq->uq_key); 2063 umtxq_unlock(&uq->uq_key); 2064 error = EFAULT; 2065 break; 2066 } 2067 2068 umtxq_lock(&uq->uq_key); 2069 /* 2070 * We set the contested bit, sleep. 
Otherwise the lock changed 2071 * and we need to retry or we lost a race to the thread 2072 * unlocking the umtx. 2073 */ 2074 if (old == owner) 2075 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2076 "umtxpi", timeout == NULL ? NULL : &timo); 2077 else { 2078 umtxq_unbusy(&uq->uq_key); 2079 umtxq_unlock(&uq->uq_key); 2080 } 2081 2082 error = umtxq_check_susp(td); 2083 if (error != 0) 2084 break; 2085 } 2086 2087 umtxq_lock(&uq->uq_key); 2088 umtx_pi_unref(pi); 2089 umtxq_unlock(&uq->uq_key); 2090 2091 umtx_key_release(&uq->uq_key); 2092 return (error); 2093 } 2094 2095 /* 2096 * Unlock a PI mutex. 2097 */ 2098 static int 2099 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) 2100 { 2101 struct umtx_key key; 2102 struct umtx_q *uq_first, *uq_first2, *uq_me; 2103 struct umtx_pi *pi, *pi2; 2104 uint32_t owner, old, id; 2105 int error; 2106 int count; 2107 int pri; 2108 2109 id = td->td_tid; 2110 /* 2111 * Make sure we own this mtx. 2112 */ 2113 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 2114 if (owner == -1) 2115 return (EFAULT); 2116 2117 if ((owner & ~UMUTEX_CONTESTED) != id) 2118 return (EPERM); 2119 2120 /* This should be done in userland */ 2121 if ((owner & UMUTEX_CONTESTED) == 0) { 2122 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 2123 if (old == -1) 2124 return (EFAULT); 2125 if (old == owner) 2126 return (0); 2127 owner = old; 2128 } 2129 2130 /* We should only ever be in here for contested locks */ 2131 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 2132 &key)) != 0) 2133 return (error); 2134 2135 umtxq_lock(&key); 2136 umtxq_busy(&key); 2137 count = umtxq_count_pi(&key, &uq_first); 2138 if (uq_first != NULL) { 2139 mtx_lock_spin(&umtx_lock); 2140 pi = uq_first->uq_pi_blocked; 2141 KASSERT(pi != NULL, ("pi == NULL?")); 2142 if (pi->pi_owner != curthread) { 2143 mtx_unlock_spin(&umtx_lock); 2144 umtxq_unbusy(&key); 2145 umtxq_unlock(&key); 2146 umtx_key_release(&key); 2147 /* userland messed the mutex */ 2148 return (EPERM); 2149 } 2150 uq_me = curthread->td_umtxq; 2151 pi->pi_owner = NULL; 2152 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); 2153 /* get highest priority thread which is still sleeping. */ 2154 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2155 while (uq_first != NULL && 2156 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2157 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2158 } 2159 pri = PRI_MAX; 2160 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2161 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2162 if (uq_first2 != NULL) { 2163 if (pri > UPRI(uq_first2->uq_thread)) 2164 pri = UPRI(uq_first2->uq_thread); 2165 } 2166 } 2167 thread_lock(curthread); 2168 sched_lend_user_prio(curthread, pri); 2169 thread_unlock(curthread); 2170 mtx_unlock_spin(&umtx_lock); 2171 if (uq_first) 2172 umtxq_signal_thread(uq_first); 2173 } 2174 umtxq_unlock(&key); 2175 2176 /* 2177 * When unlocking the umtx, it must be marked as unowned if 2178 * there is zero or one thread only waiting for it. 2179 * Otherwise, it must be marked as contested. 2180 */ 2181 old = casuword32(&m->m_owner, owner, 2182 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 2183 2184 umtxq_lock(&key); 2185 umtxq_unbusy(&key); 2186 umtxq_unlock(&key); 2187 umtx_key_release(&key); 2188 if (old == -1) 2189 return (EFAULT); 2190 if (old != owner) 2191 return (EINVAL); 2192 return (0); 2193 } 2194 2195 /* 2196 * Lock a PP mutex. 
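 *
 * The ceiling arithmetic used below, worked through with made-up
 * numbers: userland stores an rtprio-style ceiling in m_ceilings[0],
 * which the kernel flips onto its own scale (smaller is better):
 *
 *	ceiling = RTP_PRIO_MAX - m_ceilings[0]
 *	inherited priority = PRI_MIN_REALTIME + ceiling
 *
 * e.g. with RTP_PRIO_MAX == 31 and m_ceilings[0] == 10, a privileged
 * owner (the PRIV_SCHED_RTPRIO check below) runs at
 * PRI_MIN_REALTIME + 21 while it holds the mutex.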
2197 */ 2198 static int 2199 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2200 struct _umtx_time *timeout, int try) 2201 { 2202 struct abs_timeout timo; 2203 struct umtx_q *uq, *uq2; 2204 struct umtx_pi *pi; 2205 uint32_t ceiling; 2206 uint32_t owner, id; 2207 int error, pri, old_inherited_pri, su; 2208 2209 id = td->td_tid; 2210 uq = td->td_umtxq; 2211 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2212 &uq->uq_key)) != 0) 2213 return (error); 2214 2215 if (timeout != NULL) 2216 abs_timeout_init2(&timo, timeout); 2217 2218 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2219 for (;;) { 2220 old_inherited_pri = uq->uq_inherited_pri; 2221 umtxq_lock(&uq->uq_key); 2222 umtxq_busy(&uq->uq_key); 2223 umtxq_unlock(&uq->uq_key); 2224 2225 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); 2226 if (ceiling > RTP_PRIO_MAX) { 2227 error = EINVAL; 2228 goto out; 2229 } 2230 2231 mtx_lock_spin(&umtx_lock); 2232 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 2233 mtx_unlock_spin(&umtx_lock); 2234 error = EINVAL; 2235 goto out; 2236 } 2237 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 2238 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 2239 thread_lock(td); 2240 if (uq->uq_inherited_pri < UPRI(td)) 2241 sched_lend_user_prio(td, uq->uq_inherited_pri); 2242 thread_unlock(td); 2243 } 2244 mtx_unlock_spin(&umtx_lock); 2245 2246 owner = casuword32(&m->m_owner, 2247 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2248 2249 if (owner == UMUTEX_CONTESTED) { 2250 error = 0; 2251 break; 2252 } 2253 2254 /* The address was invalid. */ 2255 if (owner == -1) { 2256 error = EFAULT; 2257 break; 2258 } 2259 2260 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 2261 (owner & ~UMUTEX_CONTESTED) == id) { 2262 error = EDEADLK; 2263 break; 2264 } 2265 2266 if (try != 0) { 2267 error = EBUSY; 2268 break; 2269 } 2270 2271 /* 2272 * If we caught a signal, we have retried and now 2273 * exit immediately. 2274 */ 2275 if (error != 0) 2276 break; 2277 2278 umtxq_lock(&uq->uq_key); 2279 umtxq_insert(uq); 2280 umtxq_unbusy(&uq->uq_key); 2281 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2282 NULL : &timo); 2283 umtxq_remove(uq); 2284 umtxq_unlock(&uq->uq_key); 2285 2286 mtx_lock_spin(&umtx_lock); 2287 uq->uq_inherited_pri = old_inherited_pri; 2288 pri = PRI_MAX; 2289 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2290 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2291 if (uq2 != NULL) { 2292 if (pri > UPRI(uq2->uq_thread)) 2293 pri = UPRI(uq2->uq_thread); 2294 } 2295 } 2296 if (pri > uq->uq_inherited_pri) 2297 pri = uq->uq_inherited_pri; 2298 thread_lock(td); 2299 sched_lend_user_prio(td, pri); 2300 thread_unlock(td); 2301 mtx_unlock_spin(&umtx_lock); 2302 } 2303 2304 if (error != 0) { 2305 mtx_lock_spin(&umtx_lock); 2306 uq->uq_inherited_pri = old_inherited_pri; 2307 pri = PRI_MAX; 2308 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2309 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2310 if (uq2 != NULL) { 2311 if (pri > UPRI(uq2->uq_thread)) 2312 pri = UPRI(uq2->uq_thread); 2313 } 2314 } 2315 if (pri > uq->uq_inherited_pri) 2316 pri = uq->uq_inherited_pri; 2317 thread_lock(td); 2318 sched_lend_user_prio(td, pri); 2319 thread_unlock(td); 2320 mtx_unlock_spin(&umtx_lock); 2321 } 2322 2323 out: 2324 umtxq_lock(&uq->uq_key); 2325 umtxq_unbusy(&uq->uq_key); 2326 umtxq_unlock(&uq->uq_key); 2327 umtx_key_release(&uq->uq_key); 2328 return (error); 2329 } 2330 2331 /* 2332 * Unlock a PP mutex. 
2333 */ 2334 static int 2335 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 2336 { 2337 struct umtx_key key; 2338 struct umtx_q *uq, *uq2; 2339 struct umtx_pi *pi; 2340 uint32_t owner, id; 2341 uint32_t rceiling; 2342 int error, pri, new_inherited_pri, su; 2343 2344 id = td->td_tid; 2345 uq = td->td_umtxq; 2346 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2347 2348 /* 2349 * Make sure we own this mtx. 2350 */ 2351 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 2352 if (owner == -1) 2353 return (EFAULT); 2354 2355 if ((owner & ~UMUTEX_CONTESTED) != id) 2356 return (EPERM); 2357 2358 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2359 if (error != 0) 2360 return (error); 2361 2362 if (rceiling == -1) 2363 new_inherited_pri = PRI_MAX; 2364 else { 2365 rceiling = RTP_PRIO_MAX - rceiling; 2366 if (rceiling > RTP_PRIO_MAX) 2367 return (EINVAL); 2368 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2369 } 2370 2371 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2372 &key)) != 0) 2373 return (error); 2374 umtxq_lock(&key); 2375 umtxq_busy(&key); 2376 umtxq_unlock(&key); 2377 /* 2378 * For priority protected mutex, always set unlocked state 2379 * to UMUTEX_CONTESTED, so that userland always enters kernel 2380 * to lock the mutex, it is necessary because thread priority 2381 * has to be adjusted for such mutex. 2382 */ 2383 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2384 UMUTEX_CONTESTED); 2385 2386 umtxq_lock(&key); 2387 if (error == 0) 2388 umtxq_signal(&key, 1); 2389 umtxq_unbusy(&key); 2390 umtxq_unlock(&key); 2391 2392 if (error == -1) 2393 error = EFAULT; 2394 else { 2395 mtx_lock_spin(&umtx_lock); 2396 if (su != 0) 2397 uq->uq_inherited_pri = new_inherited_pri; 2398 pri = PRI_MAX; 2399 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2400 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2401 if (uq2 != NULL) { 2402 if (pri > UPRI(uq2->uq_thread)) 2403 pri = UPRI(uq2->uq_thread); 2404 } 2405 } 2406 if (pri > uq->uq_inherited_pri) 2407 pri = uq->uq_inherited_pri; 2408 thread_lock(td); 2409 sched_lend_user_prio(td, pri); 2410 thread_unlock(td); 2411 mtx_unlock_spin(&umtx_lock); 2412 } 2413 umtx_key_release(&key); 2414 return (error); 2415 } 2416 2417 static int 2418 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2419 uint32_t *old_ceiling) 2420 { 2421 struct umtx_q *uq; 2422 uint32_t save_ceiling; 2423 uint32_t owner, id; 2424 uint32_t flags; 2425 int error; 2426 2427 flags = fuword32(&m->m_flags); 2428 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2429 return (EINVAL); 2430 if (ceiling > RTP_PRIO_MAX) 2431 return (EINVAL); 2432 id = td->td_tid; 2433 uq = td->td_umtxq; 2434 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2435 &uq->uq_key)) != 0) 2436 return (error); 2437 for (;;) { 2438 umtxq_lock(&uq->uq_key); 2439 umtxq_busy(&uq->uq_key); 2440 umtxq_unlock(&uq->uq_key); 2441 2442 save_ceiling = fuword32(&m->m_ceilings[0]); 2443 2444 owner = casuword32(&m->m_owner, 2445 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2446 2447 if (owner == UMUTEX_CONTESTED) { 2448 suword32(&m->m_ceilings[0], ceiling); 2449 suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2450 UMUTEX_CONTESTED); 2451 error = 0; 2452 break; 2453 } 2454 2455 /* The address was invalid. 
		 */
2456 		if (owner == -1) {
2457 			error = EFAULT;
2458 			break;
2459 		}
2460 
2461 		if ((owner & ~UMUTEX_CONTESTED) == id) {
2462 			suword32(&m->m_ceilings[0], ceiling);
2463 			error = 0;
2464 			break;
2465 		}
2466 
2467 		/*
2468 		 * If we caught a signal, we have retried and now
2469 		 * exit immediately.
2470 		 */
2471 		if (error != 0)
2472 			break;
2473 
2474 		/*
2475 		 * We set the contested bit, sleep. Otherwise the lock changed
2476 		 * and we need to retry or we lost a race to the thread
2477 		 * unlocking the umtx.
2478 		 */
2479 		umtxq_lock(&uq->uq_key);
2480 		umtxq_insert(uq);
2481 		umtxq_unbusy(&uq->uq_key);
2482 		error = umtxq_sleep(uq, "umtxpp", NULL);
2483 		umtxq_remove(uq);
2484 		umtxq_unlock(&uq->uq_key);
2485 	}
2486 	umtxq_lock(&uq->uq_key);
2487 	if (error == 0)
2488 		umtxq_signal(&uq->uq_key, INT_MAX);
2489 	umtxq_unbusy(&uq->uq_key);
2490 	umtxq_unlock(&uq->uq_key);
2491 	umtx_key_release(&uq->uq_key);
2492 	if (error == 0 && old_ceiling != NULL)
2493 		suword32(old_ceiling, save_ceiling);
2494 	return (error);
2495 }
2496 
2497 /*
2498  * Lock a userland POSIX mutex.
2499  */
2500 static int
2501 do_lock_umutex(struct thread *td, struct umutex *m,
2502     struct _umtx_time *timeout, int mode)
2503 {
2504 	uint32_t flags;
2505 	int error;
2506 
2507 	flags = fuword32(&m->m_flags);
2508 	if (flags == -1)
2509 		return (EFAULT);
2510 
2511 	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2512 	case 0:
2513 		error = do_lock_normal(td, m, flags, timeout, mode);
2514 		break;
2515 	case UMUTEX_PRIO_INHERIT:
2516 		error = do_lock_pi(td, m, flags, timeout, mode);
2517 		break;
2518 	case UMUTEX_PRIO_PROTECT:
2519 		error = do_lock_pp(td, m, flags, timeout, mode);
2520 		break;
2521 	default:
2522 		return (EINVAL);
2523 	}
2524 	if (timeout == NULL) {
2525 		if (error == EINTR && mode != _UMUTEX_WAIT)
2526 			error = ERESTART;
2527 	} else {
2528 		/* Timed-locking is not restarted. */
2529 		if (error == ERESTART)
2530 			error = EINTR;
2531 	}
2532 	return (error);
2533 }
2534 
2535 /*
2536  * Unlock a userland POSIX mutex.
2537  */
2538 static int
2539 do_unlock_umutex(struct thread *td, struct umutex *m)
2540 {
2541 	uint32_t flags;
2542 
2543 	flags = fuword32(&m->m_flags);
2544 	if (flags == -1)
2545 		return (EFAULT);
2546 
2547 	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2548 	case 0:
2549 		return (do_unlock_normal(td, m, flags));
2550 	case UMUTEX_PRIO_INHERIT:
2551 		return (do_unlock_pi(td, m, flags));
2552 	case UMUTEX_PRIO_PROTECT:
2553 		return (do_unlock_pp(td, m, flags));
2554 	}
2555 
2556 	return (EINVAL);
2557 }
2558 
2559 static int
2560 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2561     struct timespec *timeout, u_long wflags)
2562 {
2563 	struct abs_timeout timo;
2564 	struct umtx_q *uq;
2565 	uint32_t flags;
2566 	uint32_t clockid;
2567 	int error;
2568 
2569 	uq = td->td_umtxq;
2570 	flags = fuword32(&cv->c_flags);
2571 	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2572 	if (error != 0)
2573 		return (error);
2574 
2575 	if ((wflags & CVWAIT_CLOCKID) != 0) {
2576 		clockid = fuword32(&cv->c_clockid);
2577 		if (clockid < CLOCK_REALTIME ||
2578 		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2579 			/* Only predefined clock ids are allowed. */
			umtx_key_release(&uq->uq_key);
2580 			return (EINVAL);
2581 		}
2582 	} else {
2583 		clockid = CLOCK_REALTIME;
2584 	}
2585 
2586 	umtxq_lock(&uq->uq_key);
2587 	umtxq_busy(&uq->uq_key);
2588 	umtxq_insert(uq);
2589 	umtxq_unlock(&uq->uq_key);
2590 
2591 	/*
2592 	 * Set c_has_waiters to 1 before releasing the user mutex, but
2593 	 * avoid dirtying the cache line when the flag is already set.
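	 * The flag is what allows the userland fast path (e.g.
	 * pthread_cond_signal() in libthr) to skip the kernel call when
	 * no thread is queued on the condition variable.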
2594 */ 2595 if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0) 2596 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1); 2597 2598 umtxq_lock(&uq->uq_key); 2599 umtxq_unbusy(&uq->uq_key); 2600 umtxq_unlock(&uq->uq_key); 2601 2602 error = do_unlock_umutex(td, m); 2603 2604 if (timeout != NULL) 2605 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), 2606 timeout); 2607 2608 umtxq_lock(&uq->uq_key); 2609 if (error == 0) { 2610 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2611 NULL : &timo); 2612 } 2613 2614 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2615 error = 0; 2616 else { 2617 /* 2618 * This must be timeout,interrupted by signal or 2619 * surprious wakeup, clear c_has_waiter flag when 2620 * necessary. 2621 */ 2622 umtxq_busy(&uq->uq_key); 2623 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2624 int oldlen = uq->uq_cur_queue->length; 2625 umtxq_remove(uq); 2626 if (oldlen == 1) { 2627 umtxq_unlock(&uq->uq_key); 2628 suword32( 2629 __DEVOLATILE(uint32_t *, 2630 &cv->c_has_waiters), 0); 2631 umtxq_lock(&uq->uq_key); 2632 } 2633 } 2634 umtxq_unbusy(&uq->uq_key); 2635 if (error == ERESTART) 2636 error = EINTR; 2637 } 2638 2639 umtxq_unlock(&uq->uq_key); 2640 umtx_key_release(&uq->uq_key); 2641 return (error); 2642 } 2643 2644 /* 2645 * Signal a userland condition variable. 2646 */ 2647 static int 2648 do_cv_signal(struct thread *td, struct ucond *cv) 2649 { 2650 struct umtx_key key; 2651 int error, cnt, nwake; 2652 uint32_t flags; 2653 2654 flags = fuword32(&cv->c_flags); 2655 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2656 return (error); 2657 umtxq_lock(&key); 2658 umtxq_busy(&key); 2659 cnt = umtxq_count(&key); 2660 nwake = umtxq_signal(&key, 1); 2661 if (cnt <= nwake) { 2662 umtxq_unlock(&key); 2663 error = suword32( 2664 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2665 umtxq_lock(&key); 2666 } 2667 umtxq_unbusy(&key); 2668 umtxq_unlock(&key); 2669 umtx_key_release(&key); 2670 return (error); 2671 } 2672 2673 static int 2674 do_cv_broadcast(struct thread *td, struct ucond *cv) 2675 { 2676 struct umtx_key key; 2677 int error; 2678 uint32_t flags; 2679 2680 flags = fuword32(&cv->c_flags); 2681 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2682 return (error); 2683 2684 umtxq_lock(&key); 2685 umtxq_busy(&key); 2686 umtxq_signal(&key, INT_MAX); 2687 umtxq_unlock(&key); 2688 2689 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2690 2691 umtxq_lock(&key); 2692 umtxq_unbusy(&key); 2693 umtxq_unlock(&key); 2694 2695 umtx_key_release(&key); 2696 return (error); 2697 } 2698 2699 static int 2700 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2701 { 2702 struct abs_timeout timo; 2703 struct umtx_q *uq; 2704 uint32_t flags, wrflags; 2705 int32_t state, oldstate; 2706 int32_t blocked_readers; 2707 int error; 2708 2709 uq = td->td_umtxq; 2710 flags = fuword32(&rwlock->rw_flags); 2711 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2712 if (error != 0) 2713 return (error); 2714 2715 if (timeout != NULL) 2716 abs_timeout_init2(&timo, timeout); 2717 2718 wrflags = URWLOCK_WRITE_OWNER; 2719 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2720 wrflags |= URWLOCK_WRITE_WAITERS; 2721 2722 for (;;) { 2723 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2724 /* try to lock it */ 2725 while (!(state & wrflags)) { 2726 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2727 
2727 				umtx_key_release(&uq->uq_key);
2728 				return (EAGAIN);
2729 			}
2730 			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2731 			if (oldstate == -1) {
2732 				umtx_key_release(&uq->uq_key);
2733 				return (EFAULT);
2734 			}
2735 			if (oldstate == state) {
2736 				umtx_key_release(&uq->uq_key);
2737 				return (0);
2738 			}
2739 			error = umtxq_check_susp(td);
2740 			if (error != 0)
2741 				break;
2742 			state = oldstate;
2743 		}
2744 
2745 		if (error)
2746 			break;
2747 
2748 		/* grab monitor lock */
2749 		umtxq_lock(&uq->uq_key);
2750 		umtxq_busy(&uq->uq_key);
2751 		umtxq_unlock(&uq->uq_key);
2752 
2753 		/*
2754 		 * re-read the state, in case it changed between the try-lock above
2755 		 * and the check below
2756 		 */
2757 		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2758 
2759 		/* set read contention bit */
2760 		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2761 			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2762 			if (oldstate == -1) {
2763 				error = EFAULT;
2764 				break;
2765 			}
2766 			if (oldstate == state)
2767 				goto sleep;
2768 			state = oldstate;
2769 			error = umtxq_check_susp(td);
2770 			if (error != 0)
2771 				break;
2772 		}
2773 		if (error != 0) {
2774 			umtxq_lock(&uq->uq_key);
2775 			umtxq_unbusy(&uq->uq_key);
2776 			umtxq_unlock(&uq->uq_key);
2777 			break;
2778 		}
2779 
2780 		/* The state changed while we were setting the flags; restart. */
2781 		if (!(state & wrflags)) {
2782 			umtxq_lock(&uq->uq_key);
2783 			umtxq_unbusy(&uq->uq_key);
2784 			umtxq_unlock(&uq->uq_key);
2785 			error = umtxq_check_susp(td);
2786 			if (error != 0)
2787 				break;
2788 			continue;
2789 		}
2790 
2791 sleep:
2792 		/* The contention bit is set; bump the read waiter count before sleeping. */
2793 		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2794 		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2795 
2796 		while (state & wrflags) {
2797 			umtxq_lock(&uq->uq_key);
2798 			umtxq_insert(uq);
2799 			umtxq_unbusy(&uq->uq_key);
2800 
2801 			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
2802 NULL : &timo); 2803 2804 umtxq_busy(&uq->uq_key); 2805 umtxq_remove(uq); 2806 umtxq_unlock(&uq->uq_key); 2807 if (error) 2808 break; 2809 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2810 } 2811 2812 /* decrease read waiter count, and may clear read contention bit */ 2813 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2814 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2815 if (blocked_readers == 1) { 2816 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2817 for (;;) { 2818 oldstate = casuword32(&rwlock->rw_state, state, 2819 state & ~URWLOCK_READ_WAITERS); 2820 if (oldstate == -1) { 2821 error = EFAULT; 2822 break; 2823 } 2824 if (oldstate == state) 2825 break; 2826 state = oldstate; 2827 error = umtxq_check_susp(td); 2828 if (error != 0) 2829 break; 2830 } 2831 } 2832 2833 umtxq_lock(&uq->uq_key); 2834 umtxq_unbusy(&uq->uq_key); 2835 umtxq_unlock(&uq->uq_key); 2836 if (error != 0) 2837 break; 2838 } 2839 umtx_key_release(&uq->uq_key); 2840 if (error == ERESTART) 2841 error = EINTR; 2842 return (error); 2843 } 2844 2845 static int 2846 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2847 { 2848 struct abs_timeout timo; 2849 struct umtx_q *uq; 2850 uint32_t flags; 2851 int32_t state, oldstate; 2852 int32_t blocked_writers; 2853 int32_t blocked_readers; 2854 int error; 2855 2856 uq = td->td_umtxq; 2857 flags = fuword32(&rwlock->rw_flags); 2858 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2859 if (error != 0) 2860 return (error); 2861 2862 if (timeout != NULL) 2863 abs_timeout_init2(&timo, timeout); 2864 2865 blocked_readers = 0; 2866 for (;;) { 2867 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2868 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2869 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER); 2870 if (oldstate == -1) { 2871 umtx_key_release(&uq->uq_key); 2872 return (EFAULT); 2873 } 2874 if (oldstate == state) { 2875 umtx_key_release(&uq->uq_key); 2876 return (0); 2877 } 2878 state = oldstate; 2879 error = umtxq_check_susp(td); 2880 if (error != 0) 2881 break; 2882 } 2883 2884 if (error) { 2885 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2886 blocked_readers != 0) { 2887 umtxq_lock(&uq->uq_key); 2888 umtxq_busy(&uq->uq_key); 2889 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2890 umtxq_unbusy(&uq->uq_key); 2891 umtxq_unlock(&uq->uq_key); 2892 } 2893 2894 break; 2895 } 2896 2897 /* grab monitor lock */ 2898 umtxq_lock(&uq->uq_key); 2899 umtxq_busy(&uq->uq_key); 2900 umtxq_unlock(&uq->uq_key); 2901 2902 /* 2903 * re-read the state, in case it changed between the try-lock above 2904 * and the check below 2905 */ 2906 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2907 2908 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && 2909 (state & URWLOCK_WRITE_WAITERS) == 0) { 2910 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS); 2911 if (oldstate == -1) { 2912 error = EFAULT; 2913 break; 2914 } 2915 if (oldstate == state) 2916 goto sleep; 2917 state = oldstate; 2918 error = umtxq_check_susp(td); 2919 if (error != 0) 2920 break; 2921 } 2922 if (error != 0) { 2923 umtxq_lock(&uq->uq_key); 2924 umtxq_unbusy(&uq->uq_key); 2925 umtxq_unlock(&uq->uq_key); 2926 break; 2927 } 2928 2929 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2930 umtxq_lock(&uq->uq_key); 2931 
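			/*
			 * The lock became free while we were setting
			 * URWLOCK_WRITE_WAITERS; drop the chain busy state
			 * and retry the fast path.
			 */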
umtxq_unbusy(&uq->uq_key); 2932 umtxq_unlock(&uq->uq_key); 2933 error = umtxq_check_susp(td); 2934 if (error != 0) 2935 break; 2936 continue; 2937 } 2938 sleep: 2939 blocked_writers = fuword32(&rwlock->rw_blocked_writers); 2940 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2941 2942 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2943 umtxq_lock(&uq->uq_key); 2944 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2945 umtxq_unbusy(&uq->uq_key); 2946 2947 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2948 NULL : &timo); 2949 2950 umtxq_busy(&uq->uq_key); 2951 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2952 umtxq_unlock(&uq->uq_key); 2953 if (error) 2954 break; 2955 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2956 } 2957 2958 blocked_writers = fuword32(&rwlock->rw_blocked_writers); 2959 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2960 if (blocked_writers == 1) { 2961 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2962 for (;;) { 2963 oldstate = casuword32(&rwlock->rw_state, state, 2964 state & ~URWLOCK_WRITE_WAITERS); 2965 if (oldstate == -1) { 2966 error = EFAULT; 2967 break; 2968 } 2969 if (oldstate == state) 2970 break; 2971 state = oldstate; 2972 error = umtxq_check_susp(td); 2973 /* 2974 * We are leaving the URWLOCK_WRITE_WAITERS 2975 * behind, but this should not harm the 2976 * correctness. 2977 */ 2978 if (error != 0) 2979 break; 2980 } 2981 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2982 } else 2983 blocked_readers = 0; 2984 2985 umtxq_lock(&uq->uq_key); 2986 umtxq_unbusy(&uq->uq_key); 2987 umtxq_unlock(&uq->uq_key); 2988 } 2989 2990 umtx_key_release(&uq->uq_key); 2991 if (error == ERESTART) 2992 error = EINTR; 2993 return (error); 2994 } 2995 2996 static int 2997 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2998 { 2999 struct umtx_q *uq; 3000 uint32_t flags; 3001 int32_t state, oldstate; 3002 int error, q, count; 3003 3004 uq = td->td_umtxq; 3005 flags = fuword32(&rwlock->rw_flags); 3006 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3007 if (error != 0) 3008 return (error); 3009 3010 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 3011 if (state & URWLOCK_WRITE_OWNER) { 3012 for (;;) { 3013 oldstate = casuword32(&rwlock->rw_state, state, 3014 state & ~URWLOCK_WRITE_OWNER); 3015 if (oldstate == -1) { 3016 error = EFAULT; 3017 goto out; 3018 } 3019 if (oldstate != state) { 3020 state = oldstate; 3021 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3022 error = EPERM; 3023 goto out; 3024 } 3025 error = umtxq_check_susp(td); 3026 if (error != 0) 3027 goto out; 3028 } else 3029 break; 3030 } 3031 } else if (URWLOCK_READER_COUNT(state) != 0) { 3032 for (;;) { 3033 oldstate = casuword32(&rwlock->rw_state, state, 3034 state - 1); 3035 if (oldstate == -1) { 3036 error = EFAULT; 3037 goto out; 3038 } 3039 if (oldstate != state) { 3040 state = oldstate; 3041 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3042 error = EPERM; 3043 goto out; 3044 } 3045 error = umtxq_check_susp(td); 3046 if (error != 0) 3047 goto out; 3048 } else 3049 break; 3050 } 3051 } else { 3052 error = EPERM; 3053 goto out; 3054 } 3055 3056 count = 0; 3057 3058 if (!(flags & URWLOCK_PREFER_READER)) { 3059 if (state & URWLOCK_WRITE_WAITERS) { 3060 count = 1; 3061 q = UMTX_EXCLUSIVE_QUEUE; 3062 } else if (state & URWLOCK_READ_WAITERS) { 3063 count = INT_MAX; 3064 q = UMTX_SHARED_QUEUE; 3065 } 3066 } else { 3067 if (state & URWLOCK_READ_WAITERS) { 3068 count = INT_MAX; 3069 q = 
UMTX_SHARED_QUEUE;
3070 		} else if (state & URWLOCK_WRITE_WAITERS) {
3071 			count = 1;
3072 			q = UMTX_EXCLUSIVE_QUEUE;
3073 		}
3074 	}
3075 
3076 	if (count) {
3077 		umtxq_lock(&uq->uq_key);
3078 		umtxq_busy(&uq->uq_key);
3079 		umtxq_signal_queue(&uq->uq_key, count, q);
3080 		umtxq_unbusy(&uq->uq_key);
3081 		umtxq_unlock(&uq->uq_key);
3082 	}
3083 out:
3084 	umtx_key_release(&uq->uq_key);
3085 	return (error);
3086 }
3087 
3088 static int
3089 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
3090 {
3091 	struct abs_timeout timo;
3092 	struct umtx_q *uq;
3093 	uint32_t flags, count;
3094 	int error;
3095 
3096 	uq = td->td_umtxq;
3097 	flags = fuword32(&sem->_flags);
3098 	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3099 	if (error != 0)
3100 		return (error);
3101 
3102 	if (timeout != NULL)
3103 		abs_timeout_init2(&timo, timeout);
3104 
3105 	umtxq_lock(&uq->uq_key);
3106 	umtxq_busy(&uq->uq_key);
3107 	umtxq_insert(uq);
3108 	umtxq_unlock(&uq->uq_key);
3109 	casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
3110 	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
3111 	if (count != 0) {
3112 		umtxq_lock(&uq->uq_key);
3113 		umtxq_unbusy(&uq->uq_key);
3114 		umtxq_remove(uq);
3115 		umtxq_unlock(&uq->uq_key);
3116 		umtx_key_release(&uq->uq_key);
3117 		return (0);
3118 	}
3119 	umtxq_lock(&uq->uq_key);
3120 	umtxq_unbusy(&uq->uq_key);
3121 
3122 	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3123 
3124 	if ((uq->uq_flags & UQF_UMTXQ) == 0)
3125 		error = 0;
3126 	else {
3127 		umtxq_remove(uq);
3128 		/* A relative timeout cannot be restarted. */
3129 		if (error == ERESTART && timeout != NULL &&
3130 		    (timeout->_flags & UMTX_ABSTIME) == 0)
3131 			error = EINTR;
3132 	}
3133 	umtxq_unlock(&uq->uq_key);
3134 	umtx_key_release(&uq->uq_key);
3135 	return (error);
3136 }
3137 
3138 /*
3139  * Wake up a thread waiting on a userland semaphore.
3140  */
3141 static int
3142 do_sem_wake(struct thread *td, struct _usem *sem)
3143 {
3144 	struct umtx_key key;
3145 	int error, cnt;
3146 	uint32_t flags;
3147 
3148 	flags = fuword32(&sem->_flags);
3149 	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3150 		return (error);
3151 	umtxq_lock(&key);
3152 	umtxq_busy(&key);
3153 	cnt = umtxq_count(&key);
3154 	if (cnt > 0) {
3155 		umtxq_signal(&key, 1);
3156 		/*
3157 		 * A waiter count above zero means the semaphore memory is
3158 		 * still referenced by user code, so the _has_waiters flag
3159 		 * can be updated safely.
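		 * Clearing it is only needed when the thread just woken
		 * was the last waiter (cnt == 1).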
3160 */ 3161 if (cnt == 1) { 3162 umtxq_unlock(&key); 3163 error = suword32( 3164 __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0); 3165 umtxq_lock(&key); 3166 } 3167 } 3168 umtxq_unbusy(&key); 3169 umtxq_unlock(&key); 3170 umtx_key_release(&key); 3171 return (error); 3172 } 3173 3174 int 3175 sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap) 3176 /* struct umtx *umtx */ 3177 { 3178 return do_lock_umtx(td, uap->umtx, td->td_tid, 0); 3179 } 3180 3181 int 3182 sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap) 3183 /* struct umtx *umtx */ 3184 { 3185 return do_unlock_umtx(td, uap->umtx, td->td_tid); 3186 } 3187 3188 inline int 3189 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3190 { 3191 int error; 3192 3193 error = copyin(addr, tsp, sizeof(struct timespec)); 3194 if (error == 0) { 3195 if (tsp->tv_sec < 0 || 3196 tsp->tv_nsec >= 1000000000 || 3197 tsp->tv_nsec < 0) 3198 error = EINVAL; 3199 } 3200 return (error); 3201 } 3202 3203 static inline int 3204 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3205 { 3206 int error; 3207 3208 if (size <= sizeof(struct timespec)) { 3209 tp->_clockid = CLOCK_REALTIME; 3210 tp->_flags = 0; 3211 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3212 } else 3213 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3214 if (error != 0) 3215 return (error); 3216 if (tp->_timeout.tv_sec < 0 || 3217 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3218 return (EINVAL); 3219 return (0); 3220 } 3221 3222 static int 3223 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap) 3224 { 3225 struct timespec *ts, timeout; 3226 int error; 3227 3228 /* Allow a null timespec (wait forever). */ 3229 if (uap->uaddr2 == NULL) 3230 ts = NULL; 3231 else { 3232 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3233 if (error != 0) 3234 return (error); 3235 ts = &timeout; 3236 } 3237 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3238 } 3239 3240 static int 3241 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap) 3242 { 3243 return (do_unlock_umtx(td, uap->obj, uap->val)); 3244 } 3245 3246 static int 3247 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3248 { 3249 struct _umtx_time timeout, *tm_p; 3250 int error; 3251 3252 if (uap->uaddr2 == NULL) 3253 tm_p = NULL; 3254 else { 3255 error = umtx_copyin_umtx_time( 3256 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3257 if (error != 0) 3258 return (error); 3259 tm_p = &timeout; 3260 } 3261 return do_wait(td, uap->obj, uap->val, tm_p, 0, 0); 3262 } 3263 3264 static int 3265 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3266 { 3267 struct _umtx_time timeout, *tm_p; 3268 int error; 3269 3270 if (uap->uaddr2 == NULL) 3271 tm_p = NULL; 3272 else { 3273 error = umtx_copyin_umtx_time( 3274 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3275 if (error != 0) 3276 return (error); 3277 tm_p = &timeout; 3278 } 3279 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3280 } 3281 3282 static int 3283 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3284 { 3285 struct _umtx_time *tm_p, timeout; 3286 int error; 3287 3288 if (uap->uaddr2 == NULL) 3289 tm_p = NULL; 3290 else { 3291 error = umtx_copyin_umtx_time( 3292 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3293 if (error != 0) 3294 return (error); 3295 tm_p = &timeout; 3296 } 3297 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3298 } 3299 3300 static int 3301 __umtx_op_wake(struct thread *td, struct 
_umtx_op_args *uap) 3302 { 3303 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3304 } 3305 3306 #define BATCH_SIZE 128 3307 static int 3308 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3309 { 3310 int count = uap->val; 3311 void *uaddrs[BATCH_SIZE]; 3312 char **upp = (char **)uap->obj; 3313 int tocopy; 3314 int error = 0; 3315 int i, pos = 0; 3316 3317 while (count > 0) { 3318 tocopy = count; 3319 if (tocopy > BATCH_SIZE) 3320 tocopy = BATCH_SIZE; 3321 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *)); 3322 if (error != 0) 3323 break; 3324 for (i = 0; i < tocopy; ++i) 3325 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3326 count -= tocopy; 3327 pos += tocopy; 3328 } 3329 return (error); 3330 } 3331 3332 static int 3333 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3334 { 3335 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3336 } 3337 3338 static int 3339 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3340 { 3341 struct _umtx_time *tm_p, timeout; 3342 int error; 3343 3344 /* Allow a null timespec (wait forever). */ 3345 if (uap->uaddr2 == NULL) 3346 tm_p = NULL; 3347 else { 3348 error = umtx_copyin_umtx_time( 3349 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3350 if (error != 0) 3351 return (error); 3352 tm_p = &timeout; 3353 } 3354 return do_lock_umutex(td, uap->obj, tm_p, 0); 3355 } 3356 3357 static int 3358 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3359 { 3360 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); 3361 } 3362 3363 static int 3364 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3365 { 3366 struct _umtx_time *tm_p, timeout; 3367 int error; 3368 3369 /* Allow a null timespec (wait forever). */ 3370 if (uap->uaddr2 == NULL) 3371 tm_p = NULL; 3372 else { 3373 error = umtx_copyin_umtx_time( 3374 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3375 if (error != 0) 3376 return (error); 3377 tm_p = &timeout; 3378 } 3379 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3380 } 3381 3382 static int 3383 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3384 { 3385 return do_wake_umutex(td, uap->obj); 3386 } 3387 3388 static int 3389 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3390 { 3391 return do_unlock_umutex(td, uap->obj); 3392 } 3393 3394 static int 3395 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3396 { 3397 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 3398 } 3399 3400 static int 3401 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3402 { 3403 struct timespec *ts, timeout; 3404 int error; 3405 3406 /* Allow a null timespec (wait forever). */ 3407 if (uap->uaddr2 == NULL) 3408 ts = NULL; 3409 else { 3410 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3411 if (error != 0) 3412 return (error); 3413 ts = &timeout; 3414 } 3415 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3416 } 3417 3418 static int 3419 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3420 { 3421 return do_cv_signal(td, uap->obj); 3422 } 3423 3424 static int 3425 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3426 { 3427 return do_cv_broadcast(td, uap->obj); 3428 } 3429 3430 static int 3431 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3432 { 3433 struct _umtx_time timeout; 3434 int error; 3435 3436 /* Allow a null timespec (wait forever). 
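	 * When uaddr2 is non-NULL it points at the timeout and uaddr1
	 * carries the timeout's size: at most sizeof(struct timespec)
	 * means a plain CLOCK_REALTIME timespec, anything larger a
	 * struct _umtx_time (see umtx_copyin_umtx_time() above).
	 * Illustrative userland call, with "rw" standing in for a
	 * struct urwlock; a sketch only, not part of this file:
	 *
	 *	struct _umtx_time to = { { 1, 0 }, 0, CLOCK_MONOTONIC };
	 *	(void)_umtx_op(&rw, UMTX_OP_RW_RDLOCK, 0,
	 *	    (void *)sizeof(to), &to);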
*/ 3437 if (uap->uaddr2 == NULL) { 3438 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3439 } else { 3440 error = umtx_copyin_umtx_time(uap->uaddr2, 3441 (size_t)uap->uaddr1, &timeout); 3442 if (error != 0) 3443 return (error); 3444 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3445 } 3446 return (error); 3447 } 3448 3449 static int 3450 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3451 { 3452 struct _umtx_time timeout; 3453 int error; 3454 3455 /* Allow a null timespec (wait forever). */ 3456 if (uap->uaddr2 == NULL) { 3457 error = do_rw_wrlock(td, uap->obj, 0); 3458 } else { 3459 error = umtx_copyin_umtx_time(uap->uaddr2, 3460 (size_t)uap->uaddr1, &timeout); 3461 if (error != 0) 3462 return (error); 3463 3464 error = do_rw_wrlock(td, uap->obj, &timeout); 3465 } 3466 return (error); 3467 } 3468 3469 static int 3470 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3471 { 3472 return do_rw_unlock(td, uap->obj); 3473 } 3474 3475 static int 3476 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3477 { 3478 struct _umtx_time *tm_p, timeout; 3479 int error; 3480 3481 /* Allow a null timespec (wait forever). */ 3482 if (uap->uaddr2 == NULL) 3483 tm_p = NULL; 3484 else { 3485 error = umtx_copyin_umtx_time( 3486 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3487 if (error != 0) 3488 return (error); 3489 tm_p = &timeout; 3490 } 3491 return (do_sem_wait(td, uap->obj, tm_p)); 3492 } 3493 3494 static int 3495 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3496 { 3497 return do_sem_wake(td, uap->obj); 3498 } 3499 3500 static int 3501 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3502 { 3503 return do_wake2_umutex(td, uap->obj, uap->val); 3504 } 3505 3506 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3507 3508 static _umtx_op_func op_table[] = { 3509 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */ 3510 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */ 3511 __umtx_op_wait, /* UMTX_OP_WAIT */ 3512 __umtx_op_wake, /* UMTX_OP_WAKE */ 3513 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3514 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3515 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3516 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3517 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 3518 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3519 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3520 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 3521 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 3522 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 3523 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3524 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3525 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3526 __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */ 3527 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */ 3528 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ 3529 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3530 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */ 3531 __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */ 3532 }; 3533 3534 int 3535 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3536 { 3537 if ((unsigned)uap->op < UMTX_OP_MAX) 3538 return (*op_table[uap->op])(td, uap); 3539 return (EINVAL); 3540 } 3541 3542 #ifdef COMPAT_FREEBSD32 3543 int 3544 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap) 3545 /* struct umtx *umtx */ 3546 { 3547 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 3548 } 3549 3550 int 3551 
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3552 /* struct umtx *umtx */
3553 {
3554 	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3555 }
3556 
3557 struct timespec32 {
3558 	int32_t tv_sec;
3559 	int32_t tv_nsec;
3560 };
3561 
3562 struct umtx_time32 {
3563 	struct timespec32 timeout;
3564 	uint32_t flags;
3565 	uint32_t clockid;
3566 };
3567 
3568 static inline int
3569 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
3570 {
3571 	struct timespec32 ts32;
3572 	int error;
3573 
3574 	error = copyin(addr, &ts32, sizeof(struct timespec32));
3575 	if (error == 0) {
3576 		if (ts32.tv_sec < 0 ||
3577 		    ts32.tv_nsec >= 1000000000 ||
3578 		    ts32.tv_nsec < 0)
3579 			error = EINVAL;
3580 		else {
3581 			tsp->tv_sec = ts32.tv_sec;
3582 			tsp->tv_nsec = ts32.tv_nsec;
3583 		}
3584 	}
3585 	return (error);
3586 }
3587 
3588 static inline int
3589 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
3590 {
3591 	struct umtx_time32 t32;
3592 	int error;
3593 
3594 	t32.clockid = CLOCK_REALTIME;
3595 	t32.flags = 0;
3596 	if (size <= sizeof(struct timespec32))
3597 		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
3598 	else
3599 		error = copyin(addr, &t32, sizeof(struct umtx_time32));
3600 	if (error != 0)
3601 		return (error);
3602 	if (t32.timeout.tv_sec < 0 ||
3603 	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
3604 		return (EINVAL);
3605 	tp->_timeout.tv_sec = t32.timeout.tv_sec;
3606 	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
3607 	tp->_flags = t32.flags;
3608 	tp->_clockid = t32.clockid;
3609 	return (0);
3610 }
3611 
3612 static int
3613 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3614 {
3615 	struct timespec *ts, timeout;
3616 	int error;
3617 
3618 	/* Allow a null timespec (wait forever). */
3619 	if (uap->uaddr2 == NULL)
3620 		ts = NULL;
3621 	else {
3622 		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3623 		if (error != 0)
3624 			return (error);
3625 		ts = &timeout;
3626 	}
3627 	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3628 }
3629 
3630 static int
3631 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3632 {
3633 	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3634 }
3635 
3636 static int
3637 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3638 {
3639 	struct _umtx_time *tm_p, timeout;
3640 	int error;
3641 
3642 	if (uap->uaddr2 == NULL)
3643 		tm_p = NULL;
3644 	else {
3645 		error = umtx_copyin_umtx_time32(uap->uaddr2,
3646 		    (size_t)uap->uaddr1, &timeout);
3647 		if (error != 0)
3648 			return (error);
3649 		tm_p = &timeout;
3650 	}
3651 	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3652 }
3653 
3654 static int
3655 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3656 {
3657 	struct _umtx_time *tm_p, timeout;
3658 	int error;
3659 
3660 	/* Allow a null timespec (wait forever). */
3661 	if (uap->uaddr2 == NULL)
3662 		tm_p = NULL;
3663 	else {
3664 		error = umtx_copyin_umtx_time32(uap->uaddr2,
3665 		    (size_t)uap->uaddr1, &timeout);
3666 		if (error != 0)
3667 			return (error);
3668 		tm_p = &timeout;
3669 	}
3670 	return do_lock_umutex(td, uap->obj, tm_p, 0);
3671 }
3672 
3673 static int
3674 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3675 {
3676 	struct _umtx_time *tm_p, timeout;
3677 	int error;
3678 
3679 	/* Allow a null timespec (wait forever).
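	 * 32-bit callers pass a struct timespec32 or struct umtx_time32;
	 * umtx_copyin_umtx_time32() widens either form into the native
	 * struct _umtx_time.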
*/ 3680 if (uap->uaddr2 == NULL) 3681 tm_p = NULL; 3682 else { 3683 error = umtx_copyin_umtx_time32(uap->uaddr2, 3684 (size_t)uap->uaddr1, &timeout); 3685 if (error != 0) 3686 return (error); 3687 tm_p = &timeout; 3688 } 3689 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3690 } 3691 3692 static int 3693 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3694 { 3695 struct timespec *ts, timeout; 3696 int error; 3697 3698 /* Allow a null timespec (wait forever). */ 3699 if (uap->uaddr2 == NULL) 3700 ts = NULL; 3701 else { 3702 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 3703 if (error != 0) 3704 return (error); 3705 ts = &timeout; 3706 } 3707 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3708 } 3709 3710 static int 3711 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3712 { 3713 struct _umtx_time timeout; 3714 int error; 3715 3716 /* Allow a null timespec (wait forever). */ 3717 if (uap->uaddr2 == NULL) { 3718 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3719 } else { 3720 error = umtx_copyin_umtx_time32(uap->uaddr2, 3721 (size_t)uap->uaddr1, &timeout); 3722 if (error != 0) 3723 return (error); 3724 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3725 } 3726 return (error); 3727 } 3728 3729 static int 3730 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 3731 { 3732 struct _umtx_time timeout; 3733 int error; 3734 3735 /* Allow a null timespec (wait forever). */ 3736 if (uap->uaddr2 == NULL) { 3737 error = do_rw_wrlock(td, uap->obj, 0); 3738 } else { 3739 error = umtx_copyin_umtx_time32(uap->uaddr2, 3740 (size_t)uap->uaddr1, &timeout); 3741 if (error != 0) 3742 return (error); 3743 error = do_rw_wrlock(td, uap->obj, &timeout); 3744 } 3745 return (error); 3746 } 3747 3748 static int 3749 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3750 { 3751 struct _umtx_time *tm_p, timeout; 3752 int error; 3753 3754 if (uap->uaddr2 == NULL) 3755 tm_p = NULL; 3756 else { 3757 error = umtx_copyin_umtx_time32( 3758 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 3759 if (error != 0) 3760 return (error); 3761 tm_p = &timeout; 3762 } 3763 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3764 } 3765 3766 static int 3767 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 3768 { 3769 struct _umtx_time *tm_p, timeout; 3770 int error; 3771 3772 /* Allow a null timespec (wait forever). 
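	 * As in the native __umtx_op_sem_wait(), a relative timeout
	 * makes the wait non-restartable: do_sem_wait() converts
	 * ERESTART into EINTR in that case.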
	 */
3773 	if (uap->uaddr2 == NULL)
3774 		tm_p = NULL;
3775 	else {
3776 		error = umtx_copyin_umtx_time32(uap->uaddr2,
3777 		    (size_t)uap->uaddr1, &timeout);
3778 		if (error != 0)
3779 			return (error);
3780 		tm_p = &timeout;
3781 	}
3782 	return (do_sem_wait(td, uap->obj, tm_p));
3783 }
3784 
3785 static int
3786 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3787 {
3788 	int count = uap->val;
3789 	uint32_t uaddrs[BATCH_SIZE];
3790 	uint32_t **upp = (uint32_t **)uap->obj;
3791 	int tocopy;
3792 	int error = 0;
3793 	int i, pos = 0;
3794 
3795 	while (count > 0) {
3796 		tocopy = count;
3797 		if (tocopy > BATCH_SIZE)
3798 			tocopy = BATCH_SIZE;
3799 		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3800 		if (error != 0)
3801 			break;
3802 		for (i = 0; i < tocopy; ++i)
3803 			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3804 			    INT_MAX, 1);
3805 		count -= tocopy;
3806 		pos += tocopy;
3807 	}
3808 	return (error);
3809 }
3810 
3811 static _umtx_op_func op_table_compat32[] = {
3812 	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3813 	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3814 	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3815 	__umtx_op_wake,			/* UMTX_OP_WAKE */
3816 	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3817 	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3818 	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3819 	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3820 	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3821 	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3822 	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3823 	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3824 	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3825 	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3826 	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3827 	__umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
3828 	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3829 	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3830 	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3831 	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
3832 	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3833 	__umtx_op_nwake_private32,	/* UMTX_OP_NWAKE_PRIVATE */
3834 	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
3835 };
3836 
3837 int
3838 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3839 {
3840 	if ((unsigned)uap->op < UMTX_OP_MAX)
3841 		return (*op_table_compat32[uap->op])(td,
3842 		    (struct _umtx_op_args *)uap);
3843 	return (EINVAL);
3844 }
3845 #endif
3846 
3847 void
3848 umtx_thread_init(struct thread *td)
3849 {
3850 	td->td_umtxq = umtxq_alloc();
3851 	td->td_umtxq->uq_thread = td;
3852 }
3853 
3854 void
3855 umtx_thread_fini(struct thread *td)
3856 {
3857 	umtxq_free(td->td_umtxq);
3858 }
3859 
3860 /*
3861  * Called when a new thread is created, e.g. by fork().
3862  */
3863 void
3864 umtx_thread_alloc(struct thread *td)
3865 {
3866 	struct umtx_q *uq;
3867 
3868 	uq = td->td_umtxq;
3869 	uq->uq_inherited_pri = PRI_MAX;
3870 
3871 	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3872 	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3873 	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3874 	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3875 }
3876 
3877 /*
3878  * exec() hook.
3879  */
3880 static void
3881 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3882     struct image_params *imgp __unused)
3883 {
3884 	umtx_thread_cleanup(curthread);
3885 }
3886 
3887 /*
3888  * thread_exit() hook.
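 *
 * Drops any priority boost the exiting thread still holds from
 * contested PI/PP mutexes; see umtx_thread_cleanup() below.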
3889 */ 3890 void 3891 umtx_thread_exit(struct thread *td) 3892 { 3893 umtx_thread_cleanup(td); 3894 } 3895 3896 /* 3897 * clean up umtx data. 3898 */ 3899 static void 3900 umtx_thread_cleanup(struct thread *td) 3901 { 3902 struct umtx_q *uq; 3903 struct umtx_pi *pi; 3904 3905 if ((uq = td->td_umtxq) == NULL) 3906 return; 3907 3908 mtx_lock_spin(&umtx_lock); 3909 uq->uq_inherited_pri = PRI_MAX; 3910 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 3911 pi->pi_owner = NULL; 3912 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 3913 } 3914 mtx_unlock_spin(&umtx_lock); 3915 thread_lock(td); 3916 sched_lend_user_prio(td, PRI_MAX); 3917 thread_unlock(td); 3918 } 3919