/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>
#include <sys/umtxvar.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#include <compat/freebsd32/freebsd32.h>
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY	1
#define	_UMUTEX_WAIT	2
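
/*
 * Compare two chain-length percentages that are kept as (whole,
 * fraction) pairs of integers: the pair (w, f) is bigger than (sw, sf)
 * when its whole part is larger, or when the whole parts are equal and
 * its fractional part is larger.
 */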
#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#ifdef INVARIANTS
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {				\
	struct umtxq_chain *uc;						\
									\
	uc = umtxq_getchain(key);					\
	mtx_assert(&uc->uc_lock, MA_OWNED);				\
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));		\
} while (0)
#else
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0)
#endif

/*
 * Don't propagate time-sharing priority; doing so would be a security
 * problem.  A user could create a PI-mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * time-sharing priority would be boosted, and that boost would be
 * propagated to A as well.  A's priority would then never be lowered,
 * even if it used 100% of the CPU, which is unfair to other processes.
 */
#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS	200

struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, "umtx chain stats");
#endif

static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
SYSINIT(umtx, SI_SUB_EVENTHANDLER + 1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
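
/*
 * umtx_lock serializes the priority-inheritance machinery: umtx_pi
 * ownership, the per-mutex lists of blocked threads, and the priority
 * propagation walks implemented further below.
 */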
static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}
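
/*
 * Sketch of how these knobs are meant to be driven from userland,
 * assuming a kernel built with options UMTX_PROFILING:
 *
 *	# sysctl debug.umtx.chains.peaks	(report the five longest
 *						 chains per queue class)
 *	# sysctl debug.umtx.chains.clear=1	(reset length statistics)
 */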
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I",
    "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A",
    "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}
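
/*
 * Multiplicative hash: the two key fields are summed and multiplied by
 * GOLDEN_RATIO_PRIME (an odd constant close to 2^32 divided by the
 * golden ratio), which scatters nearby addresses across the table.
 * UMTX_SHIFTS keeps the top 9 bits of the 32-bit product, matching the
 * default UMTX_CHAINS of 512; the final modulo only matters when
 * UMTX_CHAINS is overridden at build time.
 */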
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Set the chain to the busy state when the following operation may
 * block (a kernel mutex can not be held across it).
 */
void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}
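
/*
 * Queue headers are never allocated on the sleep path.  Every umtx_q
 * carries one spare umtxq_queue: the first thread to wait on a key
 * donates its spare as the queue header, and later waiters park their
 * spares on the chain's uc_spare_queue list.  On removal, a thread
 * takes a header back (its own, or an equivalent spare), so the counts
 * always balance.
 */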
void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of threads waiting on the key's shared queue.
 */
int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and, through *first, the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object, matching waiters by
 * bitset.
 */
int
umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq, *uq_temp;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh == NULL)
		return (0);
	TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
		if ((uq->uq_bitset & bitset) == 0)
			continue;
		umtxq_remove_queue(uq, UMTX_SHARED_QUEUE);
		wakeup_one(uq);
		if (++ret >= n_wake)
			break;
	}
	return (ret);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Wake up a maximum of n_wake threads that are waiting on a userland
 * object identified by key.  The remaining threads are removed from
 * the queue identified by key and added to the queue identified by
 * key2 (requeued).  The n_requeue specifies an upper limit on the
 * number of threads that are requeued to the second queue.
 */
int
umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2,
    int n_requeue)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq, *uq_temp;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh == NULL)
		return (0);
	TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
		if (++ret <= n_wake) {
			umtxq_remove(uq);
			wakeup_one(uq);
		} else {
			umtxq_remove(uq);
			uq->uq_key = *key2;
			umtxq_insert(uq);
			if (ret - n_wake == n_requeue)
				break;
		}
	}
	return (ret);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}

void
umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid,
    int absolute, const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		kern_clock_gettime(curthread, timo->clockid, &timo->cur);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE ||
		    clockid == CLOCK_SECOND;
	}
}

static void
umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime)
{

	umtx_abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}
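
/*
 * Convert the absolute end time into the sbintime/flags pair expected
 * by msleep_sbt().  Clocks backed by the timecounter can be turned
 * into an absolute sleep deadline (C_ABSOLUTE); the realtime clocks
 * are first rebased onto the boot-relative uptime.  Clocks with no
 * event source behind them (virtual, profiling, CPU time) can only be
 * handled by waking up periodically and re-checking, so they fall back
 * to a relative tick count.
 */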
static int
umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt,
    int *flags)
{
	struct bintime bt, bbt;
	struct timespec tts;

	switch (timo->clockid) {

	/* Clocks that can be converted into absolute time. */
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_PRECISE:
	case CLOCK_REALTIME_FAST:
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_PRECISE:
	case CLOCK_MONOTONIC_FAST:
	case CLOCK_UPTIME:
	case CLOCK_UPTIME_PRECISE:
	case CLOCK_UPTIME_FAST:
	case CLOCK_SECOND:
		timespec2bintime(&timo->end, &bt);
		switch (timo->clockid) {
		case CLOCK_REALTIME:
		case CLOCK_REALTIME_PRECISE:
		case CLOCK_REALTIME_FAST:
		case CLOCK_SECOND:
			getboottimebin(&bbt);
			bintime_sub(&bt, &bbt);
			break;
		}
		if (bt.sec < 0)
			return (ETIMEDOUT);
		if (bt.sec >= (SBT_MAX >> 32)) {
			*sbt = 0;
			*flags = 0;
			return (0);
		}
		*sbt = bttosbt(bt);
		switch (timo->clockid) {
		case CLOCK_REALTIME_FAST:
		case CLOCK_MONOTONIC_FAST:
		case CLOCK_UPTIME_FAST:
			*sbt += tc_tick_sbt;
			break;
		case CLOCK_SECOND:
			*sbt += SBT_1S;
			break;
		}
		*flags = C_ABSOLUTE;
		return (0);

	/* Clocks that have to be periodically polled. */
	case CLOCK_VIRTUAL:
	case CLOCK_PROF:
	case CLOCK_THREAD_CPUTIME_ID:
	case CLOCK_PROCESS_CPUTIME_ID:
	default:
		kern_clock_gettime(curthread, timo->clockid, &timo->cur);
		if (timespeccmp(&timo->end, &timo->cur, <=))
			return (ETIMEDOUT);
		timespecsub(&timo->end, &timo->cur, &tts);
		*sbt = tick_sbt * tstohz(&tts);
		*flags = C_HARDCLOCK;
		return (0);
	}
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

/*
 * Put the thread into sleep state; before sleeping, check whether the
 * thread was removed from the umtx queue.
 */
int
umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct umtx_abs_timeout *timo)
{
	struct umtxq_chain *uc;
	sbintime_t sbt = 0;
	int error, flags = 0;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (timo != NULL) {
			if (timo->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			error = umtx_abs_timeout_getsbt(timo, &sbt, &flags);
			if (error != 0)
				break;
		}
		error = msleep_sbt(uq, &uc->uc_lock, PCATCH, wmesg,
		    sbt, 0, flags);
		if (error == EINTR || error == ERESTART)
			break;
		if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
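
/*
 * A private key names the address within the current vmspace, so two
 * processes mapping the same page never collide.  A shared key names
 * the backing VM object and the offset into it, so every mapping of
 * the same object resolves to the same key.  AUTO_SHARE picks between
 * the two based on the inheritance of the map entry.
 */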
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
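
/*
 * The COMPAT_FREEBSD10 block below implements lock and unlock for the
 * historical struct umtx, whose owner field is a bare word, for the
 * benefit of the old syscalls (with 32-bit variants guarded by
 * COMPAT_FREEBSD32).
 */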
#ifdef COMPAT_FREEBSD10
/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in
		 * userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/*
		 * If no one owns it but it is contested, try to
		 * acquire it.
		 */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is at most one thread waiting on it.  Otherwise, it
	 * must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in
		 * userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/*
		 * If no one owns it but it is contested, try to
		 * acquire it.
		 */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is at most one thread waiting on it.  Otherwise, it
	 * must be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif /* COMPAT_FREEBSD32 */
#endif /* COMPAT_FREEBSD10 */

/*
 * Fetch and compare value; sleep on the address if the value has not
 * changed.
 */
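
/*
 * This is the futex-style building block: userland publishes a word,
 * then calls here to sleep only while the word still holds the value
 * it last observed.  A racing waker changes the word first and then
 * wakes the address, so a stale sleep is impossible; at worst the
 * caller returns immediately and re-checks.
 */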
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
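
/*
 * The owner word protocol, shared by all umutex flavors in this file:
 * an unowned mutex holds UMUTEX_UNOWNED, a locked one holds the owner
 * thread's tid, and UMUTEX_CONTESTED is OR-ed in once a waiter exists,
 * so that the owner's userland unlock path knows it must enter the
 * kernel to wake someone.  Robust mutexes add the UMUTEX_RB_OWNERDEAD
 * and UMUTEX_RB_NOTRECOV terminal values.
 */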
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * The owner of the robust mutex terminated.
			 * The kernel's duty is to return EOWNERDEAD to
			 * userspace.  The umutex.m_flags
			 * UMUTEX_NONCONSISTENT is set by the common
			 * userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
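
/*
 * Unlock splits into a fast and a slow path on the contested bit: an
 * uncontested owner word is simply CAS-ed back to the unlock value
 * (which is UMUTEX_RB_OWNERDEAD or UMUTEX_RB_NOTRECOV for a dying or
 * non-consistent robust mutex, per umtx_unlock_val()), while a
 * contested one requires busying the chain, counting the waiters and
 * waking exactly one of them.
 */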
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is at most one thread waiting on it.  Otherwise, it
	 * must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 */
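
/*
 * This wake path is advisory: if the mutex turns out to be owned,
 * nothing is done.  When at most one waiter remains (and the mutex is
 * not in a robust terminal state), the owner word is also CAS-ed from
 * UMUTEX_CONTESTED back to UMUTEX_UNOWNED, so future unlocks can stay
 * on the userland fast path.
 */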
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
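
/*
 * Unlike do_wake_umutex() above, this handles every mutex flavor (the
 * type is decoded from the flags argument) and works in the opposite
 * direction: it sets UMUTEX_CONTESTED on a locked mutex that still has
 * waiters, so that the next unlock is forced through the kernel.
 */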
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; this
	 * means the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has
 * been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread's or higher than the next thread's.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
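
/*
 * The propagation walk follows the ownership chain: the mutex this
 * thread blocks on is owned by some thread, which may itself be
 * blocked on another PI mutex, and so on.  Each owner on the chain is
 * lent the waiter's priority if that is better than what it already
 * runs at.  umtx_pi_check_loop() above bails out when userland has
 * built a cycle of ownership, which would otherwise make this walk
 * spin forever.
 */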
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owner's list of contested PI mutexes.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed up the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count of a PI mutex.
 */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Drop a PI mutex and wake up the top waiter.
 */
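
/*
 * Dropping is split from the owner-word update in do_unlock_pi():
 * this function only fixes up the kernel-side PI state (disown the
 * mutex, recompute the lent priority from any remaining contested PI
 * mutexes) and picks the waiter to signal; the caller then rewrites
 * the user-visible owner word.
 */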
int
umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count)
{
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	int pri;

	UMTXQ_ASSERT_LOCKED_BUSY(key);
	*count = umtxq_count_pi(key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* Get the highest-priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	return (0);
}

/*
 * Lock a PI mutex.
 */
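
/*
 * The lock path mirrors do_lock_normal(), with two extras: a umtx_pi
 * is looked up or allocated to represent this mutex in the kernel
 * (re-checked after the M_WAITOK allocation may have slept), and the
 * sleep goes through umtxq_sleep_pi() so that the waiter lends its
 * priority to the recorded owner while it waits.
 */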
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with the suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If the CAS failed, the lock could have
				 * changed; restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry, or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible here.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t id, new_owner, old, owner;
	int count, error;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	error = umtx_pi_drop(td, &key, rb, &count);
	if (error != 0) {
		umtxq_unbusy(&key);
		umtxq_unlock(&key);
		umtx_key_release(&key);
		/* userland messed up the mutex */
		return (error);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.  Otherwise, it must
	 * be marked as contested.
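	 * For example, with count == 2 the stored value keeps the
	 * contested bit: the woken waiter acquires
	 * (tid | UMUTEX_CONTESTED), so its own unlock enters the
	 * kernel again and wakes the remaining waiter.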
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * have not already caught a signal.  If we
			 * get an error from the check, the same
			 * condition is checked by the umtxq_sleep()
			 * call below, so we should clear the error
			 * to not skip the last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
	 * to lock the mutex.  This is necessary because the thread
	 * priority has to be adjusted for such a mutex.
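	 * As a consequence, a PP mutex has no userland fast path at
	 * all: aside from the robust termination values, m_owner only
	 * toggles between UMUTEX_CONTESTED and (tid | UMUTEX_CONTESTED),
	 * and both lock and unlock must go through the kernel.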
2693 */ 2694 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2695 UMUTEX_CONTESTED); 2696 2697 umtxq_lock(&key); 2698 if (error == 0) 2699 umtxq_signal(&key, 1); 2700 umtxq_unbusy(&key); 2701 umtxq_unlock(&key); 2702 2703 if (error == -1) 2704 error = EFAULT; 2705 else { 2706 mtx_lock(&umtx_lock); 2707 if (su != 0) 2708 uq->uq_inherited_pri = new_inherited_pri; 2709 pri = PRI_MAX; 2710 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2711 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2712 if (uq2 != NULL) { 2713 if (pri > UPRI(uq2->uq_thread)) 2714 pri = UPRI(uq2->uq_thread); 2715 } 2716 } 2717 if (pri > uq->uq_inherited_pri) 2718 pri = uq->uq_inherited_pri; 2719 thread_lock(td); 2720 sched_lend_user_prio(td, pri); 2721 thread_unlock(td); 2722 mtx_unlock(&umtx_lock); 2723 } 2724 umtx_key_release(&key); 2725 return (error); 2726 } 2727 2728 static int 2729 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2730 uint32_t *old_ceiling) 2731 { 2732 struct umtx_q *uq; 2733 uint32_t flags, id, owner, save_ceiling; 2734 int error, rv, rv1; 2735 2736 error = fueword32(&m->m_flags, &flags); 2737 if (error == -1) 2738 return (EFAULT); 2739 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2740 return (EINVAL); 2741 if (ceiling > RTP_PRIO_MAX) 2742 return (EINVAL); 2743 id = td->td_tid; 2744 uq = td->td_umtxq; 2745 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2746 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2747 &uq->uq_key)) != 0) 2748 return (error); 2749 for (;;) { 2750 umtxq_lock(&uq->uq_key); 2751 umtxq_busy(&uq->uq_key); 2752 umtxq_unlock(&uq->uq_key); 2753 2754 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2755 if (rv == -1) { 2756 error = EFAULT; 2757 break; 2758 } 2759 2760 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2761 id | UMUTEX_CONTESTED); 2762 if (rv == -1) { 2763 error = EFAULT; 2764 break; 2765 } 2766 2767 if (rv == 0) { 2768 MPASS(owner == UMUTEX_CONTESTED); 2769 rv = suword32(&m->m_ceilings[0], ceiling); 2770 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2771 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2772 break; 2773 } 2774 2775 if ((owner & ~UMUTEX_CONTESTED) == id) { 2776 rv = suword32(&m->m_ceilings[0], ceiling); 2777 error = rv == 0 ? 0 : EFAULT; 2778 break; 2779 } 2780 2781 if (owner == UMUTEX_RB_OWNERDEAD) { 2782 error = EOWNERDEAD; 2783 break; 2784 } else if (owner == UMUTEX_RB_NOTRECOV) { 2785 error = ENOTRECOVERABLE; 2786 break; 2787 } 2788 2789 /* 2790 * If we caught a signal, we have retried and now 2791 * exit immediately. 2792 */ 2793 if (error != 0) 2794 break; 2795 2796 /* 2797 * We set the contested bit, sleep. Otherwise the lock changed 2798 * and we need to retry or we lost a race to the thread 2799 * unlocking the umtx. 2800 */ 2801 umtxq_lock(&uq->uq_key); 2802 umtxq_insert(uq); 2803 umtxq_unbusy(&uq->uq_key); 2804 error = umtxq_sleep(uq, "umtxpp", NULL); 2805 umtxq_remove(uq); 2806 umtxq_unlock(&uq->uq_key); 2807 } 2808 umtxq_lock(&uq->uq_key); 2809 if (error == 0) 2810 umtxq_signal(&uq->uq_key, INT_MAX); 2811 umtxq_unbusy(&uq->uq_key); 2812 umtxq_unlock(&uq->uq_key); 2813 umtx_key_release(&uq->uq_key); 2814 if (error == 0 && old_ceiling != NULL) { 2815 rv = suword32(old_ceiling, save_ceiling); 2816 error = rv == 0 ? 0 : EFAULT; 2817 } 2818 return (error); 2819 } 2820 2821 /* 2822 * Lock a userland POSIX mutex. 
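 *
 * The locking protocol is selected by the m_flags word:
 *	0			plain sleep mutex (do_lock_normal())
 *	UMUTEX_PRIO_INHERIT	priority inheritance (do_lock_pi())
 *	UMUTEX_PRIO_PROTECT	priority protection (do_lock_pp())
 * Requests with both protocol bits set are rejected with EINVAL.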
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	return (EINVAL);
}

static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* Only the predefined clock ids will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing the user mutex, and
	 * avoid touching the cache line when that is unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, clockid,
		    (wflags & CVWAIT_ABSTIME) != 0, timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal,
		 * or a spurious wakeup; clear the c_has_waiters flag
		 * when necessary.
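		 * A concurrent do_cv_signal() removes the waiter from
		 * the queue before waking it, which is why UQF_UMTXQ is
		 * re-checked below with the chain busied before
		 * c_has_waiters may be cleared.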
2951 */ 2952 umtxq_busy(&uq->uq_key); 2953 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2954 int oldlen = uq->uq_cur_queue->length; 2955 umtxq_remove(uq); 2956 if (oldlen == 1) { 2957 umtxq_unlock(&uq->uq_key); 2958 suword32(&cv->c_has_waiters, 0); 2959 umtxq_lock(&uq->uq_key); 2960 } 2961 } 2962 umtxq_unbusy(&uq->uq_key); 2963 if (error == ERESTART) 2964 error = EINTR; 2965 } 2966 2967 umtxq_unlock(&uq->uq_key); 2968 umtx_key_release(&uq->uq_key); 2969 return (error); 2970 } 2971 2972 /* 2973 * Signal a userland condition variable. 2974 */ 2975 static int 2976 do_cv_signal(struct thread *td, struct ucond *cv) 2977 { 2978 struct umtx_key key; 2979 int error, cnt, nwake; 2980 uint32_t flags; 2981 2982 error = fueword32(&cv->c_flags, &flags); 2983 if (error == -1) 2984 return (EFAULT); 2985 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2986 return (error); 2987 umtxq_lock(&key); 2988 umtxq_busy(&key); 2989 cnt = umtxq_count(&key); 2990 nwake = umtxq_signal(&key, 1); 2991 if (cnt <= nwake) { 2992 umtxq_unlock(&key); 2993 error = suword32(&cv->c_has_waiters, 0); 2994 if (error == -1) 2995 error = EFAULT; 2996 umtxq_lock(&key); 2997 } 2998 umtxq_unbusy(&key); 2999 umtxq_unlock(&key); 3000 umtx_key_release(&key); 3001 return (error); 3002 } 3003 3004 static int 3005 do_cv_broadcast(struct thread *td, struct ucond *cv) 3006 { 3007 struct umtx_key key; 3008 int error; 3009 uint32_t flags; 3010 3011 error = fueword32(&cv->c_flags, &flags); 3012 if (error == -1) 3013 return (EFAULT); 3014 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3015 return (error); 3016 3017 umtxq_lock(&key); 3018 umtxq_busy(&key); 3019 umtxq_signal(&key, INT_MAX); 3020 umtxq_unlock(&key); 3021 3022 error = suword32(&cv->c_has_waiters, 0); 3023 if (error == -1) 3024 error = EFAULT; 3025 3026 umtxq_unbusy_unlocked(&key); 3027 3028 umtx_key_release(&key); 3029 return (error); 3030 } 3031 3032 static int 3033 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3034 struct _umtx_time *timeout) 3035 { 3036 struct umtx_abs_timeout timo; 3037 struct umtx_q *uq; 3038 uint32_t flags, wrflags; 3039 int32_t state, oldstate; 3040 int32_t blocked_readers; 3041 int error, error1, rv; 3042 3043 uq = td->td_umtxq; 3044 error = fueword32(&rwlock->rw_flags, &flags); 3045 if (error == -1) 3046 return (EFAULT); 3047 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3048 if (error != 0) 3049 return (error); 3050 3051 if (timeout != NULL) 3052 umtx_abs_timeout_init2(&timo, timeout); 3053 3054 wrflags = URWLOCK_WRITE_OWNER; 3055 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3056 wrflags |= URWLOCK_WRITE_WAITERS; 3057 3058 for (;;) { 3059 rv = fueword32(&rwlock->rw_state, &state); 3060 if (rv == -1) { 3061 umtx_key_release(&uq->uq_key); 3062 return (EFAULT); 3063 } 3064 3065 /* try to lock it */ 3066 while (!(state & wrflags)) { 3067 if (__predict_false(URWLOCK_READER_COUNT(state) == 3068 URWLOCK_MAX_READERS)) { 3069 umtx_key_release(&uq->uq_key); 3070 return (EAGAIN); 3071 } 3072 rv = casueword32(&rwlock->rw_state, state, 3073 &oldstate, state + 1); 3074 if (rv == -1) { 3075 umtx_key_release(&uq->uq_key); 3076 return (EFAULT); 3077 } 3078 if (rv == 0) { 3079 MPASS(oldstate == state); 3080 umtx_key_release(&uq->uq_key); 3081 return (0); 3082 } 3083 error = thread_check_susp(td, true); 3084 if (error != 0) 3085 break; 3086 state = oldstate; 3087 } 3088 3089 if (error) 3090 break; 3091 3092 /* grab monitor lock */ 3093 umtxq_lock(&uq->uq_key); 
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set the read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* the state changed while we set the flags; restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * The contention bit is set; before sleeping, increase
		 * the read-waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers + 1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/*
		 * Decrease the read-waiter count, and possibly clear the
		 * read contention bit.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers - 1);
		if (blocked_readers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
umtx_abs_timeout_init2(&timo, timeout); 3239 3240 blocked_readers = 0; 3241 for (;;) { 3242 rv = fueword32(&rwlock->rw_state, &state); 3243 if (rv == -1) { 3244 umtx_key_release(&uq->uq_key); 3245 return (EFAULT); 3246 } 3247 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3248 URWLOCK_READER_COUNT(state) == 0) { 3249 rv = casueword32(&rwlock->rw_state, state, 3250 &oldstate, state | URWLOCK_WRITE_OWNER); 3251 if (rv == -1) { 3252 umtx_key_release(&uq->uq_key); 3253 return (EFAULT); 3254 } 3255 if (rv == 0) { 3256 MPASS(oldstate == state); 3257 umtx_key_release(&uq->uq_key); 3258 return (0); 3259 } 3260 state = oldstate; 3261 error = thread_check_susp(td, true); 3262 if (error != 0) 3263 break; 3264 } 3265 3266 if (error) { 3267 if ((state & (URWLOCK_WRITE_OWNER | 3268 URWLOCK_WRITE_WAITERS)) == 0 && 3269 blocked_readers != 0) { 3270 umtxq_lock(&uq->uq_key); 3271 umtxq_busy(&uq->uq_key); 3272 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3273 UMTX_SHARED_QUEUE); 3274 umtxq_unbusy(&uq->uq_key); 3275 umtxq_unlock(&uq->uq_key); 3276 } 3277 3278 break; 3279 } 3280 3281 /* grab monitor lock */ 3282 umtxq_lock(&uq->uq_key); 3283 umtxq_busy(&uq->uq_key); 3284 umtxq_unlock(&uq->uq_key); 3285 3286 /* 3287 * Re-read the state, in case it changed between the 3288 * try-lock above and the check below. 3289 */ 3290 rv = fueword32(&rwlock->rw_state, &state); 3291 if (rv == -1) 3292 error = EFAULT; 3293 3294 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3295 URWLOCK_READER_COUNT(state) != 0) && 3296 (state & URWLOCK_WRITE_WAITERS) == 0) { 3297 rv = casueword32(&rwlock->rw_state, state, 3298 &oldstate, state | URWLOCK_WRITE_WAITERS); 3299 if (rv == -1) { 3300 error = EFAULT; 3301 break; 3302 } 3303 if (rv == 0) { 3304 MPASS(oldstate == state); 3305 goto sleep; 3306 } 3307 state = oldstate; 3308 error = thread_check_susp(td, false); 3309 if (error != 0) 3310 break; 3311 } 3312 if (error != 0) { 3313 umtxq_unbusy_unlocked(&uq->uq_key); 3314 break; 3315 } 3316 3317 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3318 URWLOCK_READER_COUNT(state) == 0) { 3319 umtxq_unbusy_unlocked(&uq->uq_key); 3320 error = thread_check_susp(td, false); 3321 if (error != 0) 3322 break; 3323 continue; 3324 } 3325 sleep: 3326 rv = fueword32(&rwlock->rw_blocked_writers, 3327 &blocked_writers); 3328 if (rv == -1) { 3329 umtxq_unbusy_unlocked(&uq->uq_key); 3330 error = EFAULT; 3331 break; 3332 } 3333 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3334 3335 while ((state & URWLOCK_WRITE_OWNER) || 3336 URWLOCK_READER_COUNT(state) != 0) { 3337 umtxq_lock(&uq->uq_key); 3338 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3339 umtxq_unbusy(&uq->uq_key); 3340 3341 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3342 NULL : &timo); 3343 3344 umtxq_busy(&uq->uq_key); 3345 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3346 umtxq_unlock(&uq->uq_key); 3347 if (error) 3348 break; 3349 rv = fueword32(&rwlock->rw_state, &state); 3350 if (rv == -1) { 3351 error = EFAULT; 3352 break; 3353 } 3354 } 3355 3356 rv = fueword32(&rwlock->rw_blocked_writers, 3357 &blocked_writers); 3358 if (rv == -1) { 3359 umtxq_unbusy_unlocked(&uq->uq_key); 3360 error = EFAULT; 3361 break; 3362 } 3363 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3364 if (blocked_writers == 1) { 3365 rv = fueword32(&rwlock->rw_state, &state); 3366 if (rv == -1) { 3367 umtxq_unbusy_unlocked(&uq->uq_key); 3368 error = EFAULT; 3369 break; 3370 } 3371 for (;;) { 3372 rv = casueword32(&rwlock->rw_state, state, 3373 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3374 if (rv == -1) { 3375 error = EFAULT; 3376 break; 3377 } 3378 if (rv == 0) { 3379 MPASS(oldstate == state); 3380 break; 3381 } 3382 state = oldstate; 3383 error1 = thread_check_susp(td, false); 3384 /* 3385 * We are leaving the URWLOCK_WRITE_WAITERS 3386 * behind, but this should not harm the 3387 * correctness. 3388 */ 3389 if (error1 != 0) { 3390 if (error == 0) 3391 error = error1; 3392 break; 3393 } 3394 } 3395 rv = fueword32(&rwlock->rw_blocked_readers, 3396 &blocked_readers); 3397 if (rv == -1) { 3398 umtxq_unbusy_unlocked(&uq->uq_key); 3399 error = EFAULT; 3400 break; 3401 } 3402 } else 3403 blocked_readers = 0; 3404 3405 umtxq_unbusy_unlocked(&uq->uq_key); 3406 } 3407 3408 umtx_key_release(&uq->uq_key); 3409 if (error == ERESTART) 3410 error = EINTR; 3411 return (error); 3412 } 3413 3414 static int 3415 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3416 { 3417 struct umtx_q *uq; 3418 uint32_t flags; 3419 int32_t state, oldstate; 3420 int error, rv, q, count; 3421 3422 uq = td->td_umtxq; 3423 error = fueword32(&rwlock->rw_flags, &flags); 3424 if (error == -1) 3425 return (EFAULT); 3426 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3427 if (error != 0) 3428 return (error); 3429 3430 error = fueword32(&rwlock->rw_state, &state); 3431 if (error == -1) { 3432 error = EFAULT; 3433 goto out; 3434 } 3435 if (state & URWLOCK_WRITE_OWNER) { 3436 for (;;) { 3437 rv = casueword32(&rwlock->rw_state, state, 3438 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3439 if (rv == -1) { 3440 error = EFAULT; 3441 goto out; 3442 } 3443 if (rv == 1) { 3444 state = oldstate; 3445 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3446 error = EPERM; 3447 goto out; 3448 } 3449 error = thread_check_susp(td, true); 3450 if (error != 0) 3451 goto out; 3452 } else 3453 break; 3454 } 3455 } else if (URWLOCK_READER_COUNT(state) != 0) { 3456 for (;;) { 3457 rv = casueword32(&rwlock->rw_state, state, 3458 &oldstate, state - 1); 3459 if (rv == -1) { 3460 error = EFAULT; 3461 goto out; 3462 } 3463 if (rv == 1) { 3464 state = oldstate; 3465 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3466 error = EPERM; 3467 goto out; 3468 } 3469 error = thread_check_susp(td, true); 3470 if (error != 0) 3471 goto out; 3472 } else 3473 break; 3474 } 3475 } else { 3476 error = EPERM; 3477 goto out; 3478 } 3479 3480 count = 0; 3481 3482 if (!(flags & URWLOCK_PREFER_READER)) { 3483 if (state & URWLOCK_WRITE_WAITERS) { 3484 count = 1; 3485 q = UMTX_EXCLUSIVE_QUEUE; 3486 } else if (state & URWLOCK_READ_WAITERS) { 3487 count = INT_MAX; 3488 q = UMTX_SHARED_QUEUE; 3489 } 3490 } else { 3491 if (state & URWLOCK_READ_WAITERS) { 3492 count = INT_MAX; 3493 q = UMTX_SHARED_QUEUE; 3494 } else if (state & 
URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv, rv1;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

again:
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv1 = fueword32(&sem->_count, &count);
	if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) ||
	    (rv == 1 && count1 == 0)) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		if (rv == 1) {
			rv = thread_check_susp(td, true);
			if (rv == 0)
				goto again;
			error = rv;
			goto out;
		}
		if (rv == 0)
			rv = rv1;
		error = rv == -1 ? EFAULT : 0;
		goto out;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * A count greater than zero means the memory is still
		 * being referenced by user code, so the _has_waiters
		 * flag can be updated safely.
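		 * With exactly one sleeper, the wakeup below empties the
		 * queue, so _has_waiters is cleared first; the store is
		 * done with the chain unlocked because suword32() may
		 * fault on the user page.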
3601 */ 3602 if (cnt == 1) { 3603 umtxq_unlock(&key); 3604 error = suword32(&sem->_has_waiters, 0); 3605 umtxq_lock(&key); 3606 if (error == -1) 3607 error = EFAULT; 3608 } 3609 umtxq_signal(&key, 1); 3610 } 3611 umtxq_unbusy(&key); 3612 umtxq_unlock(&key); 3613 umtx_key_release(&key); 3614 return (error); 3615 } 3616 #endif 3617 3618 static int 3619 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3620 { 3621 struct umtx_abs_timeout timo; 3622 struct umtx_q *uq; 3623 uint32_t count, flags; 3624 int error, rv; 3625 3626 uq = td->td_umtxq; 3627 flags = fuword32(&sem->_flags); 3628 if (timeout != NULL) 3629 umtx_abs_timeout_init2(&timo, timeout); 3630 3631 again: 3632 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3633 if (error != 0) 3634 return (error); 3635 umtxq_lock(&uq->uq_key); 3636 umtxq_busy(&uq->uq_key); 3637 umtxq_insert(uq); 3638 umtxq_unlock(&uq->uq_key); 3639 rv = fueword32(&sem->_count, &count); 3640 if (rv == -1) { 3641 umtxq_lock(&uq->uq_key); 3642 umtxq_unbusy(&uq->uq_key); 3643 umtxq_remove(uq); 3644 umtxq_unlock(&uq->uq_key); 3645 umtx_key_release(&uq->uq_key); 3646 return (EFAULT); 3647 } 3648 for (;;) { 3649 if (USEM_COUNT(count) != 0) { 3650 umtxq_lock(&uq->uq_key); 3651 umtxq_unbusy(&uq->uq_key); 3652 umtxq_remove(uq); 3653 umtxq_unlock(&uq->uq_key); 3654 umtx_key_release(&uq->uq_key); 3655 return (0); 3656 } 3657 if (count == USEM_HAS_WAITERS) 3658 break; 3659 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3660 if (rv == 0) 3661 break; 3662 umtxq_lock(&uq->uq_key); 3663 umtxq_unbusy(&uq->uq_key); 3664 umtxq_remove(uq); 3665 umtxq_unlock(&uq->uq_key); 3666 umtx_key_release(&uq->uq_key); 3667 if (rv == -1) 3668 return (EFAULT); 3669 rv = thread_check_susp(td, true); 3670 if (rv != 0) 3671 return (rv); 3672 goto again; 3673 } 3674 umtxq_lock(&uq->uq_key); 3675 umtxq_unbusy(&uq->uq_key); 3676 3677 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3678 3679 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3680 error = 0; 3681 else { 3682 umtxq_remove(uq); 3683 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3684 /* A relative timeout cannot be restarted. */ 3685 if (error == ERESTART) 3686 error = EINTR; 3687 if (error == EINTR) { 3688 kern_clock_gettime(curthread, timo.clockid, 3689 &timo.cur); 3690 timespecsub(&timo.end, &timo.cur, 3691 &timeout->_timeout); 3692 } 3693 } 3694 } 3695 umtxq_unlock(&uq->uq_key); 3696 umtx_key_release(&uq->uq_key); 3697 return (error); 3698 } 3699 3700 /* 3701 * Signal a userland semaphore. 3702 */ 3703 static int 3704 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3705 { 3706 struct umtx_key key; 3707 int error, cnt, rv; 3708 uint32_t count, flags; 3709 3710 rv = fueword32(&sem->_flags, &flags); 3711 if (rv == -1) 3712 return (EFAULT); 3713 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3714 return (error); 3715 umtxq_lock(&key); 3716 umtxq_busy(&key); 3717 cnt = umtxq_count(&key); 3718 if (cnt > 0) { 3719 /* 3720 * If this was the last sleeping thread, clear the waiters 3721 * flag in _count. 
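		 * The _count word packs both pieces of state:
		 * USEM_HAS_WAITERS is the high bit and USEM_COUNT()
		 * masks it off to yield the semaphore value, which is
		 * why the loop below must CAS instead of blindly
		 * storing.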
3722 */ 3723 if (cnt == 1) { 3724 umtxq_unlock(&key); 3725 rv = fueword32(&sem->_count, &count); 3726 while (rv != -1 && count & USEM_HAS_WAITERS) { 3727 rv = casueword32(&sem->_count, count, &count, 3728 count & ~USEM_HAS_WAITERS); 3729 if (rv == 1) { 3730 rv = thread_check_susp(td, true); 3731 if (rv != 0) 3732 break; 3733 } 3734 } 3735 if (rv == -1) 3736 error = EFAULT; 3737 else if (rv > 0) { 3738 error = rv; 3739 } 3740 umtxq_lock(&key); 3741 } 3742 3743 umtxq_signal(&key, 1); 3744 } 3745 umtxq_unbusy(&key); 3746 umtxq_unlock(&key); 3747 umtx_key_release(&key); 3748 return (error); 3749 } 3750 3751 #ifdef COMPAT_FREEBSD10 3752 int 3753 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3754 { 3755 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3756 } 3757 3758 int 3759 freebsd10__umtx_unlock(struct thread *td, 3760 struct freebsd10__umtx_unlock_args *uap) 3761 { 3762 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3763 } 3764 #endif 3765 3766 inline int 3767 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3768 { 3769 int error; 3770 3771 error = copyin(uaddr, tsp, sizeof(*tsp)); 3772 if (error == 0) { 3773 if (tsp->tv_sec < 0 || 3774 tsp->tv_nsec >= 1000000000 || 3775 tsp->tv_nsec < 0) 3776 error = EINVAL; 3777 } 3778 return (error); 3779 } 3780 3781 static inline int 3782 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3783 { 3784 int error; 3785 3786 if (size <= sizeof(tp->_timeout)) { 3787 tp->_clockid = CLOCK_REALTIME; 3788 tp->_flags = 0; 3789 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3790 } else 3791 error = copyin(uaddr, tp, sizeof(*tp)); 3792 if (error != 0) 3793 return (error); 3794 if (tp->_timeout.tv_sec < 0 || 3795 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3796 return (EINVAL); 3797 return (0); 3798 } 3799 3800 static int 3801 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3802 struct umtx_robust_lists_params *rb) 3803 { 3804 3805 if (size > sizeof(*rb)) 3806 return (EINVAL); 3807 return (copyin(uaddr, rb, size)); 3808 } 3809 3810 static int 3811 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3812 { 3813 3814 /* 3815 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3816 * and we're only called if sz >= sizeof(timespec) as supplied in the 3817 * copyops. 3818 */ 3819 KASSERT(sz >= sizeof(*tsp), 3820 ("umtx_copyops specifies incorrect sizes")); 3821 3822 return (copyout(tsp, uaddr, sizeof(*tsp))); 3823 } 3824 3825 #ifdef COMPAT_FREEBSD10 3826 static int 3827 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3828 const struct umtx_copyops *ops) 3829 { 3830 struct timespec *ts, timeout; 3831 int error; 3832 3833 /* Allow a null timespec (wait forever). 
*/ 3834 if (uap->uaddr2 == NULL) 3835 ts = NULL; 3836 else { 3837 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3838 if (error != 0) 3839 return (error); 3840 ts = &timeout; 3841 } 3842 #ifdef COMPAT_FREEBSD32 3843 if (ops->compat32) 3844 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3845 #endif 3846 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3847 } 3848 3849 static int 3850 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3851 const struct umtx_copyops *ops) 3852 { 3853 #ifdef COMPAT_FREEBSD32 3854 if (ops->compat32) 3855 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3856 #endif 3857 return (do_unlock_umtx(td, uap->obj, uap->val)); 3858 } 3859 #endif /* COMPAT_FREEBSD10 */ 3860 3861 #if !defined(COMPAT_FREEBSD10) 3862 static int 3863 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3864 const struct umtx_copyops *ops __unused) 3865 { 3866 return (EOPNOTSUPP); 3867 } 3868 #endif /* COMPAT_FREEBSD10 */ 3869 3870 static int 3871 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3872 const struct umtx_copyops *ops) 3873 { 3874 struct _umtx_time timeout, *tm_p; 3875 int error; 3876 3877 if (uap->uaddr2 == NULL) 3878 tm_p = NULL; 3879 else { 3880 error = ops->copyin_umtx_time( 3881 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3882 if (error != 0) 3883 return (error); 3884 tm_p = &timeout; 3885 } 3886 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3887 } 3888 3889 static int 3890 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3891 const struct umtx_copyops *ops) 3892 { 3893 struct _umtx_time timeout, *tm_p; 3894 int error; 3895 3896 if (uap->uaddr2 == NULL) 3897 tm_p = NULL; 3898 else { 3899 error = ops->copyin_umtx_time( 3900 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3901 if (error != 0) 3902 return (error); 3903 tm_p = &timeout; 3904 } 3905 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3906 } 3907 3908 static int 3909 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3910 const struct umtx_copyops *ops) 3911 { 3912 struct _umtx_time *tm_p, timeout; 3913 int error; 3914 3915 if (uap->uaddr2 == NULL) 3916 tm_p = NULL; 3917 else { 3918 error = ops->copyin_umtx_time( 3919 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3920 if (error != 0) 3921 return (error); 3922 tm_p = &timeout; 3923 } 3924 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3925 } 3926 3927 static int 3928 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3929 const struct umtx_copyops *ops __unused) 3930 { 3931 3932 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3933 } 3934 3935 #define BATCH_SIZE 128 3936 static int 3937 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3938 { 3939 char *uaddrs[BATCH_SIZE], **upp; 3940 int count, error, i, pos, tocopy; 3941 3942 upp = (char **)uap->obj; 3943 error = 0; 3944 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3945 pos += tocopy) { 3946 tocopy = MIN(count, BATCH_SIZE); 3947 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3948 if (error != 0) 3949 break; 3950 for (i = 0; i < tocopy; ++i) { 3951 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3952 } 3953 maybe_yield(); 3954 } 3955 return (error); 3956 } 3957 3958 static int 3959 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3960 { 3961 uint32_t uaddrs[BATCH_SIZE], *upp; 3962 int count, error, i, pos, tocopy; 3963 3964 upp = (uint32_t *)uap->obj; 3965 error = 0; 3966 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 3967 pos += tocopy) { 3968 tocopy = MIN(count, BATCH_SIZE); 3969 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3970 if (error != 0) 3971 break; 3972 for (i = 0; i < tocopy; ++i) { 3973 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3974 INT_MAX, 1); 3975 } 3976 maybe_yield(); 3977 } 3978 return (error); 3979 } 3980 3981 static int 3982 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3983 const struct umtx_copyops *ops) 3984 { 3985 3986 if (ops->compat32) 3987 return (__umtx_op_nwake_private_compat32(td, uap)); 3988 return (__umtx_op_nwake_private_native(td, uap)); 3989 } 3990 3991 static int 3992 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 3993 const struct umtx_copyops *ops __unused) 3994 { 3995 3996 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3997 } 3998 3999 static int 4000 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4001 const struct umtx_copyops *ops) 4002 { 4003 struct _umtx_time *tm_p, timeout; 4004 int error; 4005 4006 /* Allow a null timespec (wait forever). */ 4007 if (uap->uaddr2 == NULL) 4008 tm_p = NULL; 4009 else { 4010 error = ops->copyin_umtx_time( 4011 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4012 if (error != 0) 4013 return (error); 4014 tm_p = &timeout; 4015 } 4016 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4017 } 4018 4019 static int 4020 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4021 const struct umtx_copyops *ops __unused) 4022 { 4023 4024 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4025 } 4026 4027 static int 4028 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4029 const struct umtx_copyops *ops) 4030 { 4031 struct _umtx_time *tm_p, timeout; 4032 int error; 4033 4034 /* Allow a null timespec (wait forever). */ 4035 if (uap->uaddr2 == NULL) 4036 tm_p = NULL; 4037 else { 4038 error = ops->copyin_umtx_time( 4039 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4040 if (error != 0) 4041 return (error); 4042 tm_p = &timeout; 4043 } 4044 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4045 } 4046 4047 static int 4048 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4049 const struct umtx_copyops *ops __unused) 4050 { 4051 4052 return (do_wake_umutex(td, uap->obj)); 4053 } 4054 4055 static int 4056 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4057 const struct umtx_copyops *ops __unused) 4058 { 4059 4060 return (do_unlock_umutex(td, uap->obj, false)); 4061 } 4062 4063 static int 4064 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4065 const struct umtx_copyops *ops __unused) 4066 { 4067 4068 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4069 } 4070 4071 static int 4072 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4073 const struct umtx_copyops *ops) 4074 { 4075 struct timespec *ts, timeout; 4076 int error; 4077 4078 /* Allow a null timespec (wait forever). 
*/ 4079 if (uap->uaddr2 == NULL) 4080 ts = NULL; 4081 else { 4082 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4083 if (error != 0) 4084 return (error); 4085 ts = &timeout; 4086 } 4087 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4088 } 4089 4090 static int 4091 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4092 const struct umtx_copyops *ops __unused) 4093 { 4094 4095 return (do_cv_signal(td, uap->obj)); 4096 } 4097 4098 static int 4099 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4100 const struct umtx_copyops *ops __unused) 4101 { 4102 4103 return (do_cv_broadcast(td, uap->obj)); 4104 } 4105 4106 static int 4107 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4108 const struct umtx_copyops *ops) 4109 { 4110 struct _umtx_time timeout; 4111 int error; 4112 4113 /* Allow a null timespec (wait forever). */ 4114 if (uap->uaddr2 == NULL) { 4115 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4116 } else { 4117 error = ops->copyin_umtx_time(uap->uaddr2, 4118 (size_t)uap->uaddr1, &timeout); 4119 if (error != 0) 4120 return (error); 4121 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4122 } 4123 return (error); 4124 } 4125 4126 static int 4127 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4128 const struct umtx_copyops *ops) 4129 { 4130 struct _umtx_time timeout; 4131 int error; 4132 4133 /* Allow a null timespec (wait forever). */ 4134 if (uap->uaddr2 == NULL) { 4135 error = do_rw_wrlock(td, uap->obj, 0); 4136 } else { 4137 error = ops->copyin_umtx_time(uap->uaddr2, 4138 (size_t)uap->uaddr1, &timeout); 4139 if (error != 0) 4140 return (error); 4141 4142 error = do_rw_wrlock(td, uap->obj, &timeout); 4143 } 4144 return (error); 4145 } 4146 4147 static int 4148 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4149 const struct umtx_copyops *ops __unused) 4150 { 4151 4152 return (do_rw_unlock(td, uap->obj)); 4153 } 4154 4155 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4156 static int 4157 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4158 const struct umtx_copyops *ops) 4159 { 4160 struct _umtx_time *tm_p, timeout; 4161 int error; 4162 4163 /* Allow a null timespec (wait forever). */ 4164 if (uap->uaddr2 == NULL) 4165 tm_p = NULL; 4166 else { 4167 error = ops->copyin_umtx_time( 4168 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4169 if (error != 0) 4170 return (error); 4171 tm_p = &timeout; 4172 } 4173 return (do_sem_wait(td, uap->obj, tm_p)); 4174 } 4175 4176 static int 4177 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4178 const struct umtx_copyops *ops __unused) 4179 { 4180 4181 return (do_sem_wake(td, uap->obj)); 4182 } 4183 #endif 4184 4185 static int 4186 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4187 const struct umtx_copyops *ops __unused) 4188 { 4189 4190 return (do_wake2_umutex(td, uap->obj, uap->val)); 4191 } 4192 4193 static int 4194 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4195 const struct umtx_copyops *ops) 4196 { 4197 struct _umtx_time *tm_p, timeout; 4198 size_t uasize; 4199 int error; 4200 4201 /* Allow a null timespec (wait forever). 
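	 *
	 * For UMTX_OP_SEM2_WAIT, uaddr1 carries the size of the buffer
	 * at uaddr2; if the buffer is large enough, a timespec that
	 * follows the _umtx_time receives the time remaining when a
	 * relative wait is interrupted by a signal.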
*/ 4202 if (uap->uaddr2 == NULL) { 4203 uasize = 0; 4204 tm_p = NULL; 4205 } else { 4206 uasize = (size_t)uap->uaddr1; 4207 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4208 if (error != 0) 4209 return (error); 4210 tm_p = &timeout; 4211 } 4212 error = do_sem2_wait(td, uap->obj, tm_p); 4213 if (error == EINTR && uap->uaddr2 != NULL && 4214 (timeout._flags & UMTX_ABSTIME) == 0 && 4215 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4216 error = ops->copyout_timeout( 4217 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4218 uasize - ops->umtx_time_sz, &timeout._timeout); 4219 if (error == 0) { 4220 error = EINTR; 4221 } 4222 } 4223 4224 return (error); 4225 } 4226 4227 static int 4228 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4229 const struct umtx_copyops *ops __unused) 4230 { 4231 4232 return (do_sem2_wake(td, uap->obj)); 4233 } 4234 4235 #define USHM_OBJ_UMTX(o) \ 4236 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4237 4238 #define USHMF_REG_LINKED 0x0001 4239 #define USHMF_OBJ_LINKED 0x0002 4240 struct umtx_shm_reg { 4241 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4242 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4243 struct umtx_key ushm_key; 4244 struct ucred *ushm_cred; 4245 struct shmfd *ushm_obj; 4246 u_int ushm_refcnt; 4247 u_int ushm_flags; 4248 }; 4249 4250 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4251 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4252 4253 static uma_zone_t umtx_shm_reg_zone; 4254 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4255 static struct mtx umtx_shm_lock; 4256 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4257 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4258 4259 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4260 4261 static void 4262 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4263 { 4264 struct umtx_shm_reg_head d; 4265 struct umtx_shm_reg *reg, *reg1; 4266 4267 TAILQ_INIT(&d); 4268 mtx_lock(&umtx_shm_lock); 4269 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4270 mtx_unlock(&umtx_shm_lock); 4271 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4272 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4273 umtx_shm_free_reg(reg); 4274 } 4275 } 4276 4277 static struct task umtx_shm_reg_delfree_task = 4278 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4279 4280 static struct umtx_shm_reg * 4281 umtx_shm_find_reg_locked(const struct umtx_key *key) 4282 { 4283 struct umtx_shm_reg *reg; 4284 struct umtx_shm_reg_head *reg_head; 4285 4286 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4287 mtx_assert(&umtx_shm_lock, MA_OWNED); 4288 reg_head = &umtx_shm_registry[key->hash]; 4289 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4290 KASSERT(reg->ushm_key.shared, 4291 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4292 if (reg->ushm_key.info.shared.object == 4293 key->info.shared.object && 4294 reg->ushm_key.info.shared.offset == 4295 key->info.shared.offset) { 4296 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4297 KASSERT(reg->ushm_refcnt > 0, 4298 ("reg %p refcnt 0 onlist", reg)); 4299 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4300 ("reg %p not linked", reg)); 4301 reg->ushm_refcnt++; 4302 return (reg); 4303 } 4304 } 4305 return (NULL); 4306 } 4307 4308 static struct umtx_shm_reg * 4309 umtx_shm_find_reg(const struct umtx_key *key) 4310 { 4311 struct umtx_shm_reg *reg; 4312 4313 mtx_lock(&umtx_shm_lock); 4314 reg = umtx_shm_find_reg_locked(key); 4315 mtx_unlock(&umtx_shm_lock); 4316 return (reg); 4317 } 4318 
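/*
 * The registry above backs UMTX_OP_SHM, which hands userland a shm
 * file descriptor for an anonymous page keyed by a process-shared
 * address.  A hypothetical userland sketch of its use:
 *
 *	int fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, addr, NULL);
 *	void *p = (fd >= 0) ? mmap(NULL, PAGE_SIZE,
 *	    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) : NULL;
 */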
static void
umtx_shm_free_reg(struct umtx_shm_reg *reg)
{

	chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
	crfree(reg->ushm_cred);
	shm_drop(reg->ushm_obj);
	uma_zfree(umtx_shm_reg_zone, reg);
}

static bool
umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
{
	bool res;

	mtx_assert(&umtx_shm_lock, MA_OWNED);
	KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
	reg->ushm_refcnt--;
	res = reg->ushm_refcnt == 0;
	if (res || force) {
		if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
			TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
			    reg, ushm_reg_link);
			reg->ushm_flags &= ~USHMF_REG_LINKED;
		}
		if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
			LIST_REMOVE(reg, ushm_obj_link);
			reg->ushm_flags &= ~USHMF_OBJ_LINKED;
		}
	}
	return (res);
}

static void
umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
{
	vm_object_t object;
	bool dofree;

	if (force) {
		object = reg->ushm_obj->shm_object;
		VM_OBJECT_WLOCK(object);
		object->flags |= OBJ_UMTXDEAD;
		VM_OBJECT_WUNLOCK(object);
	}
	mtx_lock(&umtx_shm_lock);
	dofree = umtx_shm_unref_reg_locked(reg, force);
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		umtx_shm_free_reg(reg);
}

void
umtx_shm_object_init(vm_object_t object)
{

	LIST_INIT(USHM_OBJ_UMTX(object));
}

void
umtx_shm_object_terminated(vm_object_t object)
{
	struct umtx_shm_reg *reg, *reg1;
	bool dofree;

	if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
		return;

	dofree = false;
	mtx_lock(&umtx_shm_lock);
	LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
		if (umtx_shm_unref_reg_locked(reg, true)) {
			TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
			    ushm_reg_link);
			dofree = true;
		}
	}
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
}

static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
	struct umtx_shm_reg *reg, *reg1;
	struct ucred *cred;
	int error;

	reg = umtx_shm_find_reg(key);
	if (reg != NULL) {
		*res = reg;
		return (0);
	}
	cred = td->td_ucred;
	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
		return (ENOMEM);
	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
	reg->ushm_refcnt = 1;
	bcopy(key, &reg->ushm_key, sizeof(*key));
	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
	reg->ushm_cred = crhold(cred);
	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
	if (error != 0) {
		umtx_shm_free_reg(reg);
		return (error);
	}
	mtx_lock(&umtx_shm_lock);
	reg1 = umtx_shm_find_reg_locked(key);
	if (reg1 != NULL) {
		mtx_unlock(&umtx_shm_lock);
		umtx_shm_free_reg(reg);
		*res = reg1;
		return (0);
	}
	reg->ushm_refcnt++;
	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
	    ushm_obj_link);
	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
	mtx_unlock(&umtx_shm_lock);
	*res = reg;
	return (0);
}

static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
	vm_map_lookup_done(map, entry);
	return (ret);
}

static void
umtx_shm_init(void)
{
	int i;

	umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
	for (i = 0; i < nitems(umtx_shm_registry); i++)
		TAILQ_INIT(&umtx_shm_registry[i]);
}

static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	umtx_shm_unref_reg(reg, false);
	return (error);
}

static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (umtx_shm(td, uap->uaddr1, uap->val));
}

static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct umtx_robust_lists_params rb;
	int error;

	if (ops->compat32) {
		if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
		    (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
		    td->td_rb_inact != 0))
			return (EBUSY);
	} else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
		return (EBUSY);
	}

	bzero(&rb, sizeof(rb));
	error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
	if (error != 0)
		return (error);

	if (ops->compat32)
		td->td_pflags2 |= TDP2_COMPAT32RB;

	td->td_rb_list = rb.robust_list_offset;
	td->td_rbp_list = rb.robust_priv_list_offset;
	td->td_rb_inact = rb.robust_inact_offset;
	return (0);
}

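/*
 * Illustrative sketch for __umtx_op_robust_lists() above (not part of
 * the kernel build): a threading library registers its per-thread
 * robust list heads roughly like this, where "thr" and the
 * robust_list/priv_robust_list/inact_mtx member names are
 * hypothetical stand-ins for the library's own thread structure:
 *
 *	struct umtx_robust_lists_params rb = {
 *		.robust_list_offset = (uintptr_t)&thr->robust_list,
 *		.robust_priv_list_offset = (uintptr_t)&thr->priv_robust_list,
 *		.robust_inact_offset = (uintptr_t)&thr->inact_mtx,
 *	};
 *	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
 */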
#if defined(__i386__) || defined(__amd64__)
/*
 * Provide the standard 32-bit definitions for x86, since native/compat32 use a
 * 32-bit time_t there.  Other architectures just need the i386 definitions
 * along with their standard compat32.
 */
struct timespecx32 {
	int64_t			tv_sec;
	int32_t			tv_nsec;
};

struct umtx_timex32 {
	struct	timespecx32	_timeout;
	uint32_t		_flags;
	uint32_t		_clockid;
};

#ifndef __i386__
#define	timespeci386	timespec32
#define	umtx_timei386	umtx_time32
#endif
#else /* !__i386__ && !__amd64__ */
/* 32-bit architectures can emulate i386, so define these almost everywhere. */
struct timespeci386 {
	int32_t			tv_sec;
	int32_t			tv_nsec;
};

struct umtx_timei386 {
	struct	timespeci386	_timeout;
	uint32_t		_flags;
	uint32_t		_clockid;
};

#if defined(__LP64__)
#define	timespecx32	timespec32
#define	umtx_timex32	umtx_time32
#endif
#endif

static int
umtx_copyin_robust_lists32(const void *uaddr, size_t size,
    struct umtx_robust_lists_params *rbp)
{
	struct umtx_robust_lists_params_compat32 rb32;
	int error;

	if (size > sizeof(rb32))
		return (EINVAL);
	bzero(&rb32, sizeof(rb32));
	error = copyin(uaddr, &rb32, size);
	if (error != 0)
		return (error);
	CP(rb32, *rbp, robust_list_offset);
	CP(rb32, *rbp, robust_priv_list_offset);
	CP(rb32, *rbp, robust_inact_offset);
	return (0);
}

#ifndef __i386__
static inline int
umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
{
	struct timespeci386 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timei386 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (t32._timeout.tv_sec < 0 ||
	    t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

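/*
 * Copy a remaining relative timeout back out to userspace in i386
 * layout; used on the sem2_wait path when the wait is interrupted, so
 * that the caller can see how much of the timeout was left unslept.
 */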
static int
umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespeci386 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller: sz == uaddr1 -
	 * sizeof(_umtx_time), and we are only called if sz >=
	 * sizeof(timespec), as supplied in the copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* !__i386__ */

#if defined(__i386__) || defined(__LP64__)
static inline int
umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
{
	struct timespecx32 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timex32 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (t32._timeout.tv_sec < 0 ||
	    t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

static int
umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespecx32 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller: sz == uaddr1 -
	 * sizeof(_umtx_time), and we are only called if sz >=
	 * sizeof(timespec), as supplied in the copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* __i386__ || __LP64__ */

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *umtx_ops);

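/*
 * Dispatch table for _umtx_op(2), indexed by the UMTX_OP_* constant
 * once the UMTX_OP__32BIT/UMTX_OP__I386 flags have been masked off.
 * Retired operations fall through to __umtx_op_unimpl unless the
 * matching COMPAT_FREEBSD* option is configured.
 */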
static const _umtx_op_func op_table[] = {
#ifdef COMPAT_FREEBSD10
	[UMTX_OP_LOCK]		= __umtx_op_lock_umtx,
	[UMTX_OP_UNLOCK]	= __umtx_op_unlock_umtx,
#else
	[UMTX_OP_LOCK]		= __umtx_op_unimpl,
	[UMTX_OP_UNLOCK]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
};

static const struct umtx_copyops umtx_native_ops = {
	.copyin_timeout = umtx_copyin_timeout,
	.copyin_umtx_time = umtx_copyin_umtx_time,
	.copyin_robust_lists = umtx_copyin_robust_lists,
	.copyout_timeout = umtx_copyout_timeout,
	.timespec_sz = sizeof(struct timespec),
	.umtx_time_sz = sizeof(struct _umtx_time),
};

#ifndef __i386__
static const struct umtx_copyops umtx_native_opsi386 = {
	.copyin_timeout = umtx_copyin_timeouti386,
	.copyin_umtx_time = umtx_copyin_umtx_timei386,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeouti386,
	.timespec_sz = sizeof(struct timespeci386),
	.umtx_time_sz = sizeof(struct umtx_timei386),
	.compat32 = true,
};
#endif

#if defined(__i386__) || defined(__LP64__)
/* i386 can emulate other 32-bit archs, too! */
static const struct umtx_copyops umtx_native_opsx32 = {
	.copyin_timeout = umtx_copyin_timeoutx32,
	.copyin_umtx_time = umtx_copyin_umtx_timex32,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeoutx32,
	.timespec_sz = sizeof(struct timespecx32),
	.umtx_time_sz = sizeof(struct umtx_timex32),
	.compat32 = true,
};

#ifdef COMPAT_FREEBSD32
#ifdef __amd64__
#define	umtx_native_ops32	umtx_native_opsi386
#else
#define	umtx_native_ops32	umtx_native_opsx32
#endif
#endif /* COMPAT_FREEBSD32 */
#endif /* __i386__ || __LP64__ */

#define	UMTX_OP__FLAGS	(UMTX_OP__32BIT | UMTX_OP__I386)

static int
kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
    void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
{
	struct _umtx_op_args uap = {
		.obj = obj,
		.op = op & ~UMTX_OP__FLAGS,
		.val = val,
		.uaddr1 = uaddr1,
		.uaddr2 = uaddr2
	};

	if (uap.op >= nitems(op_table))
		return (EINVAL);
	return ((*op_table[uap.op])(td, &uap, ops));
}

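/*
 * System call entry point: pick the copyops matching the ABI
 * requested by the UMTX_OP__32BIT/UMTX_OP__I386 flags, then dispatch
 * through kern__umtx_op().
 */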
int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	const struct umtx_copyops *umtx_ops;

	umtx_ops = &umtx_native_ops;
#ifdef __LP64__
	if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
		if ((uap->op & UMTX_OP__I386) != 0)
			umtx_ops = &umtx_native_opsi386;
		else
			umtx_ops = &umtx_native_opsx32;
	}
#elif !defined(__i386__)
	/* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
	if ((uap->op & UMTX_OP__I386) != 0)
		umtx_ops = &umtx_native_opsi386;
#else
	/* Likewise, UMTX_OP__I386 is a nop on i386. */
	if ((uap->op & UMTX_OP__32BIT) != 0)
		umtx_ops = &umtx_native_opsx32;
#endif
	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
	    uap->uaddr2, umtx_ops));
}

#ifdef COMPAT_FREEBSD32
#ifdef COMPAT_FREEBSD10
int
freebsd10_freebsd32__umtx_lock(struct thread *td,
    struct freebsd10_freebsd32__umtx_lock_args *uap)
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd10_freebsd32__umtx_unlock(struct thread *td,
    struct freebsd10_freebsd32__umtx_unlock_args *uap)
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
#endif /* COMPAT_FREEBSD10 */

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
	    uap->uaddr2, &umtx_native_ops32));
}
#endif /* COMPAT_FREEBSD32 */

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists of all of the process's threads, without
 * delaying the cleanup until thread exit, since the relevant address
 * space is being destroyed right now.
 */
void
umtx_exec(struct proc *p)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
}

/*
 * Thread exit hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}

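/*
 * Fetch a user-space pointer, sized for the registered robust-list
 * ABI (32-bit when compat32 is true); any fault is reported as
 * EFAULT.
 */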
static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
{
	u_long res1;
	uint32_t res32;
	int error;

	if (compat32) {
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else {
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}

static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
    bool compat32)
{
	struct umutex32 m32;

	if (compat32) {
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else {
		*rb_list = m->m_rb_lnk;
	}
}

static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
    bool compat32)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list, compat32);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}

static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name, bool compat32)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp, compat32);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}

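/*
 * Sketch of the user-space layout walked by umtx_cleanup_rb_list()
 * above and driven by umtx_thread_cleanup() below (illustrative, as
 * implied by the code): td_rb_list and td_rbp_list hold the
 * user-space addresses of list head pointers, and each listed umutex
 * names its successor in m_rb_lnk:
 *
 *	td_rb_list -> head -> umutex A -- m_rb_lnk --> umutex B --> 0
 *
 * td_rb_inact points at a slot naming a single mutex that was being
 * acquired or released when the thread terminated; it is processed
 * with inact == true to tolerate the window where the slot is stale.
 */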
/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;
	bool compat32;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		if (uq->uq_inherited_pri != PRI_MAX ||
		    !TAILQ_EMPTY(&uq->uq_pi_contested)) {
			mtx_lock(&umtx_lock);
			uq->uq_inherited_pri = PRI_MAX;
			while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
				pi->pi_owner = NULL;
				TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
			}
			mtx_unlock(&umtx_lock);
		}
		sched_lend_user_prio_cond(td, PRI_MAX);
	}

	compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
	td->td_pflags2 &= ~TDP2_COMPAT32RB;

	if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
		return;

	/*
	 * Handle terminated robust mutexes.  Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
	 */
	rb_inact = td->td_rb_inact;
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
}