/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>
#include <sys/umtxvar.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#include <compat/freebsd32/freebsd32.h>
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY	1
#define	_UMUTEX_WAIT	2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)				\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)
#ifdef INVARIANTS
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {			\
	struct umtxq_chain *uc;					\
								\
	uc = umtxq_getchain(key);				\
	mtx_assert(&uc->uc_lock, MA_OWNED);			\
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));	\
} while (0)
#else
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0)
#endif

/*
 * Don't propagate time-sharing priority,
 * for a security reason: a user can simply create a PI mutex, let
 * thread A lock the mutex, and let another thread B block on it.
 * Because B is sleeping, its priority is boosted, and priority
 * propagation boosts A's priority as well; A's priority would then
 * never be lowered, even if it consumed 100% of a CPU, which would be
 * unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "If false, force destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t	umtx_pi_zone;
static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int		umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, "umtx chain stats");
#endif

static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime);
static int umtx_abs_timeout_gethz(struct umtx_abs_timeout *timo);
static inline void umtx_abs_timeout_update(struct umtx_abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
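/*
 * Register, under debug.umtx.chains, one sysctl node per hash chain,
 * exposing the peak queue length seen by each chain of the two tables
 * (max_length0 and max_length1) so that the distribution of keys across
 * chains can be inspected at run time.
 */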
194 static void 195 umtx_init_profiling(void) 196 { 197 struct sysctl_oid *chain_oid; 198 char chain_name[10]; 199 int i; 200 201 for (i = 0; i < UMTX_CHAINS; ++i) { 202 snprintf(chain_name, sizeof(chain_name), "%d", i); 203 chain_oid = SYSCTL_ADD_NODE(NULL, 204 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 205 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 206 "umtx hash stats"); 207 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 208 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 209 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 210 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 211 } 212 } 213 214 static int 215 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 216 { 217 char buf[512]; 218 struct sbuf sb; 219 struct umtxq_chain *uc; 220 u_int fract, i, j, tot, whole; 221 u_int sf0, sf1, sf2, sf3, sf4; 222 u_int si0, si1, si2, si3, si4; 223 u_int sw0, sw1, sw2, sw3, sw4; 224 225 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 226 for (i = 0; i < 2; i++) { 227 tot = 0; 228 for (j = 0; j < UMTX_CHAINS; ++j) { 229 uc = &umtxq_chains[i][j]; 230 mtx_lock(&uc->uc_lock); 231 tot += uc->max_length; 232 mtx_unlock(&uc->uc_lock); 233 } 234 if (tot == 0) 235 sbuf_printf(&sb, "%u) Empty ", i); 236 else { 237 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 238 si0 = si1 = si2 = si3 = si4 = 0; 239 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 240 for (j = 0; j < UMTX_CHAINS; j++) { 241 uc = &umtxq_chains[i][j]; 242 mtx_lock(&uc->uc_lock); 243 whole = uc->max_length * 100; 244 mtx_unlock(&uc->uc_lock); 245 fract = (whole % tot) * 100; 246 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 247 sf0 = fract; 248 si0 = j; 249 sw0 = whole; 250 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 251 sf1)) { 252 sf1 = fract; 253 si1 = j; 254 sw1 = whole; 255 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 256 sf2)) { 257 sf2 = fract; 258 si2 = j; 259 sw2 = whole; 260 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 261 sf3)) { 262 sf3 = fract; 263 si3 = j; 264 sw3 = whole; 265 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 266 sf4)) { 267 sf4 = fract; 268 si4 = j; 269 sw4 = whole; 270 } 271 } 272 sbuf_printf(&sb, "queue %u:\n", i); 273 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 274 sf0 / tot, si0); 275 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 276 sf1 / tot, si1); 277 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 278 sf2 / tot, si2); 279 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 280 sf3 / tot, si3); 281 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 282 sf4 / tot, si4); 283 } 284 } 285 sbuf_trim(&sb); 286 sbuf_finish(&sb); 287 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 288 sbuf_delete(&sb); 289 return (0); 290 } 291 292 static int 293 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 294 { 295 struct umtxq_chain *uc; 296 u_int i, j; 297 int clear, error; 298 299 clear = 0; 300 error = sysctl_handle_int(oidp, &clear, 0, req); 301 if (error != 0 || req->newptr == NULL) 302 return (error); 303 304 if (clear != 0) { 305 for (i = 0; i < 2; ++i) { 306 for (j = 0; j < UMTX_CHAINS; ++j) { 307 uc = &umtxq_chains[i][j]; 308 mtx_lock(&uc->uc_lock); 309 uc->length = 0; 310 uc->max_length = 0; 311 mtx_unlock(&uc->uc_lock); 312 } 313 } 314 } 315 return (0); 316 } 317 318 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 319 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 320 sysctl_debug_umtx_chains_clear, "I", 321 "Clear umtx chains statistics"); 322 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 323 
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 324 sysctl_debug_umtx_chains_peaks, "A", 325 "Highest peaks in chains max length"); 326 #endif 327 328 static void 329 umtxq_sysinit(void *arg __unused) 330 { 331 int i, j; 332 333 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 334 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 335 for (i = 0; i < 2; ++i) { 336 for (j = 0; j < UMTX_CHAINS; ++j) { 337 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 338 MTX_DEF | MTX_DUPOK); 339 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 340 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 341 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 342 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 343 umtxq_chains[i][j].uc_busy = 0; 344 umtxq_chains[i][j].uc_waiters = 0; 345 #ifdef UMTX_PROFILING 346 umtxq_chains[i][j].length = 0; 347 umtxq_chains[i][j].max_length = 0; 348 #endif 349 } 350 } 351 #ifdef UMTX_PROFILING 352 umtx_init_profiling(); 353 #endif 354 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 355 umtx_shm_init(); 356 } 357 358 struct umtx_q * 359 umtxq_alloc(void) 360 { 361 struct umtx_q *uq; 362 363 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 364 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 365 M_WAITOK | M_ZERO); 366 TAILQ_INIT(&uq->uq_spare_queue->head); 367 TAILQ_INIT(&uq->uq_pi_contested); 368 uq->uq_inherited_pri = PRI_MAX; 369 return (uq); 370 } 371 372 void 373 umtxq_free(struct umtx_q *uq) 374 { 375 376 MPASS(uq->uq_spare_queue != NULL); 377 free(uq->uq_spare_queue, M_UMTX); 378 free(uq, M_UMTX); 379 } 380 381 static inline void 382 umtxq_hash(struct umtx_key *key) 383 { 384 unsigned n; 385 386 n = (uintptr_t)key->info.both.a + key->info.both.b; 387 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 388 } 389 390 struct umtxq_chain * 391 umtxq_getchain(struct umtx_key *key) 392 { 393 394 if (key->type <= TYPE_SEM) 395 return (&umtxq_chains[1][key->hash]); 396 return (&umtxq_chains[0][key->hash]); 397 } 398 399 /* 400 * Set chain to busy state when following operation 401 * may be blocked (kernel mutex can not be used). 402 */ 403 void 404 umtxq_busy(struct umtx_key *key) 405 { 406 struct umtxq_chain *uc; 407 408 uc = umtxq_getchain(key); 409 mtx_assert(&uc->uc_lock, MA_OWNED); 410 if (uc->uc_busy) { 411 #ifdef SMP 412 if (smp_cpus > 1) { 413 int count = BUSY_SPINS; 414 if (count > 0) { 415 umtxq_unlock(key); 416 while (uc->uc_busy && --count > 0) 417 cpu_spinwait(); 418 umtxq_lock(key); 419 } 420 } 421 #endif 422 while (uc->uc_busy) { 423 uc->uc_waiters++; 424 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 425 uc->uc_waiters--; 426 } 427 } 428 uc->uc_busy = 1; 429 } 430 431 /* 432 * Unbusy a chain. 
 */
void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters queued on a userland object.
 */
int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters queued on a userland object and
 * report the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
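
/*
 * For orientation: the queue primitives above back the _umtx_op(2)
 * system call.  A minimal userspace wait/wake pairing (an illustrative
 * sketch, not code from this file) could look like:
 *
 *	u_int futex = 0;
 *
 *	// Waiter: sleep if the word still holds the expected value (0).
 *	_umtx_op(&futex, UMTX_OP_WAIT_UINT_PRIVATE, 0, NULL, NULL);
 *
 *	// Waker: wake up to INT_MAX threads queued on the same address.
 *	_umtx_op(&futex, UMTX_OP_WAKE_PRIVATE, INT_MAX, NULL, NULL);
 *
 * The kernel hashes the address with umtxq_hash(), queues the waiter on
 * the matching chain with umtxq_insert(), and umtxq_signal() wakes it.
 */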

/*
 * Wake up threads waiting on a userland object by a bit mask.
 */
int
umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq, *uq_temp;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh == NULL)
		return (0);
	TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
		if ((uq->uq_bitset & bitset) == 0)
			continue;
		umtxq_remove_queue(uq, UMTX_SHARED_QUEUE);
		wakeup_one(uq);
		if (++ret >= n_wake)
			break;
	}
	return (ret);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Wake up a maximum of n_wake threads that are waiting on a userland
 * object identified by key.  The remaining threads are removed from the
 * queue identified by key and added to the queue identified by key2
 * (requeued).  The n_requeue argument specifies an upper limit on the
 * number of threads that are requeued to the second queue.
 */
int
umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2,
    int n_requeue)
{
	struct umtxq_queue *uh, *uh2;
	struct umtx_q *uq, *uq_temp;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	uh2 = umtxq_queue_lookup(key2, UMTX_SHARED_QUEUE);
	if (uh == NULL)
		return (0);
	TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
		if (++ret <= n_wake) {
			umtxq_remove(uq);
			wakeup_one(uq);
		} else {
			umtxq_remove(uq);
			uq->uq_key = *key2;
			umtxq_insert(uq);
			if (ret - n_wake == n_requeue)
				break;
		}
	}
	return (ret);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

void
umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid,
    int absolute, const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		umtx_abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
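		 * Recording td_rtcgen lets an absolute CLOCK_REALTIME
		 * timeout be re-evaluated if the real-time clock is
		 * stepped while the thread sleeps.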
697 */ 698 if (!timo->is_abs_real) { 699 umtx_abs_timeout_update(timo); 700 } 701 } 702 } 703 704 static void 705 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 706 const struct _umtx_time *umtxtime) 707 { 708 709 umtx_abs_timeout_init(timo, umtxtime->_clockid, 710 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 711 } 712 713 static void 714 umtx_abs_timeout_update(struct umtx_abs_timeout *timo) 715 { 716 717 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 718 } 719 720 static int 721 umtx_abs_timeout_gethz(struct umtx_abs_timeout *timo) 722 { 723 struct timespec tts; 724 725 if (timespeccmp(&timo->end, &timo->cur, <=)) 726 return (-1); 727 timespecsub(&timo->end, &timo->cur, &tts); 728 return (tstohz(&tts)); 729 } 730 731 static uint32_t 732 umtx_unlock_val(uint32_t flags, bool rb) 733 { 734 735 if (rb) 736 return (UMUTEX_RB_OWNERDEAD); 737 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 738 return (UMUTEX_RB_NOTRECOV); 739 else 740 return (UMUTEX_UNOWNED); 741 742 } 743 744 /* 745 * Put thread into sleep state, before sleeping, check if 746 * thread was removed from umtx queue. 747 */ 748 int 749 umtxq_sleep(struct umtx_q *uq, const char *wmesg, 750 struct umtx_abs_timeout *abstime) 751 { 752 struct umtxq_chain *uc; 753 int error, timo; 754 755 if (abstime != NULL && abstime->is_abs_real) { 756 curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); 757 umtx_abs_timeout_update(abstime); 758 } 759 760 uc = umtxq_getchain(&uq->uq_key); 761 UMTXQ_LOCKED_ASSERT(uc); 762 for (;;) { 763 if (!(uq->uq_flags & UQF_UMTXQ)) { 764 error = 0; 765 break; 766 } 767 if (abstime != NULL) { 768 timo = umtx_abs_timeout_gethz(abstime); 769 if (timo < 0) { 770 error = ETIMEDOUT; 771 break; 772 } 773 } else 774 timo = 0; 775 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 776 if (error == EINTR || error == ERESTART) { 777 umtxq_lock(&uq->uq_key); 778 break; 779 } 780 if (abstime != NULL) { 781 if (abstime->is_abs_real) 782 curthread->td_rtcgen = 783 atomic_load_acq_int(&rtc_generation); 784 umtx_abs_timeout_update(abstime); 785 } 786 umtxq_lock(&uq->uq_key); 787 } 788 789 curthread->td_rtcgen = 0; 790 return (error); 791 } 792 793 /* 794 * Convert userspace address into unique logical address. 795 */ 796 int 797 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 798 { 799 struct thread *td = curthread; 800 vm_map_t map; 801 vm_map_entry_t entry; 802 vm_pindex_t pindex; 803 vm_prot_t prot; 804 boolean_t wired; 805 806 key->type = type; 807 if (share == THREAD_SHARE) { 808 key->shared = 0; 809 key->info.private.vs = td->td_proc->p_vmspace; 810 key->info.private.addr = (uintptr_t)addr; 811 } else { 812 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 813 map = &td->td_proc->p_vmspace->vm_map; 814 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 815 &entry, &key->info.shared.object, &pindex, &prot, 816 &wired) != KERN_SUCCESS) { 817 return (EFAULT); 818 } 819 820 if ((share == PROCESS_SHARE) || 821 (share == AUTO_SHARE && 822 VM_INHERIT_SHARE == entry->inheritance)) { 823 key->shared = 1; 824 key->info.shared.offset = (vm_offset_t)addr - 825 entry->start + entry->offset; 826 vm_object_reference(key->info.shared.object); 827 } else { 828 key->shared = 0; 829 key->info.private.vs = td->td_proc->p_vmspace; 830 key->info.private.addr = (uintptr_t)addr; 831 } 832 vm_map_lookup_done(map, entry); 833 } 834 835 umtxq_hash(key); 836 return (0); 837 } 838 839 /* 840 * Release key. 
841 */ 842 void 843 umtx_key_release(struct umtx_key *key) 844 { 845 if (key->shared) 846 vm_object_deallocate(key->info.shared.object); 847 } 848 849 #ifdef COMPAT_FREEBSD10 850 /* 851 * Lock a umtx object. 852 */ 853 static int 854 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 855 const struct timespec *timeout) 856 { 857 struct umtx_abs_timeout timo; 858 struct umtx_q *uq; 859 u_long owner; 860 u_long old; 861 int error = 0; 862 863 uq = td->td_umtxq; 864 if (timeout != NULL) 865 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 866 867 /* 868 * Care must be exercised when dealing with umtx structure. It 869 * can fault on any access. 870 */ 871 for (;;) { 872 /* 873 * Try the uncontested case. This should be done in userland. 874 */ 875 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 876 877 /* The acquire succeeded. */ 878 if (owner == UMTX_UNOWNED) 879 return (0); 880 881 /* The address was invalid. */ 882 if (owner == -1) 883 return (EFAULT); 884 885 /* If no one owns it but it is contested try to acquire it. */ 886 if (owner == UMTX_CONTESTED) { 887 owner = casuword(&umtx->u_owner, 888 UMTX_CONTESTED, id | UMTX_CONTESTED); 889 890 if (owner == UMTX_CONTESTED) 891 return (0); 892 893 /* The address was invalid. */ 894 if (owner == -1) 895 return (EFAULT); 896 897 error = thread_check_susp(td, false); 898 if (error != 0) 899 break; 900 901 /* If this failed the lock has changed, restart. */ 902 continue; 903 } 904 905 /* 906 * If we caught a signal, we have retried and now 907 * exit immediately. 908 */ 909 if (error != 0) 910 break; 911 912 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 913 AUTO_SHARE, &uq->uq_key)) != 0) 914 return (error); 915 916 umtxq_lock(&uq->uq_key); 917 umtxq_busy(&uq->uq_key); 918 umtxq_insert(uq); 919 umtxq_unbusy(&uq->uq_key); 920 umtxq_unlock(&uq->uq_key); 921 922 /* 923 * Set the contested bit so that a release in user space 924 * knows to use the system call for unlock. If this fails 925 * either some one else has acquired the lock or it has been 926 * released. 927 */ 928 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 929 930 /* The address was invalid. */ 931 if (old == -1) { 932 umtxq_lock(&uq->uq_key); 933 umtxq_remove(uq); 934 umtxq_unlock(&uq->uq_key); 935 umtx_key_release(&uq->uq_key); 936 return (EFAULT); 937 } 938 939 /* 940 * We set the contested bit, sleep. Otherwise the lock changed 941 * and we need to retry or we lost a race to the thread 942 * unlocking the umtx. 943 */ 944 umtxq_lock(&uq->uq_key); 945 if (old == owner) 946 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL : 947 &timo); 948 umtxq_remove(uq); 949 umtxq_unlock(&uq->uq_key); 950 umtx_key_release(&uq->uq_key); 951 952 if (error == 0) 953 error = thread_check_susp(td, false); 954 } 955 956 if (timeout == NULL) { 957 /* Mutex locking is restarted if it is interrupted. */ 958 if (error == EINTR) 959 error = ERESTART; 960 } else { 961 /* Timed-locking is not restarted. */ 962 if (error == ERESTART) 963 error = EINTR; 964 } 965 return (error); 966 } 967 968 /* 969 * Unlock a umtx object. 970 */ 971 static int 972 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 973 { 974 struct umtx_key key; 975 u_long owner; 976 u_long old; 977 int error; 978 int count; 979 980 /* 981 * Make sure we own this mtx. 
982 */ 983 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 984 if (owner == -1) 985 return (EFAULT); 986 987 if ((owner & ~UMTX_CONTESTED) != id) 988 return (EPERM); 989 990 /* This should be done in userland */ 991 if ((owner & UMTX_CONTESTED) == 0) { 992 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 993 if (old == -1) 994 return (EFAULT); 995 if (old == owner) 996 return (0); 997 owner = old; 998 } 999 1000 /* We should only ever be in here for contested locks */ 1001 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1002 &key)) != 0) 1003 return (error); 1004 1005 umtxq_lock(&key); 1006 umtxq_busy(&key); 1007 count = umtxq_count(&key); 1008 umtxq_unlock(&key); 1009 1010 /* 1011 * When unlocking the umtx, it must be marked as unowned if 1012 * there is zero or one thread only waiting for it. 1013 * Otherwise, it must be marked as contested. 1014 */ 1015 old = casuword(&umtx->u_owner, owner, 1016 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 1017 umtxq_lock(&key); 1018 umtxq_signal(&key,1); 1019 umtxq_unbusy(&key); 1020 umtxq_unlock(&key); 1021 umtx_key_release(&key); 1022 if (old == -1) 1023 return (EFAULT); 1024 if (old != owner) 1025 return (EINVAL); 1026 return (0); 1027 } 1028 1029 #ifdef COMPAT_FREEBSD32 1030 1031 /* 1032 * Lock a umtx object. 1033 */ 1034 static int 1035 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1036 const struct timespec *timeout) 1037 { 1038 struct umtx_abs_timeout timo; 1039 struct umtx_q *uq; 1040 uint32_t owner; 1041 uint32_t old; 1042 int error = 0; 1043 1044 uq = td->td_umtxq; 1045 1046 if (timeout != NULL) 1047 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1048 1049 /* 1050 * Care must be exercised when dealing with umtx structure. It 1051 * can fault on any access. 1052 */ 1053 for (;;) { 1054 /* 1055 * Try the uncontested case. This should be done in userland. 1056 */ 1057 owner = casuword32(m, UMUTEX_UNOWNED, id); 1058 1059 /* The acquire succeeded. */ 1060 if (owner == UMUTEX_UNOWNED) 1061 return (0); 1062 1063 /* The address was invalid. */ 1064 if (owner == -1) 1065 return (EFAULT); 1066 1067 /* If no one owns it but it is contested try to acquire it. */ 1068 if (owner == UMUTEX_CONTESTED) { 1069 owner = casuword32(m, 1070 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1071 if (owner == UMUTEX_CONTESTED) 1072 return (0); 1073 1074 /* The address was invalid. */ 1075 if (owner == -1) 1076 return (EFAULT); 1077 1078 error = thread_check_susp(td, false); 1079 if (error != 0) 1080 break; 1081 1082 /* If this failed the lock has changed, restart. */ 1083 continue; 1084 } 1085 1086 /* 1087 * If we caught a signal, we have retried and now 1088 * exit immediately. 1089 */ 1090 if (error != 0) 1091 return (error); 1092 1093 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1094 AUTO_SHARE, &uq->uq_key)) != 0) 1095 return (error); 1096 1097 umtxq_lock(&uq->uq_key); 1098 umtxq_busy(&uq->uq_key); 1099 umtxq_insert(uq); 1100 umtxq_unbusy(&uq->uq_key); 1101 umtxq_unlock(&uq->uq_key); 1102 1103 /* 1104 * Set the contested bit so that a release in user space 1105 * knows to use the system call for unlock. If this fails 1106 * either some one else has acquired the lock or it has been 1107 * released. 1108 */ 1109 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1110 1111 /* The address was invalid. 
*/ 1112 if (old == -1) { 1113 umtxq_lock(&uq->uq_key); 1114 umtxq_remove(uq); 1115 umtxq_unlock(&uq->uq_key); 1116 umtx_key_release(&uq->uq_key); 1117 return (EFAULT); 1118 } 1119 1120 /* 1121 * We set the contested bit, sleep. Otherwise the lock changed 1122 * and we need to retry or we lost a race to the thread 1123 * unlocking the umtx. 1124 */ 1125 umtxq_lock(&uq->uq_key); 1126 if (old == owner) 1127 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1128 NULL : &timo); 1129 umtxq_remove(uq); 1130 umtxq_unlock(&uq->uq_key); 1131 umtx_key_release(&uq->uq_key); 1132 1133 if (error == 0) 1134 error = thread_check_susp(td, false); 1135 } 1136 1137 if (timeout == NULL) { 1138 /* Mutex locking is restarted if it is interrupted. */ 1139 if (error == EINTR) 1140 error = ERESTART; 1141 } else { 1142 /* Timed-locking is not restarted. */ 1143 if (error == ERESTART) 1144 error = EINTR; 1145 } 1146 return (error); 1147 } 1148 1149 /* 1150 * Unlock a umtx object. 1151 */ 1152 static int 1153 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1154 { 1155 struct umtx_key key; 1156 uint32_t owner; 1157 uint32_t old; 1158 int error; 1159 int count; 1160 1161 /* 1162 * Make sure we own this mtx. 1163 */ 1164 owner = fuword32(m); 1165 if (owner == -1) 1166 return (EFAULT); 1167 1168 if ((owner & ~UMUTEX_CONTESTED) != id) 1169 return (EPERM); 1170 1171 /* This should be done in userland */ 1172 if ((owner & UMUTEX_CONTESTED) == 0) { 1173 old = casuword32(m, owner, UMUTEX_UNOWNED); 1174 if (old == -1) 1175 return (EFAULT); 1176 if (old == owner) 1177 return (0); 1178 owner = old; 1179 } 1180 1181 /* We should only ever be in here for contested locks */ 1182 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1183 &key)) != 0) 1184 return (error); 1185 1186 umtxq_lock(&key); 1187 umtxq_busy(&key); 1188 count = umtxq_count(&key); 1189 umtxq_unlock(&key); 1190 1191 /* 1192 * When unlocking the umtx, it must be marked as unowned if 1193 * there is zero or one thread only waiting for it. 1194 * Otherwise, it must be marked as contested. 1195 */ 1196 old = casuword32(m, owner, 1197 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1198 umtxq_lock(&key); 1199 umtxq_signal(&key,1); 1200 umtxq_unbusy(&key); 1201 umtxq_unlock(&key); 1202 umtx_key_release(&key); 1203 if (old == -1) 1204 return (EFAULT); 1205 if (old != owner) 1206 return (EINVAL); 1207 return (0); 1208 } 1209 #endif /* COMPAT_FREEBSD32 */ 1210 #endif /* COMPAT_FREEBSD10 */ 1211 1212 /* 1213 * Fetch and compare value, sleep on the address if value is not changed. 1214 */ 1215 static int 1216 do_wait(struct thread *td, void *addr, u_long id, 1217 struct _umtx_time *timeout, int compat32, int is_private) 1218 { 1219 struct umtx_abs_timeout timo; 1220 struct umtx_q *uq; 1221 u_long tmp; 1222 uint32_t tmp32; 1223 int error = 0; 1224 1225 uq = td->td_umtxq; 1226 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1227 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1228 return (error); 1229 1230 if (timeout != NULL) 1231 umtx_abs_timeout_init2(&timo, timeout); 1232 1233 umtxq_lock(&uq->uq_key); 1234 umtxq_insert(uq); 1235 umtxq_unlock(&uq->uq_key); 1236 if (compat32 == 0) { 1237 error = fueword(addr, &tmp); 1238 if (error != 0) 1239 error = EFAULT; 1240 } else { 1241 error = fueword32(addr, &tmp32); 1242 if (error == 0) 1243 tmp = tmp32; 1244 else 1245 error = EFAULT; 1246 } 1247 umtxq_lock(&uq->uq_key); 1248 if (error == 0) { 1249 if (tmp == id) 1250 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 
1251 NULL : &timo); 1252 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1253 error = 0; 1254 else 1255 umtxq_remove(uq); 1256 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 1257 umtxq_remove(uq); 1258 } 1259 umtxq_unlock(&uq->uq_key); 1260 umtx_key_release(&uq->uq_key); 1261 if (error == ERESTART) 1262 error = EINTR; 1263 return (error); 1264 } 1265 1266 /* 1267 * Wake up threads sleeping on the specified address. 1268 */ 1269 int 1270 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1271 { 1272 struct umtx_key key; 1273 int ret; 1274 1275 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1276 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1277 return (ret); 1278 umtxq_lock(&key); 1279 umtxq_signal(&key, n_wake); 1280 umtxq_unlock(&key); 1281 umtx_key_release(&key); 1282 return (0); 1283 } 1284 1285 /* 1286 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1287 */ 1288 static int 1289 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1290 struct _umtx_time *timeout, int mode) 1291 { 1292 struct umtx_abs_timeout timo; 1293 struct umtx_q *uq; 1294 uint32_t owner, old, id; 1295 int error, rv; 1296 1297 id = td->td_tid; 1298 uq = td->td_umtxq; 1299 error = 0; 1300 if (timeout != NULL) 1301 umtx_abs_timeout_init2(&timo, timeout); 1302 1303 /* 1304 * Care must be exercised when dealing with umtx structure. It 1305 * can fault on any access. 1306 */ 1307 for (;;) { 1308 rv = fueword32(&m->m_owner, &owner); 1309 if (rv == -1) 1310 return (EFAULT); 1311 if (mode == _UMUTEX_WAIT) { 1312 if (owner == UMUTEX_UNOWNED || 1313 owner == UMUTEX_CONTESTED || 1314 owner == UMUTEX_RB_OWNERDEAD || 1315 owner == UMUTEX_RB_NOTRECOV) 1316 return (0); 1317 } else { 1318 /* 1319 * Robust mutex terminated. Kernel duty is to 1320 * return EOWNERDEAD to the userspace. The 1321 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1322 * by the common userspace code. 1323 */ 1324 if (owner == UMUTEX_RB_OWNERDEAD) { 1325 rv = casueword32(&m->m_owner, 1326 UMUTEX_RB_OWNERDEAD, &owner, 1327 id | UMUTEX_CONTESTED); 1328 if (rv == -1) 1329 return (EFAULT); 1330 if (rv == 0) { 1331 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1332 return (EOWNERDEAD); /* success */ 1333 } 1334 MPASS(rv == 1); 1335 rv = thread_check_susp(td, false); 1336 if (rv != 0) 1337 return (rv); 1338 continue; 1339 } 1340 if (owner == UMUTEX_RB_NOTRECOV) 1341 return (ENOTRECOVERABLE); 1342 1343 /* 1344 * Try the uncontested case. This should be 1345 * done in userland. 1346 */ 1347 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1348 &owner, id); 1349 /* The address was invalid. */ 1350 if (rv == -1) 1351 return (EFAULT); 1352 1353 /* The acquire succeeded. */ 1354 if (rv == 0) { 1355 MPASS(owner == UMUTEX_UNOWNED); 1356 return (0); 1357 } 1358 1359 /* 1360 * If no one owns it but it is contested try 1361 * to acquire it. 1362 */ 1363 MPASS(rv == 1); 1364 if (owner == UMUTEX_CONTESTED) { 1365 rv = casueword32(&m->m_owner, 1366 UMUTEX_CONTESTED, &owner, 1367 id | UMUTEX_CONTESTED); 1368 /* The address was invalid. */ 1369 if (rv == -1) 1370 return (EFAULT); 1371 if (rv == 0) { 1372 MPASS(owner == UMUTEX_CONTESTED); 1373 return (0); 1374 } 1375 if (rv == 1) { 1376 rv = thread_check_susp(td, false); 1377 if (rv != 0) 1378 return (rv); 1379 } 1380 1381 /* 1382 * If this failed the lock has 1383 * changed, restart. 
1384 */ 1385 continue; 1386 } 1387 1388 /* rv == 1 but not contested, likely store failure */ 1389 rv = thread_check_susp(td, false); 1390 if (rv != 0) 1391 return (rv); 1392 } 1393 1394 if (mode == _UMUTEX_TRY) 1395 return (EBUSY); 1396 1397 /* 1398 * If we caught a signal, we have retried and now 1399 * exit immediately. 1400 */ 1401 if (error != 0) 1402 return (error); 1403 1404 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1405 GET_SHARE(flags), &uq->uq_key)) != 0) 1406 return (error); 1407 1408 umtxq_lock(&uq->uq_key); 1409 umtxq_busy(&uq->uq_key); 1410 umtxq_insert(uq); 1411 umtxq_unlock(&uq->uq_key); 1412 1413 /* 1414 * Set the contested bit so that a release in user space 1415 * knows to use the system call for unlock. If this fails 1416 * either some one else has acquired the lock or it has been 1417 * released. 1418 */ 1419 rv = casueword32(&m->m_owner, owner, &old, 1420 owner | UMUTEX_CONTESTED); 1421 1422 /* The address was invalid or casueword failed to store. */ 1423 if (rv == -1 || rv == 1) { 1424 umtxq_lock(&uq->uq_key); 1425 umtxq_remove(uq); 1426 umtxq_unbusy(&uq->uq_key); 1427 umtxq_unlock(&uq->uq_key); 1428 umtx_key_release(&uq->uq_key); 1429 if (rv == -1) 1430 return (EFAULT); 1431 if (rv == 1) { 1432 rv = thread_check_susp(td, false); 1433 if (rv != 0) 1434 return (rv); 1435 } 1436 continue; 1437 } 1438 1439 /* 1440 * We set the contested bit, sleep. Otherwise the lock changed 1441 * and we need to retry or we lost a race to the thread 1442 * unlocking the umtx. 1443 */ 1444 umtxq_lock(&uq->uq_key); 1445 umtxq_unbusy(&uq->uq_key); 1446 MPASS(old == owner); 1447 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1448 NULL : &timo); 1449 umtxq_remove(uq); 1450 umtxq_unlock(&uq->uq_key); 1451 umtx_key_release(&uq->uq_key); 1452 1453 if (error == 0) 1454 error = thread_check_susp(td, false); 1455 } 1456 1457 return (0); 1458 } 1459 1460 /* 1461 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1462 */ 1463 static int 1464 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1465 { 1466 struct umtx_key key; 1467 uint32_t owner, old, id, newlock; 1468 int error, count; 1469 1470 id = td->td_tid; 1471 1472 again: 1473 /* 1474 * Make sure we own this mtx. 1475 */ 1476 error = fueword32(&m->m_owner, &owner); 1477 if (error == -1) 1478 return (EFAULT); 1479 1480 if ((owner & ~UMUTEX_CONTESTED) != id) 1481 return (EPERM); 1482 1483 newlock = umtx_unlock_val(flags, rb); 1484 if ((owner & UMUTEX_CONTESTED) == 0) { 1485 error = casueword32(&m->m_owner, owner, &old, newlock); 1486 if (error == -1) 1487 return (EFAULT); 1488 if (error == 1) { 1489 error = thread_check_susp(td, false); 1490 if (error != 0) 1491 return (error); 1492 goto again; 1493 } 1494 MPASS(old == owner); 1495 return (0); 1496 } 1497 1498 /* We should only ever be in here for contested locks */ 1499 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1500 &key)) != 0) 1501 return (error); 1502 1503 umtxq_lock(&key); 1504 umtxq_busy(&key); 1505 count = umtxq_count(&key); 1506 umtxq_unlock(&key); 1507 1508 /* 1509 * When unlocking the umtx, it must be marked as unowned if 1510 * there is zero or one thread only waiting for it. 1511 * Otherwise, it must be marked as contested. 
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check whether the mutex is available and wake up a waiter.  This is
 * used only for simple (non-PI, non-PP) mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check whether the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; that means
	 * the mutex is still being referenced by userland code.  Otherwise
	 * do not update any memory.
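	 * The CAS below is retried, with a suspension check in between,
	 * for as long as the owner word keeps changing while still
	 * lacking the contested bit.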
1651 */ 1652 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1653 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1654 error = casueword32(&m->m_owner, owner, &old, 1655 owner | UMUTEX_CONTESTED); 1656 if (error == -1) { 1657 error = EFAULT; 1658 break; 1659 } 1660 if (error == 0) { 1661 MPASS(old == owner); 1662 break; 1663 } 1664 owner = old; 1665 error = thread_check_susp(td, false); 1666 } 1667 1668 umtxq_lock(&key); 1669 if (error == EFAULT) { 1670 umtxq_signal(&key, INT_MAX); 1671 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1672 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1673 umtxq_signal(&key, 1); 1674 umtxq_unbusy(&key); 1675 umtxq_unlock(&key); 1676 umtx_key_release(&key); 1677 return (error); 1678 } 1679 1680 struct umtx_pi * 1681 umtx_pi_alloc(int flags) 1682 { 1683 struct umtx_pi *pi; 1684 1685 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1686 TAILQ_INIT(&pi->pi_blocked); 1687 atomic_add_int(&umtx_pi_allocated, 1); 1688 return (pi); 1689 } 1690 1691 void 1692 umtx_pi_free(struct umtx_pi *pi) 1693 { 1694 uma_zfree(umtx_pi_zone, pi); 1695 atomic_add_int(&umtx_pi_allocated, -1); 1696 } 1697 1698 /* 1699 * Adjust the thread's position on a pi_state after its priority has been 1700 * changed. 1701 */ 1702 static int 1703 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1704 { 1705 struct umtx_q *uq, *uq1, *uq2; 1706 struct thread *td1; 1707 1708 mtx_assert(&umtx_lock, MA_OWNED); 1709 if (pi == NULL) 1710 return (0); 1711 1712 uq = td->td_umtxq; 1713 1714 /* 1715 * Check if the thread needs to be moved on the blocked chain. 1716 * It needs to be moved if either its priority is lower than 1717 * the previous thread or higher than the next thread. 1718 */ 1719 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1720 uq2 = TAILQ_NEXT(uq, uq_lockq); 1721 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1722 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1723 /* 1724 * Remove thread from blocked chain and determine where 1725 * it should be moved to. 1726 */ 1727 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1728 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1729 td1 = uq1->uq_thread; 1730 MPASS(td1->td_proc->p_magic == P_MAGIC); 1731 if (UPRI(td1) > UPRI(td)) 1732 break; 1733 } 1734 1735 if (uq1 == NULL) 1736 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1737 else 1738 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1739 } 1740 return (1); 1741 } 1742 1743 static struct umtx_pi * 1744 umtx_pi_next(struct umtx_pi *pi) 1745 { 1746 struct umtx_q *uq_owner; 1747 1748 if (pi->pi_owner == NULL) 1749 return (NULL); 1750 uq_owner = pi->pi_owner->td_umtxq; 1751 if (uq_owner == NULL) 1752 return (NULL); 1753 return (uq_owner->uq_pi_blocked); 1754 } 1755 1756 /* 1757 * Floyd's Cycle-Finding Algorithm. 1758 */ 1759 static bool 1760 umtx_pi_check_loop(struct umtx_pi *pi) 1761 { 1762 struct umtx_pi *pi1; /* fast iterator */ 1763 1764 mtx_assert(&umtx_lock, MA_OWNED); 1765 if (pi == NULL) 1766 return (false); 1767 pi1 = pi; 1768 for (;;) { 1769 pi = umtx_pi_next(pi); 1770 if (pi == NULL) 1771 break; 1772 pi1 = umtx_pi_next(pi1); 1773 if (pi1 == NULL) 1774 break; 1775 pi1 = umtx_pi_next(pi1); 1776 if (pi1 == NULL) 1777 break; 1778 if (pi == pi1) 1779 return (true); 1780 } 1781 return (false); 1782 } 1783 1784 /* 1785 * Propagate priority when a thread is blocked on POSIX 1786 * PI mutex. 
1787 */ 1788 static void 1789 umtx_propagate_priority(struct thread *td) 1790 { 1791 struct umtx_q *uq; 1792 struct umtx_pi *pi; 1793 int pri; 1794 1795 mtx_assert(&umtx_lock, MA_OWNED); 1796 pri = UPRI(td); 1797 uq = td->td_umtxq; 1798 pi = uq->uq_pi_blocked; 1799 if (pi == NULL) 1800 return; 1801 if (umtx_pi_check_loop(pi)) 1802 return; 1803 1804 for (;;) { 1805 td = pi->pi_owner; 1806 if (td == NULL || td == curthread) 1807 return; 1808 1809 MPASS(td->td_proc != NULL); 1810 MPASS(td->td_proc->p_magic == P_MAGIC); 1811 1812 thread_lock(td); 1813 if (td->td_lend_user_pri > pri) 1814 sched_lend_user_prio(td, pri); 1815 else { 1816 thread_unlock(td); 1817 break; 1818 } 1819 thread_unlock(td); 1820 1821 /* 1822 * Pick up the lock that td is blocked on. 1823 */ 1824 uq = td->td_umtxq; 1825 pi = uq->uq_pi_blocked; 1826 if (pi == NULL) 1827 break; 1828 /* Resort td on the list if needed. */ 1829 umtx_pi_adjust_thread(pi, td); 1830 } 1831 } 1832 1833 /* 1834 * Unpropagate priority for a PI mutex when a thread blocked on 1835 * it is interrupted by signal or resumed by others. 1836 */ 1837 static void 1838 umtx_repropagate_priority(struct umtx_pi *pi) 1839 { 1840 struct umtx_q *uq, *uq_owner; 1841 struct umtx_pi *pi2; 1842 int pri; 1843 1844 mtx_assert(&umtx_lock, MA_OWNED); 1845 1846 if (umtx_pi_check_loop(pi)) 1847 return; 1848 while (pi != NULL && pi->pi_owner != NULL) { 1849 pri = PRI_MAX; 1850 uq_owner = pi->pi_owner->td_umtxq; 1851 1852 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1853 uq = TAILQ_FIRST(&pi2->pi_blocked); 1854 if (uq != NULL) { 1855 if (pri > UPRI(uq->uq_thread)) 1856 pri = UPRI(uq->uq_thread); 1857 } 1858 } 1859 1860 if (pri > uq_owner->uq_inherited_pri) 1861 pri = uq_owner->uq_inherited_pri; 1862 thread_lock(pi->pi_owner); 1863 sched_lend_user_prio(pi->pi_owner, pri); 1864 thread_unlock(pi->pi_owner); 1865 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1866 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1867 } 1868 } 1869 1870 /* 1871 * Insert a PI mutex into owned list. 1872 */ 1873 static void 1874 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1875 { 1876 struct umtx_q *uq_owner; 1877 1878 uq_owner = owner->td_umtxq; 1879 mtx_assert(&umtx_lock, MA_OWNED); 1880 MPASS(pi->pi_owner == NULL); 1881 pi->pi_owner = owner; 1882 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1883 } 1884 1885 /* 1886 * Disown a PI mutex, and remove it from the owned list. 1887 */ 1888 static void 1889 umtx_pi_disown(struct umtx_pi *pi) 1890 { 1891 1892 mtx_assert(&umtx_lock, MA_OWNED); 1893 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1894 pi->pi_owner = NULL; 1895 } 1896 1897 /* 1898 * Claim ownership of a PI mutex. 1899 */ 1900 int 1901 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1902 { 1903 struct umtx_q *uq; 1904 int pri; 1905 1906 mtx_lock(&umtx_lock); 1907 if (pi->pi_owner == owner) { 1908 mtx_unlock(&umtx_lock); 1909 return (0); 1910 } 1911 1912 if (pi->pi_owner != NULL) { 1913 /* 1914 * userland may have already messed the mutex, sigh. 
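		 * A different owner is already recorded for this PI mutex;
		 * refuse to reassign it and fail with EPERM.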
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the queue of the PI mutex it is blocked
 * on; this may start a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Drop a reference to a PI mutex; when the count reaches zero, its
 * memory is freed.
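 * The umtxq chain lock for pi_key must be held by the caller.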
2037 */ 2038 void 2039 umtx_pi_unref(struct umtx_pi *pi) 2040 { 2041 struct umtxq_chain *uc; 2042 2043 uc = umtxq_getchain(&pi->pi_key); 2044 UMTXQ_LOCKED_ASSERT(uc); 2045 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2046 if (--pi->pi_refcount == 0) { 2047 mtx_lock(&umtx_lock); 2048 if (pi->pi_owner != NULL) 2049 umtx_pi_disown(pi); 2050 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2051 ("blocked queue not empty")); 2052 mtx_unlock(&umtx_lock); 2053 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2054 umtx_pi_free(pi); 2055 } 2056 } 2057 2058 /* 2059 * Find a PI mutex in hash table. 2060 */ 2061 struct umtx_pi * 2062 umtx_pi_lookup(struct umtx_key *key) 2063 { 2064 struct umtxq_chain *uc; 2065 struct umtx_pi *pi; 2066 2067 uc = umtxq_getchain(key); 2068 UMTXQ_LOCKED_ASSERT(uc); 2069 2070 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2071 if (umtx_key_match(&pi->pi_key, key)) { 2072 return (pi); 2073 } 2074 } 2075 return (NULL); 2076 } 2077 2078 /* 2079 * Insert a PI mutex into hash table. 2080 */ 2081 void 2082 umtx_pi_insert(struct umtx_pi *pi) 2083 { 2084 struct umtxq_chain *uc; 2085 2086 uc = umtxq_getchain(&pi->pi_key); 2087 UMTXQ_LOCKED_ASSERT(uc); 2088 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2089 } 2090 2091 /* 2092 * Drop a PI mutex and wakeup a top waiter. 2093 */ 2094 int 2095 umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count) 2096 { 2097 struct umtx_q *uq_first, *uq_first2, *uq_me; 2098 struct umtx_pi *pi, *pi2; 2099 int pri; 2100 2101 UMTXQ_ASSERT_LOCKED_BUSY(key); 2102 *count = umtxq_count_pi(key, &uq_first); 2103 if (uq_first != NULL) { 2104 mtx_lock(&umtx_lock); 2105 pi = uq_first->uq_pi_blocked; 2106 KASSERT(pi != NULL, ("pi == NULL?")); 2107 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2108 mtx_unlock(&umtx_lock); 2109 /* userland messed the mutex */ 2110 return (EPERM); 2111 } 2112 uq_me = td->td_umtxq; 2113 if (pi->pi_owner == td) 2114 umtx_pi_disown(pi); 2115 /* get highest priority thread which is still sleeping. */ 2116 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2117 while (uq_first != NULL && 2118 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2119 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2120 } 2121 pri = PRI_MAX; 2122 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2123 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2124 if (uq_first2 != NULL) { 2125 if (pri > UPRI(uq_first2->uq_thread)) 2126 pri = UPRI(uq_first2->uq_thread); 2127 } 2128 } 2129 thread_lock(td); 2130 sched_lend_user_prio(td, pri); 2131 thread_unlock(td); 2132 mtx_unlock(&umtx_lock); 2133 if (uq_first) 2134 umtxq_signal_thread(uq_first); 2135 } else { 2136 pi = umtx_pi_lookup(key); 2137 /* 2138 * A umtx_pi can exist if a signal or timeout removed the 2139 * last waiter from the umtxq, but there is still 2140 * a thread in do_lock_pi() holding the umtx_pi. 2141 */ 2142 if (pi != NULL) { 2143 /* 2144 * The umtx_pi can be unowned, such as when a thread 2145 * has just entered do_lock_pi(), allocated the 2146 * umtx_pi, and unlocked the umtxq. 2147 * If the current thread owns it, it must disown it. 2148 */ 2149 mtx_lock(&umtx_lock); 2150 if (pi->pi_owner == td) 2151 umtx_pi_disown(pi); 2152 mtx_unlock(&umtx_lock); 2153 } 2154 } 2155 return (0); 2156 } 2157 2158 /* 2159 * Lock a PI mutex. 
2160 */ 2161 static int 2162 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 2163 struct _umtx_time *timeout, int try) 2164 { 2165 struct umtx_abs_timeout timo; 2166 struct umtx_q *uq; 2167 struct umtx_pi *pi, *new_pi; 2168 uint32_t id, old_owner, owner, old; 2169 int error, rv; 2170 2171 id = td->td_tid; 2172 uq = td->td_umtxq; 2173 2174 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2175 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2176 &uq->uq_key)) != 0) 2177 return (error); 2178 2179 if (timeout != NULL) 2180 umtx_abs_timeout_init2(&timo, timeout); 2181 2182 umtxq_lock(&uq->uq_key); 2183 pi = umtx_pi_lookup(&uq->uq_key); 2184 if (pi == NULL) { 2185 new_pi = umtx_pi_alloc(M_NOWAIT); 2186 if (new_pi == NULL) { 2187 umtxq_unlock(&uq->uq_key); 2188 new_pi = umtx_pi_alloc(M_WAITOK); 2189 umtxq_lock(&uq->uq_key); 2190 pi = umtx_pi_lookup(&uq->uq_key); 2191 if (pi != NULL) { 2192 umtx_pi_free(new_pi); 2193 new_pi = NULL; 2194 } 2195 } 2196 if (new_pi != NULL) { 2197 new_pi->pi_key = uq->uq_key; 2198 umtx_pi_insert(new_pi); 2199 pi = new_pi; 2200 } 2201 } 2202 umtx_pi_ref(pi); 2203 umtxq_unlock(&uq->uq_key); 2204 2205 /* 2206 * Care must be exercised when dealing with umtx structure. It 2207 * can fault on any access. 2208 */ 2209 for (;;) { 2210 /* 2211 * Try the uncontested case. This should be done in userland. 2212 */ 2213 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 2214 /* The address was invalid. */ 2215 if (rv == -1) { 2216 error = EFAULT; 2217 break; 2218 } 2219 /* The acquire succeeded. */ 2220 if (rv == 0) { 2221 MPASS(owner == UMUTEX_UNOWNED); 2222 error = 0; 2223 break; 2224 } 2225 2226 if (owner == UMUTEX_RB_NOTRECOV) { 2227 error = ENOTRECOVERABLE; 2228 break; 2229 } 2230 2231 /* 2232 * Avoid overwriting a possible error from sleep due 2233 * to the pending signal with suspension check result. 2234 */ 2235 if (error == 0) { 2236 error = thread_check_susp(td, true); 2237 if (error != 0) 2238 break; 2239 } 2240 2241 /* If no one owns it but it is contested try to acquire it. */ 2242 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2243 old_owner = owner; 2244 rv = casueword32(&m->m_owner, owner, &owner, 2245 id | UMUTEX_CONTESTED); 2246 /* The address was invalid. */ 2247 if (rv == -1) { 2248 error = EFAULT; 2249 break; 2250 } 2251 if (rv == 1) { 2252 if (error == 0) { 2253 error = thread_check_susp(td, true); 2254 if (error != 0) 2255 break; 2256 } 2257 2258 /* 2259 * If this failed the lock could 2260 * changed, restart. 2261 */ 2262 continue; 2263 } 2264 2265 MPASS(rv == 0); 2266 MPASS(owner == old_owner); 2267 umtxq_lock(&uq->uq_key); 2268 umtxq_busy(&uq->uq_key); 2269 error = umtx_pi_claim(pi, td); 2270 umtxq_unbusy(&uq->uq_key); 2271 umtxq_unlock(&uq->uq_key); 2272 if (error != 0) { 2273 /* 2274 * Since we're going to return an 2275 * error, restore the m_owner to its 2276 * previous, unowned state to avoid 2277 * compounding the problem. 2278 */ 2279 (void)casuword32(&m->m_owner, 2280 id | UMUTEX_CONTESTED, old_owner); 2281 } 2282 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2283 error = EOWNERDEAD; 2284 break; 2285 } 2286 2287 if ((owner & ~UMUTEX_CONTESTED) == id) { 2288 error = EDEADLK; 2289 break; 2290 } 2291 2292 if (try != 0) { 2293 error = EBUSY; 2294 break; 2295 } 2296 2297 /* 2298 * If we caught a signal, we have retried and now 2299 * exit immediately. 
2300 */ 2301 if (error != 0) 2302 break; 2303 2304 umtxq_lock(&uq->uq_key); 2305 umtxq_busy(&uq->uq_key); 2306 umtxq_unlock(&uq->uq_key); 2307 2308 /* 2309 * Set the contested bit so that a release in user space 2310 * knows to use the system call for unlock. If this fails 2311 * either some one else has acquired the lock or it has been 2312 * released. 2313 */ 2314 rv = casueword32(&m->m_owner, owner, &old, owner | 2315 UMUTEX_CONTESTED); 2316 2317 /* The address was invalid. */ 2318 if (rv == -1) { 2319 umtxq_unbusy_unlocked(&uq->uq_key); 2320 error = EFAULT; 2321 break; 2322 } 2323 if (rv == 1) { 2324 umtxq_unbusy_unlocked(&uq->uq_key); 2325 error = thread_check_susp(td, true); 2326 if (error != 0) 2327 break; 2328 2329 /* 2330 * The lock changed and we need to retry or we 2331 * lost a race to the thread unlocking the 2332 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2333 * value for owner is impossible there. 2334 */ 2335 continue; 2336 } 2337 2338 umtxq_lock(&uq->uq_key); 2339 2340 /* We set the contested bit, sleep. */ 2341 MPASS(old == owner); 2342 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2343 "umtxpi", timeout == NULL ? NULL : &timo, 2344 (flags & USYNC_PROCESS_SHARED) != 0); 2345 if (error != 0) 2346 continue; 2347 2348 error = thread_check_susp(td, false); 2349 if (error != 0) 2350 break; 2351 } 2352 2353 umtxq_lock(&uq->uq_key); 2354 umtx_pi_unref(pi); 2355 umtxq_unlock(&uq->uq_key); 2356 2357 umtx_key_release(&uq->uq_key); 2358 return (error); 2359 } 2360 2361 /* 2362 * Unlock a PI mutex. 2363 */ 2364 static int 2365 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2366 { 2367 struct umtx_key key; 2368 uint32_t id, new_owner, old, owner; 2369 int count, error; 2370 2371 id = td->td_tid; 2372 2373 usrloop: 2374 /* 2375 * Make sure we own this mtx. 2376 */ 2377 error = fueword32(&m->m_owner, &owner); 2378 if (error == -1) 2379 return (EFAULT); 2380 2381 if ((owner & ~UMUTEX_CONTESTED) != id) 2382 return (EPERM); 2383 2384 new_owner = umtx_unlock_val(flags, rb); 2385 2386 /* This should be done in userland */ 2387 if ((owner & UMUTEX_CONTESTED) == 0) { 2388 error = casueword32(&m->m_owner, owner, &old, new_owner); 2389 if (error == -1) 2390 return (EFAULT); 2391 if (error == 1) { 2392 error = thread_check_susp(td, true); 2393 if (error != 0) 2394 return (error); 2395 goto usrloop; 2396 } 2397 if (old == owner) 2398 return (0); 2399 owner = old; 2400 } 2401 2402 /* We should only ever be in here for contested locks */ 2403 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2404 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2405 &key)) != 0) 2406 return (error); 2407 2408 umtxq_lock(&key); 2409 umtxq_busy(&key); 2410 error = umtx_pi_drop(td, &key, rb, &count); 2411 if (error != 0) { 2412 umtxq_unbusy(&key); 2413 umtxq_unlock(&key); 2414 umtx_key_release(&key); 2415 /* userland messed the mutex */ 2416 return (error); 2417 } 2418 umtxq_unlock(&key); 2419 2420 /* 2421 * When unlocking the umtx, it must be marked as unowned if 2422 * there is zero or one thread only waiting for it. 2423 * Otherwise, it must be marked as contested. 
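 *
 * The m_owner encodings this relies on (as used throughout this file):
 *
 *	UMUTEX_UNOWNED		no owner, no waiters
 *	UMUTEX_CONTESTED	no owner, waiters may exist
 *	tid			owned by thread "tid", uncontested
 *	tid | UMUTEX_CONTESTED	owned by "tid", waiters may exist
 *	UMUTEX_RB_OWNERDEAD	robust mutex whose owner terminated
 *	UMUTEX_RB_NOTRECOV	robust mutex marked unrecoverable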
2424 */ 2425 2426 if (count > 1) 2427 new_owner |= UMUTEX_CONTESTED; 2428 again: 2429 error = casueword32(&m->m_owner, owner, &old, new_owner); 2430 if (error == 1) { 2431 error = thread_check_susp(td, false); 2432 if (error == 0) 2433 goto again; 2434 } 2435 umtxq_unbusy_unlocked(&key); 2436 umtx_key_release(&key); 2437 if (error == -1) 2438 return (EFAULT); 2439 if (error == 0 && old != owner) 2440 return (EINVAL); 2441 return (error); 2442 } 2443 2444 /* 2445 * Lock a PP mutex. 2446 */ 2447 static int 2448 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2449 struct _umtx_time *timeout, int try) 2450 { 2451 struct umtx_abs_timeout timo; 2452 struct umtx_q *uq, *uq2; 2453 struct umtx_pi *pi; 2454 uint32_t ceiling; 2455 uint32_t owner, id; 2456 int error, pri, old_inherited_pri, su, rv; 2457 2458 id = td->td_tid; 2459 uq = td->td_umtxq; 2460 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2461 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2462 &uq->uq_key)) != 0) 2463 return (error); 2464 2465 if (timeout != NULL) 2466 umtx_abs_timeout_init2(&timo, timeout); 2467 2468 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2469 for (;;) { 2470 old_inherited_pri = uq->uq_inherited_pri; 2471 umtxq_lock(&uq->uq_key); 2472 umtxq_busy(&uq->uq_key); 2473 umtxq_unlock(&uq->uq_key); 2474 2475 rv = fueword32(&m->m_ceilings[0], &ceiling); 2476 if (rv == -1) { 2477 error = EFAULT; 2478 goto out; 2479 } 2480 ceiling = RTP_PRIO_MAX - ceiling; 2481 if (ceiling > RTP_PRIO_MAX) { 2482 error = EINVAL; 2483 goto out; 2484 } 2485 2486 mtx_lock(&umtx_lock); 2487 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 2488 mtx_unlock(&umtx_lock); 2489 error = EINVAL; 2490 goto out; 2491 } 2492 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 2493 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 2494 thread_lock(td); 2495 if (uq->uq_inherited_pri < UPRI(td)) 2496 sched_lend_user_prio(td, uq->uq_inherited_pri); 2497 thread_unlock(td); 2498 } 2499 mtx_unlock(&umtx_lock); 2500 2501 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2502 id | UMUTEX_CONTESTED); 2503 /* The address was invalid. */ 2504 if (rv == -1) { 2505 error = EFAULT; 2506 break; 2507 } 2508 if (rv == 0) { 2509 MPASS(owner == UMUTEX_CONTESTED); 2510 error = 0; 2511 break; 2512 } 2513 /* rv == 1 */ 2514 if (owner == UMUTEX_RB_OWNERDEAD) { 2515 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2516 &owner, id | UMUTEX_CONTESTED); 2517 if (rv == -1) { 2518 error = EFAULT; 2519 break; 2520 } 2521 if (rv == 0) { 2522 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2523 error = EOWNERDEAD; /* success */ 2524 break; 2525 } 2526 2527 /* 2528 * rv == 1, only check for suspension if we 2529 * did not already catched a signal. If we 2530 * get an error from the check, the same 2531 * condition is checked by the umtxq_sleep() 2532 * call below, so we should obliterate the 2533 * error to not skip the last loop iteration. 2534 */ 2535 if (error == 0) { 2536 error = thread_check_susp(td, false); 2537 if (error == 0) { 2538 if (try != 0) 2539 error = EBUSY; 2540 else 2541 continue; 2542 } 2543 error = 0; 2544 } 2545 } else if (owner == UMUTEX_RB_NOTRECOV) { 2546 error = ENOTRECOVERABLE; 2547 } 2548 2549 if (try != 0) 2550 error = EBUSY; 2551 2552 /* 2553 * If we caught a signal, we have retried and now 2554 * exit immediately. 2555 */ 2556 if (error != 0) 2557 break; 2558 2559 umtxq_lock(&uq->uq_key); 2560 umtxq_insert(uq); 2561 umtxq_unbusy(&uq->uq_key); 2562 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 
2563 NULL : &timo); 2564 umtxq_remove(uq); 2565 umtxq_unlock(&uq->uq_key); 2566 2567 mtx_lock(&umtx_lock); 2568 uq->uq_inherited_pri = old_inherited_pri; 2569 pri = PRI_MAX; 2570 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2571 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2572 if (uq2 != NULL) { 2573 if (pri > UPRI(uq2->uq_thread)) 2574 pri = UPRI(uq2->uq_thread); 2575 } 2576 } 2577 if (pri > uq->uq_inherited_pri) 2578 pri = uq->uq_inherited_pri; 2579 thread_lock(td); 2580 sched_lend_user_prio(td, pri); 2581 thread_unlock(td); 2582 mtx_unlock(&umtx_lock); 2583 } 2584 2585 if (error != 0 && error != EOWNERDEAD) { 2586 mtx_lock(&umtx_lock); 2587 uq->uq_inherited_pri = old_inherited_pri; 2588 pri = PRI_MAX; 2589 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2590 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2591 if (uq2 != NULL) { 2592 if (pri > UPRI(uq2->uq_thread)) 2593 pri = UPRI(uq2->uq_thread); 2594 } 2595 } 2596 if (pri > uq->uq_inherited_pri) 2597 pri = uq->uq_inherited_pri; 2598 thread_lock(td); 2599 sched_lend_user_prio(td, pri); 2600 thread_unlock(td); 2601 mtx_unlock(&umtx_lock); 2602 } 2603 2604 out: 2605 umtxq_unbusy_unlocked(&uq->uq_key); 2606 umtx_key_release(&uq->uq_key); 2607 return (error); 2608 } 2609 2610 /* 2611 * Unlock a PP mutex. 2612 */ 2613 static int 2614 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2615 { 2616 struct umtx_key key; 2617 struct umtx_q *uq, *uq2; 2618 struct umtx_pi *pi; 2619 uint32_t id, owner, rceiling; 2620 int error, pri, new_inherited_pri, su; 2621 2622 id = td->td_tid; 2623 uq = td->td_umtxq; 2624 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2625 2626 /* 2627 * Make sure we own this mtx. 2628 */ 2629 error = fueword32(&m->m_owner, &owner); 2630 if (error == -1) 2631 return (EFAULT); 2632 2633 if ((owner & ~UMUTEX_CONTESTED) != id) 2634 return (EPERM); 2635 2636 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2637 if (error != 0) 2638 return (error); 2639 2640 if (rceiling == -1) 2641 new_inherited_pri = PRI_MAX; 2642 else { 2643 rceiling = RTP_PRIO_MAX - rceiling; 2644 if (rceiling > RTP_PRIO_MAX) 2645 return (EINVAL); 2646 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2647 } 2648 2649 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2650 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2651 &key)) != 0) 2652 return (error); 2653 umtxq_lock(&key); 2654 umtxq_busy(&key); 2655 umtxq_unlock(&key); 2656 /* 2657 * For priority protected mutex, always set unlocked state 2658 * to UMUTEX_CONTESTED, so that userland always enters kernel 2659 * to lock the mutex, it is necessary because thread priority 2660 * has to be adjusted for such mutex. 
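 *
 * A hedged userland sketch (assuming <pthread.h>, not part of this file)
 * of the setup that leads here; a PTHREAD_PRIO_PROTECT mutex with a
 * priority ceiling enters the kernel even when uncontested so the
 * ceiling can be applied on lock and re-evaluated on unlock:
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t m;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
 *	pthread_mutexattr_setprioceiling(&attr, 10);	// arbitrary ceiling
 *	pthread_mutex_init(&m, &attr);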
2661 */ 2662 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2663 UMUTEX_CONTESTED); 2664 2665 umtxq_lock(&key); 2666 if (error == 0) 2667 umtxq_signal(&key, 1); 2668 umtxq_unbusy(&key); 2669 umtxq_unlock(&key); 2670 2671 if (error == -1) 2672 error = EFAULT; 2673 else { 2674 mtx_lock(&umtx_lock); 2675 if (su != 0) 2676 uq->uq_inherited_pri = new_inherited_pri; 2677 pri = PRI_MAX; 2678 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2679 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2680 if (uq2 != NULL) { 2681 if (pri > UPRI(uq2->uq_thread)) 2682 pri = UPRI(uq2->uq_thread); 2683 } 2684 } 2685 if (pri > uq->uq_inherited_pri) 2686 pri = uq->uq_inherited_pri; 2687 thread_lock(td); 2688 sched_lend_user_prio(td, pri); 2689 thread_unlock(td); 2690 mtx_unlock(&umtx_lock); 2691 } 2692 umtx_key_release(&key); 2693 return (error); 2694 } 2695 2696 static int 2697 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2698 uint32_t *old_ceiling) 2699 { 2700 struct umtx_q *uq; 2701 uint32_t flags, id, owner, save_ceiling; 2702 int error, rv, rv1; 2703 2704 error = fueword32(&m->m_flags, &flags); 2705 if (error == -1) 2706 return (EFAULT); 2707 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2708 return (EINVAL); 2709 if (ceiling > RTP_PRIO_MAX) 2710 return (EINVAL); 2711 id = td->td_tid; 2712 uq = td->td_umtxq; 2713 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2714 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2715 &uq->uq_key)) != 0) 2716 return (error); 2717 for (;;) { 2718 umtxq_lock(&uq->uq_key); 2719 umtxq_busy(&uq->uq_key); 2720 umtxq_unlock(&uq->uq_key); 2721 2722 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2723 if (rv == -1) { 2724 error = EFAULT; 2725 break; 2726 } 2727 2728 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2729 id | UMUTEX_CONTESTED); 2730 if (rv == -1) { 2731 error = EFAULT; 2732 break; 2733 } 2734 2735 if (rv == 0) { 2736 MPASS(owner == UMUTEX_CONTESTED); 2737 rv = suword32(&m->m_ceilings[0], ceiling); 2738 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2739 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2740 break; 2741 } 2742 2743 if ((owner & ~UMUTEX_CONTESTED) == id) { 2744 rv = suword32(&m->m_ceilings[0], ceiling); 2745 error = rv == 0 ? 0 : EFAULT; 2746 break; 2747 } 2748 2749 if (owner == UMUTEX_RB_OWNERDEAD) { 2750 error = EOWNERDEAD; 2751 break; 2752 } else if (owner == UMUTEX_RB_NOTRECOV) { 2753 error = ENOTRECOVERABLE; 2754 break; 2755 } 2756 2757 /* 2758 * If we caught a signal, we have retried and now 2759 * exit immediately. 2760 */ 2761 if (error != 0) 2762 break; 2763 2764 /* 2765 * We set the contested bit, sleep. Otherwise the lock changed 2766 * and we need to retry or we lost a race to the thread 2767 * unlocking the umtx. 2768 */ 2769 umtxq_lock(&uq->uq_key); 2770 umtxq_insert(uq); 2771 umtxq_unbusy(&uq->uq_key); 2772 error = umtxq_sleep(uq, "umtxpp", NULL); 2773 umtxq_remove(uq); 2774 umtxq_unlock(&uq->uq_key); 2775 } 2776 umtxq_lock(&uq->uq_key); 2777 if (error == 0) 2778 umtxq_signal(&uq->uq_key, INT_MAX); 2779 umtxq_unbusy(&uq->uq_key); 2780 umtxq_unlock(&uq->uq_key); 2781 umtx_key_release(&uq->uq_key); 2782 if (error == 0 && old_ceiling != NULL) { 2783 rv = suword32(old_ceiling, save_ceiling); 2784 error = rv == 0 ? 0 : EFAULT; 2785 } 2786 return (error); 2787 } 2788 2789 /* 2790 * Lock a userland POSIX mutex. 
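 *
 * Entered through the _umtx_op(2) system call; a hedged sketch of the
 * raw invocation that libthr normally issues on the caller's behalf
 * (uaddr1 carries the size of the timeout structure, uaddr2 points at
 * it, and "mp" is a placeholder for a shared struct umutex pointer):
 *
 *	struct _umtx_time to = {
 *		._timeout = { .tv_sec = 1, .tv_nsec = 0 },
 *		._flags = 0,			// relative timeout
 *		._clockid = CLOCK_MONOTONIC,
 *	};
 *	_umtx_op(mp, UMTX_OP_MUTEX_LOCK, 0, (void *)sizeof(to), &to);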
2791 */ 2792 static int 2793 do_lock_umutex(struct thread *td, struct umutex *m, 2794 struct _umtx_time *timeout, int mode) 2795 { 2796 uint32_t flags; 2797 int error; 2798 2799 error = fueword32(&m->m_flags, &flags); 2800 if (error == -1) 2801 return (EFAULT); 2802 2803 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2804 case 0: 2805 error = do_lock_normal(td, m, flags, timeout, mode); 2806 break; 2807 case UMUTEX_PRIO_INHERIT: 2808 error = do_lock_pi(td, m, flags, timeout, mode); 2809 break; 2810 case UMUTEX_PRIO_PROTECT: 2811 error = do_lock_pp(td, m, flags, timeout, mode); 2812 break; 2813 default: 2814 return (EINVAL); 2815 } 2816 if (timeout == NULL) { 2817 if (error == EINTR && mode != _UMUTEX_WAIT) 2818 error = ERESTART; 2819 } else { 2820 /* Timed-locking is not restarted. */ 2821 if (error == ERESTART) 2822 error = EINTR; 2823 } 2824 return (error); 2825 } 2826 2827 /* 2828 * Unlock a userland POSIX mutex. 2829 */ 2830 static int 2831 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2832 { 2833 uint32_t flags; 2834 int error; 2835 2836 error = fueword32(&m->m_flags, &flags); 2837 if (error == -1) 2838 return (EFAULT); 2839 2840 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2841 case 0: 2842 return (do_unlock_normal(td, m, flags, rb)); 2843 case UMUTEX_PRIO_INHERIT: 2844 return (do_unlock_pi(td, m, flags, rb)); 2845 case UMUTEX_PRIO_PROTECT: 2846 return (do_unlock_pp(td, m, flags, rb)); 2847 } 2848 2849 return (EINVAL); 2850 } 2851 2852 static int 2853 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2854 struct timespec *timeout, u_long wflags) 2855 { 2856 struct umtx_abs_timeout timo; 2857 struct umtx_q *uq; 2858 uint32_t flags, clockid, hasw; 2859 int error; 2860 2861 uq = td->td_umtxq; 2862 error = fueword32(&cv->c_flags, &flags); 2863 if (error == -1) 2864 return (EFAULT); 2865 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2866 if (error != 0) 2867 return (error); 2868 2869 if ((wflags & CVWAIT_CLOCKID) != 0) { 2870 error = fueword32(&cv->c_clockid, &clockid); 2871 if (error == -1) { 2872 umtx_key_release(&uq->uq_key); 2873 return (EFAULT); 2874 } 2875 if (clockid < CLOCK_REALTIME || 2876 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2877 /* hmm, only HW clock id will work. */ 2878 umtx_key_release(&uq->uq_key); 2879 return (EINVAL); 2880 } 2881 } else { 2882 clockid = CLOCK_REALTIME; 2883 } 2884 2885 umtxq_lock(&uq->uq_key); 2886 umtxq_busy(&uq->uq_key); 2887 umtxq_insert(uq); 2888 umtxq_unlock(&uq->uq_key); 2889 2890 /* 2891 * Set c_has_waiters to 1 before releasing user mutex, also 2892 * don't modify cache line when unnecessary. 2893 */ 2894 error = fueword32(&cv->c_has_waiters, &hasw); 2895 if (error == 0 && hasw == 0) 2896 suword32(&cv->c_has_waiters, 1); 2897 2898 umtxq_unbusy_unlocked(&uq->uq_key); 2899 2900 error = do_unlock_umutex(td, m, false); 2901 2902 if (timeout != NULL) 2903 umtx_abs_timeout_init(&timo, clockid, 2904 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2905 2906 umtxq_lock(&uq->uq_key); 2907 if (error == 0) { 2908 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2909 NULL : &timo); 2910 } 2911 2912 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2913 error = 0; 2914 else { 2915 /* 2916 * This must be timeout,interrupted by signal or 2917 * surprious wakeup, clear c_has_waiter flag when 2918 * necessary. 
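 *
 * (do_cv_wait() itself is reached from pthread_cond_wait() and
 * pthread_cond_timedwait() via UMTX_OP_CV_WAIT; a hedged sketch of the
 * usual caller-side pattern, with "predicate" as a placeholder:
 *
 *	pthread_mutex_lock(&m);
 *	while (!predicate)
 *		pthread_cond_wait(&cv, &m);	// libthr re-locks m afterwards
 *	pthread_mutex_unlock(&m);
 * )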
2919 */ 2920 umtxq_busy(&uq->uq_key); 2921 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2922 int oldlen = uq->uq_cur_queue->length; 2923 umtxq_remove(uq); 2924 if (oldlen == 1) { 2925 umtxq_unlock(&uq->uq_key); 2926 suword32(&cv->c_has_waiters, 0); 2927 umtxq_lock(&uq->uq_key); 2928 } 2929 } 2930 umtxq_unbusy(&uq->uq_key); 2931 if (error == ERESTART) 2932 error = EINTR; 2933 } 2934 2935 umtxq_unlock(&uq->uq_key); 2936 umtx_key_release(&uq->uq_key); 2937 return (error); 2938 } 2939 2940 /* 2941 * Signal a userland condition variable. 2942 */ 2943 static int 2944 do_cv_signal(struct thread *td, struct ucond *cv) 2945 { 2946 struct umtx_key key; 2947 int error, cnt, nwake; 2948 uint32_t flags; 2949 2950 error = fueword32(&cv->c_flags, &flags); 2951 if (error == -1) 2952 return (EFAULT); 2953 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2954 return (error); 2955 umtxq_lock(&key); 2956 umtxq_busy(&key); 2957 cnt = umtxq_count(&key); 2958 nwake = umtxq_signal(&key, 1); 2959 if (cnt <= nwake) { 2960 umtxq_unlock(&key); 2961 error = suword32(&cv->c_has_waiters, 0); 2962 if (error == -1) 2963 error = EFAULT; 2964 umtxq_lock(&key); 2965 } 2966 umtxq_unbusy(&key); 2967 umtxq_unlock(&key); 2968 umtx_key_release(&key); 2969 return (error); 2970 } 2971 2972 static int 2973 do_cv_broadcast(struct thread *td, struct ucond *cv) 2974 { 2975 struct umtx_key key; 2976 int error; 2977 uint32_t flags; 2978 2979 error = fueword32(&cv->c_flags, &flags); 2980 if (error == -1) 2981 return (EFAULT); 2982 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2983 return (error); 2984 2985 umtxq_lock(&key); 2986 umtxq_busy(&key); 2987 umtxq_signal(&key, INT_MAX); 2988 umtxq_unlock(&key); 2989 2990 error = suword32(&cv->c_has_waiters, 0); 2991 if (error == -1) 2992 error = EFAULT; 2993 2994 umtxq_unbusy_unlocked(&key); 2995 2996 umtx_key_release(&key); 2997 return (error); 2998 } 2999 3000 static int 3001 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3002 struct _umtx_time *timeout) 3003 { 3004 struct umtx_abs_timeout timo; 3005 struct umtx_q *uq; 3006 uint32_t flags, wrflags; 3007 int32_t state, oldstate; 3008 int32_t blocked_readers; 3009 int error, error1, rv; 3010 3011 uq = td->td_umtxq; 3012 error = fueword32(&rwlock->rw_flags, &flags); 3013 if (error == -1) 3014 return (EFAULT); 3015 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3016 if (error != 0) 3017 return (error); 3018 3019 if (timeout != NULL) 3020 umtx_abs_timeout_init2(&timo, timeout); 3021 3022 wrflags = URWLOCK_WRITE_OWNER; 3023 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3024 wrflags |= URWLOCK_WRITE_WAITERS; 3025 3026 for (;;) { 3027 rv = fueword32(&rwlock->rw_state, &state); 3028 if (rv == -1) { 3029 umtx_key_release(&uq->uq_key); 3030 return (EFAULT); 3031 } 3032 3033 /* try to lock it */ 3034 while (!(state & wrflags)) { 3035 if (__predict_false(URWLOCK_READER_COUNT(state) == 3036 URWLOCK_MAX_READERS)) { 3037 umtx_key_release(&uq->uq_key); 3038 return (EAGAIN); 3039 } 3040 rv = casueword32(&rwlock->rw_state, state, 3041 &oldstate, state + 1); 3042 if (rv == -1) { 3043 umtx_key_release(&uq->uq_key); 3044 return (EFAULT); 3045 } 3046 if (rv == 0) { 3047 MPASS(oldstate == state); 3048 umtx_key_release(&uq->uq_key); 3049 return (0); 3050 } 3051 error = thread_check_susp(td, true); 3052 if (error != 0) 3053 break; 3054 state = oldstate; 3055 } 3056 3057 if (error) 3058 break; 3059 3060 /* grab monitor lock */ 3061 umtxq_lock(&uq->uq_key); 
3062 umtxq_busy(&uq->uq_key); 3063 umtxq_unlock(&uq->uq_key); 3064 3065 /* 3066 * re-read the state, in case it changed between the try-lock above 3067 * and the check below 3068 */ 3069 rv = fueword32(&rwlock->rw_state, &state); 3070 if (rv == -1) 3071 error = EFAULT; 3072 3073 /* set read contention bit */ 3074 while (error == 0 && (state & wrflags) && 3075 !(state & URWLOCK_READ_WAITERS)) { 3076 rv = casueword32(&rwlock->rw_state, state, 3077 &oldstate, state | URWLOCK_READ_WAITERS); 3078 if (rv == -1) { 3079 error = EFAULT; 3080 break; 3081 } 3082 if (rv == 0) { 3083 MPASS(oldstate == state); 3084 goto sleep; 3085 } 3086 state = oldstate; 3087 error = thread_check_susp(td, false); 3088 if (error != 0) 3089 break; 3090 } 3091 if (error != 0) { 3092 umtxq_unbusy_unlocked(&uq->uq_key); 3093 break; 3094 } 3095 3096 /* state is changed while setting flags, restart */ 3097 if (!(state & wrflags)) { 3098 umtxq_unbusy_unlocked(&uq->uq_key); 3099 error = thread_check_susp(td, true); 3100 if (error != 0) 3101 break; 3102 continue; 3103 } 3104 3105 sleep: 3106 /* 3107 * Contention bit is set, before sleeping, increase 3108 * read waiter count. 3109 */ 3110 rv = fueword32(&rwlock->rw_blocked_readers, 3111 &blocked_readers); 3112 if (rv == -1) { 3113 umtxq_unbusy_unlocked(&uq->uq_key); 3114 error = EFAULT; 3115 break; 3116 } 3117 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 3118 3119 while (state & wrflags) { 3120 umtxq_lock(&uq->uq_key); 3121 umtxq_insert(uq); 3122 umtxq_unbusy(&uq->uq_key); 3123 3124 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3125 NULL : &timo); 3126 3127 umtxq_busy(&uq->uq_key); 3128 umtxq_remove(uq); 3129 umtxq_unlock(&uq->uq_key); 3130 if (error) 3131 break; 3132 rv = fueword32(&rwlock->rw_state, &state); 3133 if (rv == -1) { 3134 error = EFAULT; 3135 break; 3136 } 3137 } 3138 3139 /* decrease read waiter count, and may clear read contention bit */ 3140 rv = fueword32(&rwlock->rw_blocked_readers, 3141 &blocked_readers); 3142 if (rv == -1) { 3143 umtxq_unbusy_unlocked(&uq->uq_key); 3144 error = EFAULT; 3145 break; 3146 } 3147 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 3148 if (blocked_readers == 1) { 3149 rv = fueword32(&rwlock->rw_state, &state); 3150 if (rv == -1) { 3151 umtxq_unbusy_unlocked(&uq->uq_key); 3152 error = EFAULT; 3153 break; 3154 } 3155 for (;;) { 3156 rv = casueword32(&rwlock->rw_state, state, 3157 &oldstate, state & ~URWLOCK_READ_WAITERS); 3158 if (rv == -1) { 3159 error = EFAULT; 3160 break; 3161 } 3162 if (rv == 0) { 3163 MPASS(oldstate == state); 3164 break; 3165 } 3166 state = oldstate; 3167 error1 = thread_check_susp(td, false); 3168 if (error1 != 0) { 3169 if (error == 0) 3170 error = error1; 3171 break; 3172 } 3173 } 3174 } 3175 3176 umtxq_unbusy_unlocked(&uq->uq_key); 3177 if (error != 0) 3178 break; 3179 } 3180 umtx_key_release(&uq->uq_key); 3181 if (error == ERESTART) 3182 error = EINTR; 3183 return (error); 3184 } 3185 3186 static int 3187 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3188 { 3189 struct umtx_abs_timeout timo; 3190 struct umtx_q *uq; 3191 uint32_t flags; 3192 int32_t state, oldstate; 3193 int32_t blocked_writers; 3194 int32_t blocked_readers; 3195 int error, error1, rv; 3196 3197 uq = td->td_umtxq; 3198 error = fueword32(&rwlock->rw_flags, &flags); 3199 if (error == -1) 3200 return (EFAULT); 3201 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3202 if (error != 0) 3203 return (error); 3204 3205 if (timeout != NULL) 3206 
umtx_abs_timeout_init2(&timo, timeout); 3207 3208 blocked_readers = 0; 3209 for (;;) { 3210 rv = fueword32(&rwlock->rw_state, &state); 3211 if (rv == -1) { 3212 umtx_key_release(&uq->uq_key); 3213 return (EFAULT); 3214 } 3215 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3216 URWLOCK_READER_COUNT(state) == 0) { 3217 rv = casueword32(&rwlock->rw_state, state, 3218 &oldstate, state | URWLOCK_WRITE_OWNER); 3219 if (rv == -1) { 3220 umtx_key_release(&uq->uq_key); 3221 return (EFAULT); 3222 } 3223 if (rv == 0) { 3224 MPASS(oldstate == state); 3225 umtx_key_release(&uq->uq_key); 3226 return (0); 3227 } 3228 state = oldstate; 3229 error = thread_check_susp(td, true); 3230 if (error != 0) 3231 break; 3232 } 3233 3234 if (error) { 3235 if ((state & (URWLOCK_WRITE_OWNER | 3236 URWLOCK_WRITE_WAITERS)) == 0 && 3237 blocked_readers != 0) { 3238 umtxq_lock(&uq->uq_key); 3239 umtxq_busy(&uq->uq_key); 3240 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3241 UMTX_SHARED_QUEUE); 3242 umtxq_unbusy(&uq->uq_key); 3243 umtxq_unlock(&uq->uq_key); 3244 } 3245 3246 break; 3247 } 3248 3249 /* grab monitor lock */ 3250 umtxq_lock(&uq->uq_key); 3251 umtxq_busy(&uq->uq_key); 3252 umtxq_unlock(&uq->uq_key); 3253 3254 /* 3255 * Re-read the state, in case it changed between the 3256 * try-lock above and the check below. 3257 */ 3258 rv = fueword32(&rwlock->rw_state, &state); 3259 if (rv == -1) 3260 error = EFAULT; 3261 3262 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3263 URWLOCK_READER_COUNT(state) != 0) && 3264 (state & URWLOCK_WRITE_WAITERS) == 0) { 3265 rv = casueword32(&rwlock->rw_state, state, 3266 &oldstate, state | URWLOCK_WRITE_WAITERS); 3267 if (rv == -1) { 3268 error = EFAULT; 3269 break; 3270 } 3271 if (rv == 0) { 3272 MPASS(oldstate == state); 3273 goto sleep; 3274 } 3275 state = oldstate; 3276 error = thread_check_susp(td, false); 3277 if (error != 0) 3278 break; 3279 } 3280 if (error != 0) { 3281 umtxq_unbusy_unlocked(&uq->uq_key); 3282 break; 3283 } 3284 3285 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3286 URWLOCK_READER_COUNT(state) == 0) { 3287 umtxq_unbusy_unlocked(&uq->uq_key); 3288 error = thread_check_susp(td, false); 3289 if (error != 0) 3290 break; 3291 continue; 3292 } 3293 sleep: 3294 rv = fueword32(&rwlock->rw_blocked_writers, 3295 &blocked_writers); 3296 if (rv == -1) { 3297 umtxq_unbusy_unlocked(&uq->uq_key); 3298 error = EFAULT; 3299 break; 3300 } 3301 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3302 3303 while ((state & URWLOCK_WRITE_OWNER) || 3304 URWLOCK_READER_COUNT(state) != 0) { 3305 umtxq_lock(&uq->uq_key); 3306 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3307 umtxq_unbusy(&uq->uq_key); 3308 3309 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3310 NULL : &timo); 3311 3312 umtxq_busy(&uq->uq_key); 3313 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3314 umtxq_unlock(&uq->uq_key); 3315 if (error) 3316 break; 3317 rv = fueword32(&rwlock->rw_state, &state); 3318 if (rv == -1) { 3319 error = EFAULT; 3320 break; 3321 } 3322 } 3323 3324 rv = fueword32(&rwlock->rw_blocked_writers, 3325 &blocked_writers); 3326 if (rv == -1) { 3327 umtxq_unbusy_unlocked(&uq->uq_key); 3328 error = EFAULT; 3329 break; 3330 } 3331 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3332 if (blocked_writers == 1) { 3333 rv = fueword32(&rwlock->rw_state, &state); 3334 if (rv == -1) { 3335 umtxq_unbusy_unlocked(&uq->uq_key); 3336 error = EFAULT; 3337 break; 3338 } 3339 for (;;) { 3340 rv = casueword32(&rwlock->rw_state, state, 3341 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3342 if (rv == -1) { 3343 error = EFAULT; 3344 break; 3345 } 3346 if (rv == 0) { 3347 MPASS(oldstate == state); 3348 break; 3349 } 3350 state = oldstate; 3351 error1 = thread_check_susp(td, false); 3352 /* 3353 * We are leaving the URWLOCK_WRITE_WAITERS 3354 * behind, but this should not harm the 3355 * correctness. 3356 */ 3357 if (error1 != 0) { 3358 if (error == 0) 3359 error = error1; 3360 break; 3361 } 3362 } 3363 rv = fueword32(&rwlock->rw_blocked_readers, 3364 &blocked_readers); 3365 if (rv == -1) { 3366 umtxq_unbusy_unlocked(&uq->uq_key); 3367 error = EFAULT; 3368 break; 3369 } 3370 } else 3371 blocked_readers = 0; 3372 3373 umtxq_unbusy_unlocked(&uq->uq_key); 3374 } 3375 3376 umtx_key_release(&uq->uq_key); 3377 if (error == ERESTART) 3378 error = EINTR; 3379 return (error); 3380 } 3381 3382 static int 3383 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3384 { 3385 struct umtx_q *uq; 3386 uint32_t flags; 3387 int32_t state, oldstate; 3388 int error, rv, q, count; 3389 3390 uq = td->td_umtxq; 3391 error = fueword32(&rwlock->rw_flags, &flags); 3392 if (error == -1) 3393 return (EFAULT); 3394 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3395 if (error != 0) 3396 return (error); 3397 3398 error = fueword32(&rwlock->rw_state, &state); 3399 if (error == -1) { 3400 error = EFAULT; 3401 goto out; 3402 } 3403 if (state & URWLOCK_WRITE_OWNER) { 3404 for (;;) { 3405 rv = casueword32(&rwlock->rw_state, state, 3406 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3407 if (rv == -1) { 3408 error = EFAULT; 3409 goto out; 3410 } 3411 if (rv == 1) { 3412 state = oldstate; 3413 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3414 error = EPERM; 3415 goto out; 3416 } 3417 error = thread_check_susp(td, true); 3418 if (error != 0) 3419 goto out; 3420 } else 3421 break; 3422 } 3423 } else if (URWLOCK_READER_COUNT(state) != 0) { 3424 for (;;) { 3425 rv = casueword32(&rwlock->rw_state, state, 3426 &oldstate, state - 1); 3427 if (rv == -1) { 3428 error = EFAULT; 3429 goto out; 3430 } 3431 if (rv == 1) { 3432 state = oldstate; 3433 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3434 error = EPERM; 3435 goto out; 3436 } 3437 error = thread_check_susp(td, true); 3438 if (error != 0) 3439 goto out; 3440 } else 3441 break; 3442 } 3443 } else { 3444 error = EPERM; 3445 goto out; 3446 } 3447 3448 count = 0; 3449 3450 if (!(flags & URWLOCK_PREFER_READER)) { 3451 if (state & URWLOCK_WRITE_WAITERS) { 3452 count = 1; 3453 q = UMTX_EXCLUSIVE_QUEUE; 3454 } else if (state & URWLOCK_READ_WAITERS) { 3455 count = INT_MAX; 3456 q = UMTX_SHARED_QUEUE; 3457 } 3458 } else { 3459 if (state & URWLOCK_READ_WAITERS) { 3460 count = INT_MAX; 3461 q = UMTX_SHARED_QUEUE; 3462 } else if (state & 
URWLOCK_WRITE_WAITERS) { 3463 count = 1; 3464 q = UMTX_EXCLUSIVE_QUEUE; 3465 } 3466 } 3467 3468 if (count) { 3469 umtxq_lock(&uq->uq_key); 3470 umtxq_busy(&uq->uq_key); 3471 umtxq_signal_queue(&uq->uq_key, count, q); 3472 umtxq_unbusy(&uq->uq_key); 3473 umtxq_unlock(&uq->uq_key); 3474 } 3475 out: 3476 umtx_key_release(&uq->uq_key); 3477 return (error); 3478 } 3479 3480 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3481 static int 3482 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3483 { 3484 struct umtx_abs_timeout timo; 3485 struct umtx_q *uq; 3486 uint32_t flags, count, count1; 3487 int error, rv, rv1; 3488 3489 uq = td->td_umtxq; 3490 error = fueword32(&sem->_flags, &flags); 3491 if (error == -1) 3492 return (EFAULT); 3493 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3494 if (error != 0) 3495 return (error); 3496 3497 if (timeout != NULL) 3498 umtx_abs_timeout_init2(&timo, timeout); 3499 3500 again: 3501 umtxq_lock(&uq->uq_key); 3502 umtxq_busy(&uq->uq_key); 3503 umtxq_insert(uq); 3504 umtxq_unlock(&uq->uq_key); 3505 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3506 if (rv == 0) 3507 rv1 = fueword32(&sem->_count, &count); 3508 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3509 (rv == 1 && count1 == 0)) { 3510 umtxq_lock(&uq->uq_key); 3511 umtxq_unbusy(&uq->uq_key); 3512 umtxq_remove(uq); 3513 umtxq_unlock(&uq->uq_key); 3514 if (rv == 1) { 3515 rv = thread_check_susp(td, true); 3516 if (rv == 0) 3517 goto again; 3518 error = rv; 3519 goto out; 3520 } 3521 if (rv == 0) 3522 rv = rv1; 3523 error = rv == -1 ? EFAULT : 0; 3524 goto out; 3525 } 3526 umtxq_lock(&uq->uq_key); 3527 umtxq_unbusy(&uq->uq_key); 3528 3529 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3530 3531 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3532 error = 0; 3533 else { 3534 umtxq_remove(uq); 3535 /* A relative timeout cannot be restarted. */ 3536 if (error == ERESTART && timeout != NULL && 3537 (timeout->_flags & UMTX_ABSTIME) == 0) 3538 error = EINTR; 3539 } 3540 umtxq_unlock(&uq->uq_key); 3541 out: 3542 umtx_key_release(&uq->uq_key); 3543 return (error); 3544 } 3545 3546 /* 3547 * Signal a userland semaphore. 3548 */ 3549 static int 3550 do_sem_wake(struct thread *td, struct _usem *sem) 3551 { 3552 struct umtx_key key; 3553 int error, cnt; 3554 uint32_t flags; 3555 3556 error = fueword32(&sem->_flags, &flags); 3557 if (error == -1) 3558 return (EFAULT); 3559 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3560 return (error); 3561 umtxq_lock(&key); 3562 umtxq_busy(&key); 3563 cnt = umtxq_count(&key); 3564 if (cnt > 0) { 3565 /* 3566 * Check if count is greater than 0, this means the memory is 3567 * still being referenced by user code, so we can safely 3568 * update _has_waiters flag. 
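 *
 * The suword32() below runs with the chain lock dropped because writes
 * to user memory may fault and sleep; the busy state taken above keeps
 * the wait queue stable in the meantime.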
3569 */ 3570 if (cnt == 1) { 3571 umtxq_unlock(&key); 3572 error = suword32(&sem->_has_waiters, 0); 3573 umtxq_lock(&key); 3574 if (error == -1) 3575 error = EFAULT; 3576 } 3577 umtxq_signal(&key, 1); 3578 } 3579 umtxq_unbusy(&key); 3580 umtxq_unlock(&key); 3581 umtx_key_release(&key); 3582 return (error); 3583 } 3584 #endif 3585 3586 static int 3587 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3588 { 3589 struct umtx_abs_timeout timo; 3590 struct umtx_q *uq; 3591 uint32_t count, flags; 3592 int error, rv; 3593 3594 uq = td->td_umtxq; 3595 flags = fuword32(&sem->_flags); 3596 if (timeout != NULL) 3597 umtx_abs_timeout_init2(&timo, timeout); 3598 3599 again: 3600 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3601 if (error != 0) 3602 return (error); 3603 umtxq_lock(&uq->uq_key); 3604 umtxq_busy(&uq->uq_key); 3605 umtxq_insert(uq); 3606 umtxq_unlock(&uq->uq_key); 3607 rv = fueword32(&sem->_count, &count); 3608 if (rv == -1) { 3609 umtxq_lock(&uq->uq_key); 3610 umtxq_unbusy(&uq->uq_key); 3611 umtxq_remove(uq); 3612 umtxq_unlock(&uq->uq_key); 3613 umtx_key_release(&uq->uq_key); 3614 return (EFAULT); 3615 } 3616 for (;;) { 3617 if (USEM_COUNT(count) != 0) { 3618 umtxq_lock(&uq->uq_key); 3619 umtxq_unbusy(&uq->uq_key); 3620 umtxq_remove(uq); 3621 umtxq_unlock(&uq->uq_key); 3622 umtx_key_release(&uq->uq_key); 3623 return (0); 3624 } 3625 if (count == USEM_HAS_WAITERS) 3626 break; 3627 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3628 if (rv == 0) 3629 break; 3630 umtxq_lock(&uq->uq_key); 3631 umtxq_unbusy(&uq->uq_key); 3632 umtxq_remove(uq); 3633 umtxq_unlock(&uq->uq_key); 3634 umtx_key_release(&uq->uq_key); 3635 if (rv == -1) 3636 return (EFAULT); 3637 rv = thread_check_susp(td, true); 3638 if (rv != 0) 3639 return (rv); 3640 goto again; 3641 } 3642 umtxq_lock(&uq->uq_key); 3643 umtxq_unbusy(&uq->uq_key); 3644 3645 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3646 3647 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3648 error = 0; 3649 else { 3650 umtxq_remove(uq); 3651 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3652 /* A relative timeout cannot be restarted. */ 3653 if (error == ERESTART) 3654 error = EINTR; 3655 if (error == EINTR) { 3656 umtx_abs_timeout_update(&timo); 3657 timespecsub(&timo.end, &timo.cur, 3658 &timeout->_timeout); 3659 } 3660 } 3661 } 3662 umtxq_unlock(&uq->uq_key); 3663 umtx_key_release(&uq->uq_key); 3664 return (error); 3665 } 3666 3667 /* 3668 * Signal a userland semaphore. 3669 */ 3670 static int 3671 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3672 { 3673 struct umtx_key key; 3674 int error, cnt, rv; 3675 uint32_t count, flags; 3676 3677 rv = fueword32(&sem->_flags, &flags); 3678 if (rv == -1) 3679 return (EFAULT); 3680 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3681 return (error); 3682 umtxq_lock(&key); 3683 umtxq_busy(&key); 3684 cnt = umtxq_count(&key); 3685 if (cnt > 0) { 3686 /* 3687 * If this was the last sleeping thread, clear the waiters 3688 * flag in _count. 
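 *
 * In a _usem2 the low 31 bits of _count hold the semaphore value
 * (USEM_COUNT()) and the top bit is USEM_HAS_WAITERS, so the CAS loop
 * below clears only the waiters bit while preserving the current count.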
3689 */ 3690 if (cnt == 1) { 3691 umtxq_unlock(&key); 3692 rv = fueword32(&sem->_count, &count); 3693 while (rv != -1 && count & USEM_HAS_WAITERS) { 3694 rv = casueword32(&sem->_count, count, &count, 3695 count & ~USEM_HAS_WAITERS); 3696 if (rv == 1) { 3697 rv = thread_check_susp(td, true); 3698 if (rv != 0) 3699 break; 3700 } 3701 } 3702 if (rv == -1) 3703 error = EFAULT; 3704 else if (rv > 0) { 3705 error = rv; 3706 } 3707 umtxq_lock(&key); 3708 } 3709 3710 umtxq_signal(&key, 1); 3711 } 3712 umtxq_unbusy(&key); 3713 umtxq_unlock(&key); 3714 umtx_key_release(&key); 3715 return (error); 3716 } 3717 3718 #ifdef COMPAT_FREEBSD10 3719 int 3720 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3721 { 3722 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3723 } 3724 3725 int 3726 freebsd10__umtx_unlock(struct thread *td, 3727 struct freebsd10__umtx_unlock_args *uap) 3728 { 3729 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3730 } 3731 #endif 3732 3733 inline int 3734 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3735 { 3736 int error; 3737 3738 error = copyin(uaddr, tsp, sizeof(*tsp)); 3739 if (error == 0) { 3740 if (tsp->tv_sec < 0 || 3741 tsp->tv_nsec >= 1000000000 || 3742 tsp->tv_nsec < 0) 3743 error = EINVAL; 3744 } 3745 return (error); 3746 } 3747 3748 static inline int 3749 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3750 { 3751 int error; 3752 3753 if (size <= sizeof(tp->_timeout)) { 3754 tp->_clockid = CLOCK_REALTIME; 3755 tp->_flags = 0; 3756 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3757 } else 3758 error = copyin(uaddr, tp, sizeof(*tp)); 3759 if (error != 0) 3760 return (error); 3761 if (tp->_timeout.tv_sec < 0 || 3762 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3763 return (EINVAL); 3764 return (0); 3765 } 3766 3767 static int 3768 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3769 struct umtx_robust_lists_params *rb) 3770 { 3771 3772 if (size > sizeof(*rb)) 3773 return (EINVAL); 3774 return (copyin(uaddr, rb, size)); 3775 } 3776 3777 static int 3778 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3779 { 3780 3781 /* 3782 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3783 * and we're only called if sz >= sizeof(timespec) as supplied in the 3784 * copyops. 3785 */ 3786 KASSERT(sz >= sizeof(*tsp), 3787 ("umtx_copyops specifies incorrect sizes")); 3788 3789 return (copyout(tsp, uaddr, sizeof(*tsp))); 3790 } 3791 3792 #ifdef COMPAT_FREEBSD10 3793 static int 3794 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3795 const struct umtx_copyops *ops) 3796 { 3797 struct timespec *ts, timeout; 3798 int error; 3799 3800 /* Allow a null timespec (wait forever). 
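 *
 * (The copyin helpers above accept two layouts for the timeout argument:
 * if the supplied size is at most sizeof(struct timespec), it is read as
 * a bare relative CLOCK_REALTIME timeout, otherwise as a full struct
 * _umtx_time carrying _flags, e.g. UMTX_ABSTIME, and _clockid.)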
*/ 3801 if (uap->uaddr2 == NULL) 3802 ts = NULL; 3803 else { 3804 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3805 if (error != 0) 3806 return (error); 3807 ts = &timeout; 3808 } 3809 #ifdef COMPAT_FREEBSD32 3810 if (ops->compat32) 3811 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3812 #endif 3813 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3814 } 3815 3816 static int 3817 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3818 const struct umtx_copyops *ops) 3819 { 3820 #ifdef COMPAT_FREEBSD32 3821 if (ops->compat32) 3822 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3823 #endif 3824 return (do_unlock_umtx(td, uap->obj, uap->val)); 3825 } 3826 #endif /* COMPAT_FREEBSD10 */ 3827 3828 #if !defined(COMPAT_FREEBSD10) 3829 static int 3830 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3831 const struct umtx_copyops *ops __unused) 3832 { 3833 return (EOPNOTSUPP); 3834 } 3835 #endif /* COMPAT_FREEBSD10 */ 3836 3837 static int 3838 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3839 const struct umtx_copyops *ops) 3840 { 3841 struct _umtx_time timeout, *tm_p; 3842 int error; 3843 3844 if (uap->uaddr2 == NULL) 3845 tm_p = NULL; 3846 else { 3847 error = ops->copyin_umtx_time( 3848 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3849 if (error != 0) 3850 return (error); 3851 tm_p = &timeout; 3852 } 3853 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3854 } 3855 3856 static int 3857 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3858 const struct umtx_copyops *ops) 3859 { 3860 struct _umtx_time timeout, *tm_p; 3861 int error; 3862 3863 if (uap->uaddr2 == NULL) 3864 tm_p = NULL; 3865 else { 3866 error = ops->copyin_umtx_time( 3867 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3868 if (error != 0) 3869 return (error); 3870 tm_p = &timeout; 3871 } 3872 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3873 } 3874 3875 static int 3876 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3877 const struct umtx_copyops *ops) 3878 { 3879 struct _umtx_time *tm_p, timeout; 3880 int error; 3881 3882 if (uap->uaddr2 == NULL) 3883 tm_p = NULL; 3884 else { 3885 error = ops->copyin_umtx_time( 3886 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3887 if (error != 0) 3888 return (error); 3889 tm_p = &timeout; 3890 } 3891 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3892 } 3893 3894 static int 3895 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3896 const struct umtx_copyops *ops __unused) 3897 { 3898 3899 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3900 } 3901 3902 #define BATCH_SIZE 128 3903 static int 3904 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3905 { 3906 char *uaddrs[BATCH_SIZE], **upp; 3907 int count, error, i, pos, tocopy; 3908 3909 upp = (char **)uap->obj; 3910 error = 0; 3911 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3912 pos += tocopy) { 3913 tocopy = MIN(count, BATCH_SIZE); 3914 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3915 if (error != 0) 3916 break; 3917 for (i = 0; i < tocopy; ++i) { 3918 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3919 } 3920 maybe_yield(); 3921 } 3922 return (error); 3923 } 3924 3925 static int 3926 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3927 { 3928 uint32_t uaddrs[BATCH_SIZE], *upp; 3929 int count, error, i, pos, tocopy; 3930 3931 upp = (uint32_t *)uap->obj; 3932 error = 0; 3933 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 3934 pos += tocopy) { 3935 tocopy = MIN(count, BATCH_SIZE); 3936 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3937 if (error != 0) 3938 break; 3939 for (i = 0; i < tocopy; ++i) { 3940 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3941 INT_MAX, 1); 3942 } 3943 maybe_yield(); 3944 } 3945 return (error); 3946 } 3947 3948 static int 3949 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3950 const struct umtx_copyops *ops) 3951 { 3952 3953 if (ops->compat32) 3954 return (__umtx_op_nwake_private_compat32(td, uap)); 3955 return (__umtx_op_nwake_private_native(td, uap)); 3956 } 3957 3958 static int 3959 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 3960 const struct umtx_copyops *ops __unused) 3961 { 3962 3963 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3964 } 3965 3966 static int 3967 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 3968 const struct umtx_copyops *ops) 3969 { 3970 struct _umtx_time *tm_p, timeout; 3971 int error; 3972 3973 /* Allow a null timespec (wait forever). */ 3974 if (uap->uaddr2 == NULL) 3975 tm_p = NULL; 3976 else { 3977 error = ops->copyin_umtx_time( 3978 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3979 if (error != 0) 3980 return (error); 3981 tm_p = &timeout; 3982 } 3983 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3984 } 3985 3986 static int 3987 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 3988 const struct umtx_copyops *ops __unused) 3989 { 3990 3991 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3992 } 3993 3994 static int 3995 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 3996 const struct umtx_copyops *ops) 3997 { 3998 struct _umtx_time *tm_p, timeout; 3999 int error; 4000 4001 /* Allow a null timespec (wait forever). */ 4002 if (uap->uaddr2 == NULL) 4003 tm_p = NULL; 4004 else { 4005 error = ops->copyin_umtx_time( 4006 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4007 if (error != 0) 4008 return (error); 4009 tm_p = &timeout; 4010 } 4011 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4012 } 4013 4014 static int 4015 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4016 const struct umtx_copyops *ops __unused) 4017 { 4018 4019 return (do_wake_umutex(td, uap->obj)); 4020 } 4021 4022 static int 4023 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4024 const struct umtx_copyops *ops __unused) 4025 { 4026 4027 return (do_unlock_umutex(td, uap->obj, false)); 4028 } 4029 4030 static int 4031 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4032 const struct umtx_copyops *ops __unused) 4033 { 4034 4035 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4036 } 4037 4038 static int 4039 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4040 const struct umtx_copyops *ops) 4041 { 4042 struct timespec *ts, timeout; 4043 int error; 4044 4045 /* Allow a null timespec (wait forever). 
*/ 4046 if (uap->uaddr2 == NULL) 4047 ts = NULL; 4048 else { 4049 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4050 if (error != 0) 4051 return (error); 4052 ts = &timeout; 4053 } 4054 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4055 } 4056 4057 static int 4058 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4059 const struct umtx_copyops *ops __unused) 4060 { 4061 4062 return (do_cv_signal(td, uap->obj)); 4063 } 4064 4065 static int 4066 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4067 const struct umtx_copyops *ops __unused) 4068 { 4069 4070 return (do_cv_broadcast(td, uap->obj)); 4071 } 4072 4073 static int 4074 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4075 const struct umtx_copyops *ops) 4076 { 4077 struct _umtx_time timeout; 4078 int error; 4079 4080 /* Allow a null timespec (wait forever). */ 4081 if (uap->uaddr2 == NULL) { 4082 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4083 } else { 4084 error = ops->copyin_umtx_time(uap->uaddr2, 4085 (size_t)uap->uaddr1, &timeout); 4086 if (error != 0) 4087 return (error); 4088 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4089 } 4090 return (error); 4091 } 4092 4093 static int 4094 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4095 const struct umtx_copyops *ops) 4096 { 4097 struct _umtx_time timeout; 4098 int error; 4099 4100 /* Allow a null timespec (wait forever). */ 4101 if (uap->uaddr2 == NULL) { 4102 error = do_rw_wrlock(td, uap->obj, 0); 4103 } else { 4104 error = ops->copyin_umtx_time(uap->uaddr2, 4105 (size_t)uap->uaddr1, &timeout); 4106 if (error != 0) 4107 return (error); 4108 4109 error = do_rw_wrlock(td, uap->obj, &timeout); 4110 } 4111 return (error); 4112 } 4113 4114 static int 4115 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4116 const struct umtx_copyops *ops __unused) 4117 { 4118 4119 return (do_rw_unlock(td, uap->obj)); 4120 } 4121 4122 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4123 static int 4124 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4125 const struct umtx_copyops *ops) 4126 { 4127 struct _umtx_time *tm_p, timeout; 4128 int error; 4129 4130 /* Allow a null timespec (wait forever). */ 4131 if (uap->uaddr2 == NULL) 4132 tm_p = NULL; 4133 else { 4134 error = ops->copyin_umtx_time( 4135 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4136 if (error != 0) 4137 return (error); 4138 tm_p = &timeout; 4139 } 4140 return (do_sem_wait(td, uap->obj, tm_p)); 4141 } 4142 4143 static int 4144 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4145 const struct umtx_copyops *ops __unused) 4146 { 4147 4148 return (do_sem_wake(td, uap->obj)); 4149 } 4150 #endif 4151 4152 static int 4153 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4154 const struct umtx_copyops *ops __unused) 4155 { 4156 4157 return (do_wake2_umutex(td, uap->obj, uap->val)); 4158 } 4159 4160 static int 4161 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4162 const struct umtx_copyops *ops) 4163 { 4164 struct _umtx_time *tm_p, timeout; 4165 size_t uasize; 4166 int error; 4167 4168 /* Allow a null timespec (wait forever). 
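 *
 * (For UMTX_OP_SEM2_WAIT below, a relative timeout that is interrupted
 * returns EINTR and the remaining time is copied back out just past the
 * struct _umtx_time at uaddr2, provided uaddr1 advertised a buffer large
 * enough for both, so the caller can simply re-issue the wait.)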
*/ 4169 if (uap->uaddr2 == NULL) { 4170 uasize = 0; 4171 tm_p = NULL; 4172 } else { 4173 uasize = (size_t)uap->uaddr1; 4174 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4175 if (error != 0) 4176 return (error); 4177 tm_p = &timeout; 4178 } 4179 error = do_sem2_wait(td, uap->obj, tm_p); 4180 if (error == EINTR && uap->uaddr2 != NULL && 4181 (timeout._flags & UMTX_ABSTIME) == 0 && 4182 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4183 error = ops->copyout_timeout( 4184 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4185 uasize - ops->umtx_time_sz, &timeout._timeout); 4186 if (error == 0) { 4187 error = EINTR; 4188 } 4189 } 4190 4191 return (error); 4192 } 4193 4194 static int 4195 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4196 const struct umtx_copyops *ops __unused) 4197 { 4198 4199 return (do_sem2_wake(td, uap->obj)); 4200 } 4201 4202 #define USHM_OBJ_UMTX(o) \ 4203 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4204 4205 #define USHMF_REG_LINKED 0x0001 4206 #define USHMF_OBJ_LINKED 0x0002 4207 struct umtx_shm_reg { 4208 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4209 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4210 struct umtx_key ushm_key; 4211 struct ucred *ushm_cred; 4212 struct shmfd *ushm_obj; 4213 u_int ushm_refcnt; 4214 u_int ushm_flags; 4215 }; 4216 4217 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4218 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4219 4220 static uma_zone_t umtx_shm_reg_zone; 4221 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4222 static struct mtx umtx_shm_lock; 4223 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4224 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4225 4226 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4227 4228 static void 4229 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4230 { 4231 struct umtx_shm_reg_head d; 4232 struct umtx_shm_reg *reg, *reg1; 4233 4234 TAILQ_INIT(&d); 4235 mtx_lock(&umtx_shm_lock); 4236 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4237 mtx_unlock(&umtx_shm_lock); 4238 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4239 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4240 umtx_shm_free_reg(reg); 4241 } 4242 } 4243 4244 static struct task umtx_shm_reg_delfree_task = 4245 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4246 4247 static struct umtx_shm_reg * 4248 umtx_shm_find_reg_locked(const struct umtx_key *key) 4249 { 4250 struct umtx_shm_reg *reg; 4251 struct umtx_shm_reg_head *reg_head; 4252 4253 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4254 mtx_assert(&umtx_shm_lock, MA_OWNED); 4255 reg_head = &umtx_shm_registry[key->hash]; 4256 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4257 KASSERT(reg->ushm_key.shared, 4258 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4259 if (reg->ushm_key.info.shared.object == 4260 key->info.shared.object && 4261 reg->ushm_key.info.shared.offset == 4262 key->info.shared.offset) { 4263 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4264 KASSERT(reg->ushm_refcnt > 0, 4265 ("reg %p refcnt 0 onlist", reg)); 4266 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4267 ("reg %p not linked", reg)); 4268 reg->ushm_refcnt++; 4269 return (reg); 4270 } 4271 } 4272 return (NULL); 4273 } 4274 4275 static struct umtx_shm_reg * 4276 umtx_shm_find_reg(const struct umtx_key *key) 4277 { 4278 struct umtx_shm_reg *reg; 4279 4280 mtx_lock(&umtx_shm_lock); 4281 reg = umtx_shm_find_reg_locked(key); 4282 mtx_unlock(&umtx_shm_lock); 4283 return (reg); 4284 } 4285 
4286 static void 4287 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4288 { 4289 4290 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4291 crfree(reg->ushm_cred); 4292 shm_drop(reg->ushm_obj); 4293 uma_zfree(umtx_shm_reg_zone, reg); 4294 } 4295 4296 static bool 4297 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 4298 { 4299 bool res; 4300 4301 mtx_assert(&umtx_shm_lock, MA_OWNED); 4302 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 4303 reg->ushm_refcnt--; 4304 res = reg->ushm_refcnt == 0; 4305 if (res || force) { 4306 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 4307 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 4308 reg, ushm_reg_link); 4309 reg->ushm_flags &= ~USHMF_REG_LINKED; 4310 } 4311 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 4312 LIST_REMOVE(reg, ushm_obj_link); 4313 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 4314 } 4315 } 4316 return (res); 4317 } 4318 4319 static void 4320 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 4321 { 4322 vm_object_t object; 4323 bool dofree; 4324 4325 if (force) { 4326 object = reg->ushm_obj->shm_object; 4327 VM_OBJECT_WLOCK(object); 4328 object->flags |= OBJ_UMTXDEAD; 4329 VM_OBJECT_WUNLOCK(object); 4330 } 4331 mtx_lock(&umtx_shm_lock); 4332 dofree = umtx_shm_unref_reg_locked(reg, force); 4333 mtx_unlock(&umtx_shm_lock); 4334 if (dofree) 4335 umtx_shm_free_reg(reg); 4336 } 4337 4338 void 4339 umtx_shm_object_init(vm_object_t object) 4340 { 4341 4342 LIST_INIT(USHM_OBJ_UMTX(object)); 4343 } 4344 4345 void 4346 umtx_shm_object_terminated(vm_object_t object) 4347 { 4348 struct umtx_shm_reg *reg, *reg1; 4349 bool dofree; 4350 4351 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4352 return; 4353 4354 dofree = false; 4355 mtx_lock(&umtx_shm_lock); 4356 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4357 if (umtx_shm_unref_reg_locked(reg, true)) { 4358 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4359 ushm_reg_link); 4360 dofree = true; 4361 } 4362 } 4363 mtx_unlock(&umtx_shm_lock); 4364 if (dofree) 4365 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4366 } 4367 4368 static int 4369 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4370 struct umtx_shm_reg **res) 4371 { 4372 struct umtx_shm_reg *reg, *reg1; 4373 struct ucred *cred; 4374 int error; 4375 4376 reg = umtx_shm_find_reg(key); 4377 if (reg != NULL) { 4378 *res = reg; 4379 return (0); 4380 } 4381 cred = td->td_ucred; 4382 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4383 return (ENOMEM); 4384 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4385 reg->ushm_refcnt = 1; 4386 bcopy(key, ®->ushm_key, sizeof(*key)); 4387 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4388 reg->ushm_cred = crhold(cred); 4389 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4390 if (error != 0) { 4391 umtx_shm_free_reg(reg); 4392 return (error); 4393 } 4394 mtx_lock(&umtx_shm_lock); 4395 reg1 = umtx_shm_find_reg_locked(key); 4396 if (reg1 != NULL) { 4397 mtx_unlock(&umtx_shm_lock); 4398 umtx_shm_free_reg(reg); 4399 *res = reg1; 4400 return (0); 4401 } 4402 reg->ushm_refcnt++; 4403 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4404 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4405 ushm_obj_link); 4406 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4407 mtx_unlock(&umtx_shm_lock); 4408 *res = reg; 4409 return (0); 4410 } 4411 4412 static int 4413 umtx_shm_alive(struct thread *td, void *addr) 4414 { 4415 vm_map_t map; 4416 vm_map_entry_t 
entry; 4417 vm_object_t object; 4418 vm_pindex_t pindex; 4419 vm_prot_t prot; 4420 int res, ret; 4421 boolean_t wired; 4422 4423 map = &td->td_proc->p_vmspace->vm_map; 4424 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4425 &object, &pindex, &prot, &wired); 4426 if (res != KERN_SUCCESS) 4427 return (EFAULT); 4428 if (object == NULL) 4429 ret = EINVAL; 4430 else 4431 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 4432 vm_map_lookup_done(map, entry); 4433 return (ret); 4434 } 4435 4436 static void 4437 umtx_shm_init(void) 4438 { 4439 int i; 4440 4441 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4442 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4443 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4444 for (i = 0; i < nitems(umtx_shm_registry); i++) 4445 TAILQ_INIT(&umtx_shm_registry[i]); 4446 } 4447 4448 static int 4449 umtx_shm(struct thread *td, void *addr, u_int flags) 4450 { 4451 struct umtx_key key; 4452 struct umtx_shm_reg *reg; 4453 struct file *fp; 4454 int error, fd; 4455 4456 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4457 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4458 return (EINVAL); 4459 if ((flags & UMTX_SHM_ALIVE) != 0) 4460 return (umtx_shm_alive(td, addr)); 4461 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4462 if (error != 0) 4463 return (error); 4464 KASSERT(key.shared == 1, ("non-shared key")); 4465 if ((flags & UMTX_SHM_CREAT) != 0) { 4466 error = umtx_shm_create_reg(td, &key, ®); 4467 } else { 4468 reg = umtx_shm_find_reg(&key); 4469 if (reg == NULL) 4470 error = ESRCH; 4471 } 4472 umtx_key_release(&key); 4473 if (error != 0) 4474 return (error); 4475 KASSERT(reg != NULL, ("no reg")); 4476 if ((flags & UMTX_SHM_DESTROY) != 0) { 4477 umtx_shm_unref_reg(reg, true); 4478 } else { 4479 #if 0 4480 #ifdef MAC 4481 error = mac_posixshm_check_open(td->td_ucred, 4482 reg->ushm_obj, FFLAGS(O_RDWR)); 4483 if (error == 0) 4484 #endif 4485 error = shm_access(reg->ushm_obj, td->td_ucred, 4486 FFLAGS(O_RDWR)); 4487 if (error == 0) 4488 #endif 4489 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4490 if (error == 0) { 4491 shm_hold(reg->ushm_obj); 4492 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4493 &shm_ops); 4494 td->td_retval[0] = fd; 4495 fdrop(fp, td); 4496 } 4497 } 4498 umtx_shm_unref_reg(reg, false); 4499 return (error); 4500 } 4501 4502 static int 4503 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, 4504 const struct umtx_copyops *ops __unused) 4505 { 4506 4507 return (umtx_shm(td, uap->uaddr1, uap->val)); 4508 } 4509 4510 static int 4511 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4512 const struct umtx_copyops *ops) 4513 { 4514 struct umtx_robust_lists_params rb; 4515 int error; 4516 4517 if (ops->compat32) { 4518 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4519 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4520 td->td_rb_inact != 0)) 4521 return (EBUSY); 4522 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4523 return (EBUSY); 4524 } 4525 4526 bzero(&rb, sizeof(rb)); 4527 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4528 if (error != 0) 4529 return (error); 4530 4531 if (ops->compat32) 4532 td->td_pflags2 |= TDP2_COMPAT32RB; 4533 4534 td->td_rb_list = rb.robust_list_offset; 4535 td->td_rbp_list = rb.robust_priv_list_offset; 4536 td->td_rb_inact = rb.robust_inact_offset; 4537 return (0); 4538 } 4539 4540 #if defined(__i386__) || defined(__amd64__) 4541 /* 4542 * Provide the standard 32-bit definitions for x86, 
since native/compat32 use a 4543 * 32-bit time_t there. Other architectures just need the i386 definitions 4544 * along with their standard compat32. 4545 */ 4546 struct timespecx32 { 4547 int64_t tv_sec; 4548 int32_t tv_nsec; 4549 }; 4550 4551 struct umtx_timex32 { 4552 struct timespecx32 _timeout; 4553 uint32_t _flags; 4554 uint32_t _clockid; 4555 }; 4556 4557 #ifndef __i386__ 4558 #define timespeci386 timespec32 4559 #define umtx_timei386 umtx_time32 4560 #endif 4561 #else /* !__i386__ && !__amd64__ */ 4562 /* 32-bit architectures can emulate i386, so define these almost everywhere. */ 4563 struct timespeci386 { 4564 int32_t tv_sec; 4565 int32_t tv_nsec; 4566 }; 4567 4568 struct umtx_timei386 { 4569 struct timespeci386 _timeout; 4570 uint32_t _flags; 4571 uint32_t _clockid; 4572 }; 4573 4574 #if defined(__LP64__) 4575 #define timespecx32 timespec32 4576 #define umtx_timex32 umtx_time32 4577 #endif 4578 #endif 4579 4580 static int 4581 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4582 struct umtx_robust_lists_params *rbp) 4583 { 4584 struct umtx_robust_lists_params_compat32 rb32; 4585 int error; 4586 4587 if (size > sizeof(rb32)) 4588 return (EINVAL); 4589 bzero(&rb32, sizeof(rb32)); 4590 error = copyin(uaddr, &rb32, size); 4591 if (error != 0) 4592 return (error); 4593 CP(rb32, *rbp, robust_list_offset); 4594 CP(rb32, *rbp, robust_priv_list_offset); 4595 CP(rb32, *rbp, robust_inact_offset); 4596 return (0); 4597 } 4598 4599 #ifndef __i386__ 4600 static inline int 4601 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4602 { 4603 struct timespeci386 ts32; 4604 int error; 4605 4606 error = copyin(uaddr, &ts32, sizeof(ts32)); 4607 if (error == 0) { 4608 if (ts32.tv_sec < 0 || 4609 ts32.tv_nsec >= 1000000000 || 4610 ts32.tv_nsec < 0) 4611 error = EINVAL; 4612 else { 4613 CP(ts32, *tsp, tv_sec); 4614 CP(ts32, *tsp, tv_nsec); 4615 } 4616 } 4617 return (error); 4618 } 4619 4620 static inline int 4621 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4622 { 4623 struct umtx_timei386 t32; 4624 int error; 4625 4626 t32._clockid = CLOCK_REALTIME; 4627 t32._flags = 0; 4628 if (size <= sizeof(t32._timeout)) 4629 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4630 else 4631 error = copyin(uaddr, &t32, sizeof(t32)); 4632 if (error != 0) 4633 return (error); 4634 if (t32._timeout.tv_sec < 0 || 4635 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4636 return (EINVAL); 4637 TS_CP(t32, *tp, _timeout); 4638 CP(t32, *tp, _flags); 4639 CP(t32, *tp, _clockid); 4640 return (0); 4641 } 4642 4643 static int 4644 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4645 { 4646 struct timespeci386 remain32 = { 4647 .tv_sec = tsp->tv_sec, 4648 .tv_nsec = tsp->tv_nsec, 4649 }; 4650 4651 /* 4652 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4653 * and we're only called if sz >= sizeof(timespec) as supplied in the 4654 * copyops. 
4655 */ 4656 KASSERT(sz >= sizeof(remain32), 4657 ("umtx_copyops specifies incorrect sizes")); 4658 4659 return (copyout(&remain32, uaddr, sizeof(remain32))); 4660 } 4661 #endif /* !__i386__ */ 4662 4663 #if defined(__i386__) || defined(__LP64__) 4664 static inline int 4665 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4666 { 4667 struct timespecx32 ts32; 4668 int error; 4669 4670 error = copyin(uaddr, &ts32, sizeof(ts32)); 4671 if (error == 0) { 4672 if (ts32.tv_sec < 0 || 4673 ts32.tv_nsec >= 1000000000 || 4674 ts32.tv_nsec < 0) 4675 error = EINVAL; 4676 else { 4677 CP(ts32, *tsp, tv_sec); 4678 CP(ts32, *tsp, tv_nsec); 4679 } 4680 } 4681 return (error); 4682 } 4683 4684 static inline int 4685 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4686 { 4687 struct umtx_timex32 t32; 4688 int error; 4689 4690 t32._clockid = CLOCK_REALTIME; 4691 t32._flags = 0; 4692 if (size <= sizeof(t32._timeout)) 4693 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4694 else 4695 error = copyin(uaddr, &t32, sizeof(t32)); 4696 if (error != 0) 4697 return (error); 4698 if (t32._timeout.tv_sec < 0 || 4699 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4700 return (EINVAL); 4701 TS_CP(t32, *tp, _timeout); 4702 CP(t32, *tp, _flags); 4703 CP(t32, *tp, _clockid); 4704 return (0); 4705 } 4706 4707 static int 4708 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4709 { 4710 struct timespecx32 remain32 = { 4711 .tv_sec = tsp->tv_sec, 4712 .tv_nsec = tsp->tv_nsec, 4713 }; 4714 4715 /* 4716 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4717 * and we're only called if sz >= sizeof(timespec) as supplied in the 4718 * copyops. 4719 */ 4720 KASSERT(sz >= sizeof(remain32), 4721 ("umtx_copyops specifies incorrect sizes")); 4722 4723 return (copyout(&remain32, uaddr, sizeof(remain32))); 4724 } 4725 #endif /* __i386__ || __LP64__ */ 4726 4727 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4728 const struct umtx_copyops *umtx_ops); 4729 4730 static const _umtx_op_func op_table[] = { 4731 #ifdef COMPAT_FREEBSD10 4732 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4733 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4734 #else 4735 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4736 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4737 #endif 4738 [UMTX_OP_WAIT] = __umtx_op_wait, 4739 [UMTX_OP_WAKE] = __umtx_op_wake, 4740 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4741 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4742 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4743 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4744 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4745 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4746 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4747 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4748 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4749 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4750 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4751 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4752 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4753 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4754 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4755 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4756 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4757 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4758 #else 4759 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4760 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4761 #endif 4762 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4763 [UMTX_OP_MUTEX_WAKE2] = 
__umtx_op_wake2_umutex, 4764 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4765 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4766 [UMTX_OP_SHM] = __umtx_op_shm, 4767 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4768 }; 4769 4770 static const struct umtx_copyops umtx_native_ops = { 4771 .copyin_timeout = umtx_copyin_timeout, 4772 .copyin_umtx_time = umtx_copyin_umtx_time, 4773 .copyin_robust_lists = umtx_copyin_robust_lists, 4774 .copyout_timeout = umtx_copyout_timeout, 4775 .timespec_sz = sizeof(struct timespec), 4776 .umtx_time_sz = sizeof(struct _umtx_time), 4777 }; 4778 4779 #ifndef __i386__ 4780 static const struct umtx_copyops umtx_native_opsi386 = { 4781 .copyin_timeout = umtx_copyin_timeouti386, 4782 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4783 .copyin_robust_lists = umtx_copyin_robust_lists32, 4784 .copyout_timeout = umtx_copyout_timeouti386, 4785 .timespec_sz = sizeof(struct timespeci386), 4786 .umtx_time_sz = sizeof(struct umtx_timei386), 4787 .compat32 = true, 4788 }; 4789 #endif 4790 4791 #if defined(__i386__) || defined(__LP64__) 4792 /* i386 can emulate other 32-bit archs, too! */ 4793 static const struct umtx_copyops umtx_native_opsx32 = { 4794 .copyin_timeout = umtx_copyin_timeoutx32, 4795 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4796 .copyin_robust_lists = umtx_copyin_robust_lists32, 4797 .copyout_timeout = umtx_copyout_timeoutx32, 4798 .timespec_sz = sizeof(struct timespecx32), 4799 .umtx_time_sz = sizeof(struct umtx_timex32), 4800 .compat32 = true, 4801 }; 4802 4803 #ifdef COMPAT_FREEBSD32 4804 #ifdef __amd64__ 4805 #define umtx_native_ops32 umtx_native_opsi386 4806 #else 4807 #define umtx_native_ops32 umtx_native_opsx32 4808 #endif 4809 #endif /* COMPAT_FREEBSD32 */ 4810 #endif /* __i386__ || __LP64__ */ 4811 4812 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4813 4814 static int 4815 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4816 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4817 { 4818 struct _umtx_op_args uap = { 4819 .obj = obj, 4820 .op = op & ~UMTX_OP__FLAGS, 4821 .val = val, 4822 .uaddr1 = uaddr1, 4823 .uaddr2 = uaddr2 4824 }; 4825 4826 if ((uap.op >= nitems(op_table))) 4827 return (EINVAL); 4828 return ((*op_table[uap.op])(td, &uap, ops)); 4829 } 4830 4831 int 4832 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4833 { 4834 static const struct umtx_copyops *umtx_ops; 4835 4836 umtx_ops = &umtx_native_ops; 4837 #ifdef __LP64__ 4838 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 4839 if ((uap->op & UMTX_OP__I386) != 0) 4840 umtx_ops = &umtx_native_opsi386; 4841 else 4842 umtx_ops = &umtx_native_opsx32; 4843 } 4844 #elif !defined(__i386__) 4845 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 4846 if ((uap->op & UMTX_OP__I386) != 0) 4847 umtx_ops = &umtx_native_opsi386; 4848 #else 4849 /* Likewise, UMTX_OP__I386 is a nop on i386. 
*/ 4850 if ((uap->op & UMTX_OP__32BIT) != 0) 4851 umtx_ops = &umtx_native_opsx32; 4852 #endif 4853 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4854 uap->uaddr2, umtx_ops)); 4855 } 4856 4857 #ifdef COMPAT_FREEBSD32 4858 #ifdef COMPAT_FREEBSD10 4859 int 4860 freebsd10_freebsd32_umtx_lock(struct thread *td, 4861 struct freebsd10_freebsd32_umtx_lock_args *uap) 4862 { 4863 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 4864 } 4865 4866 int 4867 freebsd10_freebsd32_umtx_unlock(struct thread *td, 4868 struct freebsd10_freebsd32_umtx_unlock_args *uap) 4869 { 4870 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 4871 } 4872 #endif /* COMPAT_FREEBSD10 */ 4873 4874 int 4875 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4876 { 4877 4878 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr, 4879 uap->uaddr2, &umtx_native_ops32)); 4880 } 4881 #endif /* COMPAT_FREEBSD32 */ 4882 4883 void 4884 umtx_thread_init(struct thread *td) 4885 { 4886 4887 td->td_umtxq = umtxq_alloc(); 4888 td->td_umtxq->uq_thread = td; 4889 } 4890 4891 void 4892 umtx_thread_fini(struct thread *td) 4893 { 4894 4895 umtxq_free(td->td_umtxq); 4896 } 4897 4898 /* 4899 * It will be called when new thread is created, e.g fork(). 4900 */ 4901 void 4902 umtx_thread_alloc(struct thread *td) 4903 { 4904 struct umtx_q *uq; 4905 4906 uq = td->td_umtxq; 4907 uq->uq_inherited_pri = PRI_MAX; 4908 4909 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4910 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4911 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4912 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4913 } 4914 4915 /* 4916 * exec() hook. 4917 * 4918 * Clear robust lists for all process' threads, not delaying the 4919 * cleanup to thread exit, since the relevant address space is 4920 * destroyed right now. 4921 */ 4922 void 4923 umtx_exec(struct proc *p) 4924 { 4925 struct thread *td; 4926 4927 KASSERT(p == curproc, ("need curproc")); 4928 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4929 (p->p_flag & P_STOPPED_SINGLE) != 0, 4930 ("curproc must be single-threaded")); 4931 /* 4932 * There is no need to lock the list as only this thread can be 4933 * running. 4934 */ 4935 FOREACH_THREAD_IN_PROC(p, td) { 4936 KASSERT(td == curthread || 4937 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4938 ("running thread %p %p", p, td)); 4939 umtx_thread_cleanup(td); 4940 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4941 } 4942 } 4943 4944 /* 4945 * thread exit hook. 
4946 */ 4947 void 4948 umtx_thread_exit(struct thread *td) 4949 { 4950 4951 umtx_thread_cleanup(td); 4952 } 4953 4954 static int 4955 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 4956 { 4957 u_long res1; 4958 uint32_t res32; 4959 int error; 4960 4961 if (compat32) { 4962 error = fueword32((void *)ptr, &res32); 4963 if (error == 0) 4964 res1 = res32; 4965 } else { 4966 error = fueword((void *)ptr, &res1); 4967 } 4968 if (error == 0) 4969 *res = res1; 4970 else 4971 error = EFAULT; 4972 return (error); 4973 } 4974 4975 static void 4976 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 4977 bool compat32) 4978 { 4979 struct umutex32 m32; 4980 4981 if (compat32) { 4982 memcpy(&m32, m, sizeof(m32)); 4983 *rb_list = m32.m_rb_lnk; 4984 } else { 4985 *rb_list = m->m_rb_lnk; 4986 } 4987 } 4988 4989 static int 4990 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 4991 bool compat32) 4992 { 4993 struct umutex m; 4994 int error; 4995 4996 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4997 error = copyin((void *)rbp, &m, sizeof(m)); 4998 if (error != 0) 4999 return (error); 5000 if (rb_list != NULL) 5001 umtx_read_rb_list(td, &m, rb_list, compat32); 5002 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5003 return (EINVAL); 5004 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5005 /* inact is cleared after unlock, allow the inconsistency */ 5006 return (inact ? 0 : EINVAL); 5007 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5008 } 5009 5010 static void 5011 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5012 const char *name, bool compat32) 5013 { 5014 int error, i; 5015 uintptr_t rbp; 5016 bool inact; 5017 5018 if (rb_list == 0) 5019 return; 5020 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5021 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5022 if (rbp == *rb_inact) { 5023 inact = true; 5024 *rb_inact = 0; 5025 } else 5026 inact = false; 5027 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5028 } 5029 if (i == umtx_max_rb && umtx_verbose_rb) { 5030 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5031 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5032 } 5033 if (error != 0 && umtx_verbose_rb) { 5034 uprintf("comm %s pid %d: handling %srb error %d\n", 5035 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5036 } 5037 } 5038 5039 /* 5040 * Clean up umtx data. 5041 */ 5042 static void 5043 umtx_thread_cleanup(struct thread *td) 5044 { 5045 struct umtx_q *uq; 5046 struct umtx_pi *pi; 5047 uintptr_t rb_inact; 5048 bool compat32; 5049 5050 /* 5051 * Disown pi mutexes. 5052 */ 5053 uq = td->td_umtxq; 5054 if (uq != NULL) { 5055 if (uq->uq_inherited_pri != PRI_MAX || 5056 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5057 mtx_lock(&umtx_lock); 5058 uq->uq_inherited_pri = PRI_MAX; 5059 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5060 pi->pi_owner = NULL; 5061 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5062 } 5063 mtx_unlock(&umtx_lock); 5064 } 5065 sched_lend_user_prio_cond(td, PRI_MAX); 5066 } 5067 5068 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5069 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5070 5071 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5072 return; 5073 5074 /* 5075 * Handle terminated robust mutexes. Must be done after 5076 * robust pi disown, otherwise unlock could see unowned 5077 * entries. 
 */
	rb_inact = td->td_rb_inact;
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
}
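
/*
 * Illustrative userland sketch (not compiled here) of the UMTX_OP_SHM
 * interface serviced by umtx_shm() above.  It assumes the _umtx_op(2)
 * prototype exported to userland by <sys/umtx.h>; the helper name and
 * the minimal error handling are hypothetical.
 */
#if 0
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/umtx.h>
#include <unistd.h>

/*
 * 'key' should live in a mapping both cooperating processes can see, so
 * they derive the same (object, offset) registry key.  UMTX_SHM_CREAT
 * returns an O_CLOEXEC descriptor for the one-page shared object
 * registered under that key; UMTX_SHM_LOOKUP attaches to an existing
 * registration, and UMTX_SHM_ALIVE reports ENOTTY once the backing
 * object has been marked OBJ_UMTXDEAD.
 */
static void *
pshared_page_for(void *key)
{
	void *p;
	int fd;

	fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, key, NULL);
	if (fd == -1)
		return (NULL);
	p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED,
	    fd, 0);
	close(fd);		/* the mapping keeps the object referenced */
	return (p == MAP_FAILED ? NULL : p);
}
#endif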
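
/*
 * Illustrative userland sketch (not compiled here) of the protocol behind
 * __umtx_op_robust_lists() and umtx_thread_cleanup(): the three registered
 * words are userspace addresses of per-thread list heads, and robust
 * umutexes are chained through m_rb_lnk.  The variable and function names
 * are hypothetical; libthr implements the real protocol.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <stdint.h>

/* List heads whose addresses the kernel reads back at thread exit. */
static __thread uintptr_t robust_list;		/* shared robust mutexes */
static __thread uintptr_t robust_priv_list;	/* private robust mutexes */
static __thread uintptr_t robust_inact;		/* mutex being (un)locked */

static int
register_robust_lists(void)
{
	struct umtx_robust_lists_params rb = {
		.robust_list_offset = (uintptr_t)&robust_list,
		.robust_priv_list_offset = (uintptr_t)&robust_priv_list,
		.robust_inact_offset = (uintptr_t)&robust_inact,
	};

	/* val carries the structure size, uaddr1 the structure itself. */
	return (_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL));
}

/*
 * Userland links each UMUTEX_ROBUST mutex it acquires into the list via
 * m_rb_lnk and points robust_inact at a mutex while it is being taken or
 * released; if the thread dies, umtx_thread_cleanup() above follows at
 * most umtx_max_rb links and force-unlocks anything still owned by that
 * thread's tid.
 */
#endif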
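
/*
 * Illustrative userland sketch (not compiled here) of the size-based
 * timeout convention decoded by the copyin_umtx_time*() helpers above: a
 * bare struct timespec means a relative CLOCK_REALTIME timeout, while a
 * full struct _umtx_time can request UMTX_ABSTIME and a different clock.
 * It assumes the convention used by the wait ops earlier in this file,
 * where the byte count travels in uaddr1 and the pointer in uaddr2; the
 * helper name is hypothetical.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <stdint.h>
#include <time.h>

static int
wait_until(u_int *word, u_int expected, const struct timespec *deadline)
{
	struct _umtx_time to = {
		._timeout = *deadline,
		._flags = UMTX_ABSTIME,
		._clockid = CLOCK_MONOTONIC,
	};

	return (_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE, expected,
	    (void *)(uintptr_t)sizeof(to), &to));
}
#endif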
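
/*
 * Illustrative userland sketch (not compiled here) of the UMTX_OP__I386 /
 * UMTX_OP__32BIT modifiers handled by sys__umtx_op() above: a native
 * 64-bit process that manages 32-bit guest state can request the compat32
 * copyops for timeout and robust-list structures, and the flag bits are
 * masked off before op_table[] dispatch.  The guest structure below
 * mirrors struct umtx_timei386 and is a hypothetical local definition,
 * since the kernel does not export that layout to userland.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <stdint.h>

struct guest_umtx_time {
	struct {
		int32_t	tv_sec;
		int32_t	tv_nsec;
	} _timeout;
	uint32_t _flags;
	uint32_t _clockid;
};

static int
wait_guest_word(uint32_t *word, uint32_t expected,
    struct guest_umtx_time *to)
{
	return (_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE | UMTX_OP__I386,
	    expected, (void *)(uintptr_t)sizeof(*to), to));
}
#endif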