1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_umtx_profiling.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/fcntl.h> 42 #include <sys/file.h> 43 #include <sys/filedesc.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/sched.h> 56 #include <sys/smp.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/systm.h> 60 #include <sys/sysproto.h> 61 #include <sys/syscallsubr.h> 62 #include <sys/taskqueue.h> 63 #include <sys/time.h> 64 #include <sys/eventhandler.h> 65 #include <sys/umtx.h> 66 #include <sys/umtxvar.h> 67 68 #include <security/mac/mac_framework.h> 69 70 #include <vm/vm.h> 71 #include <vm/vm_param.h> 72 #include <vm/pmap.h> 73 #include <vm/vm_map.h> 74 #include <vm/vm_object.h> 75 76 #include <machine/atomic.h> 77 #include <machine/cpu.h> 78 79 #include <compat/freebsd32/freebsd32.h> 80 #ifdef COMPAT_FREEBSD32 81 #include <compat/freebsd32/freebsd32_proto.h> 82 #endif 83 84 #define _UMUTEX_TRY 1 85 #define _UMUTEX_WAIT 2 86 87 #ifdef UMTX_PROFILING 88 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 89 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 90 #endif 91 92 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 93 #ifdef INVARIANTS 94 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do { \ 95 struct umtxq_chain *uc; \ 96 \ 97 uc = umtxq_getchain(key); \ 98 mtx_assert(&uc->uc_lock, MA_OWNED); \ 99 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); \ 100 } while (0) 101 #else 102 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0) 103 #endif 104 105 /* 106 * Don't propagate time-sharing priority, there is a security reason, 107 * a user can simply introduce PI-mutex, let thread A lock the mutex, 108 * and let 
another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */

/*
 * User priority of a thread as seen by the umtx code: any value inside
 * [PRI_MIN_TIMESHARE, PRI_MAX_TIMESHARE] is reported as PRI_MAX_TIMESHARE,
 * every other value is reported unchanged.
 */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Multiplier used by umtxq_hash() to scatter keys over the chains. */
#define	GOLDEN_RATIO_PRIME	2654404609U
/* Number of hash chains in each of the two umtxq_chains[] tables. */
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
/* Right shift applied to the multiplied hash value in umtxq_hash(). */
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

/* Map the USYNC_PROCESS_SHARED flag bit to a THREAD_SHARE/PROCESS_SHARE mode. */
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

/* Upper bound on the spin iterations umtxq_busy() performs before sleeping. */
#define BUSY_SPINS		200

/*
 * Table of copyin/copyout helpers together with the sizes of the
 * structures they transfer.
 * NOTE(review): presumably one instance exists per userland ABI (native
 * and 32-bit compat); the instances are not visible in this part of the
 * file -- confirm.
 */
struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

/*
 * Compile-time checks that struct umutex and struct umutex32 have the
 * same size and place m_spare[0] at the same offset.
 */
_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");

/* kern.ipc.umtx_vnode_persistent tunable; see the description string. */
int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
/* kern.ipc.umtx_max_robust tunable; see the description string. */
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

/* UMA zone backing struct umtx_pi; created in umtxq_sysinit(). */
static uma_zone_t		umtx_pi_zone;
/*
 * Two tables of wait-queue chains, indexed by key hash.  umtxq_getchain()
 * picks table 1 for key types <= TYPE_SEM and table 0 for the rest.
 */
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
/* Counter exported read-only as debug.umtx.umtx_pi_allocated. */
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
/*
 * debug.umtx.robust_faults_verbose tunable.
 * NOTE(review): registered with an empty description string; its
 * consumers are outside this part of the file.
 */
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
/* Largest per-chain length ever recorded by umtxq_insert_queue(). */
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "umtx chain stats");
#endif

/* Forward declarations for helpers defined later in this file. */
static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime);
static int umtx_abs_timeout_gethz(struct umtx_abs_timeout *timo);
static inline void umtx_abs_timeout_update(struct umtx_abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
/* Run umtxq_sysinit() during boot, right after SI_SUB_EVENTHANDLER. */
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

/* Wake up to nwake waiters from the shared queue of the given key. */
#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)

/* Global umtx mutex; initialized in umtxq_sysinit(). */
static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
/*
 * Register a debug.umtx.chains.<index> sysctl node for every chain index
 * and export the max_length counter of both chain tables beneath it.
 */
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		/* The node is named after the decimal chain index. */
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 210 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 211 } 212 } 213 214 static int 215 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 216 { 217 char buf[512]; 218 struct sbuf sb; 219 struct umtxq_chain *uc; 220 u_int fract, i, j, tot, whole; 221 u_int sf0, sf1, sf2, sf3, sf4; 222 u_int si0, si1, si2, si3, si4; 223 u_int sw0, sw1, sw2, sw3, sw4; 224 225 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 226 for (i = 0; i < 2; i++) { 227 tot = 0; 228 for (j = 0; j < UMTX_CHAINS; ++j) { 229 uc = &umtxq_chains[i][j]; 230 mtx_lock(&uc->uc_lock); 231 tot += uc->max_length; 232 mtx_unlock(&uc->uc_lock); 233 } 234 if (tot == 0) 235 sbuf_printf(&sb, "%u) Empty ", i); 236 else { 237 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 238 si0 = si1 = si2 = si3 = si4 = 0; 239 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 240 for (j = 0; j < UMTX_CHAINS; j++) { 241 uc = &umtxq_chains[i][j]; 242 mtx_lock(&uc->uc_lock); 243 whole = uc->max_length * 100; 244 mtx_unlock(&uc->uc_lock); 245 fract = (whole % tot) * 100; 246 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 247 sf0 = fract; 248 si0 = j; 249 sw0 = whole; 250 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 251 sf1)) { 252 sf1 = fract; 253 si1 = j; 254 sw1 = whole; 255 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 256 sf2)) { 257 sf2 = fract; 258 si2 = j; 259 sw2 = whole; 260 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 261 sf3)) { 262 sf3 = fract; 263 si3 = j; 264 sw3 = whole; 265 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 266 sf4)) { 267 sf4 = fract; 268 si4 = j; 269 sw4 = whole; 270 } 271 } 272 sbuf_printf(&sb, "queue %u:\n", i); 273 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 274 sf0 / tot, si0); 275 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 276 sf1 / tot, si1); 277 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 278 sf2 / tot, si2); 279 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 280 sf3 / tot, si3); 281 sbuf_printf(&sb, 
"5th: %u.%u%% idx: %u\n", sw4 / tot, 282 sf4 / tot, si4); 283 } 284 } 285 sbuf_trim(&sb); 286 sbuf_finish(&sb); 287 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 288 sbuf_delete(&sb); 289 return (0); 290 } 291 292 static int 293 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 294 { 295 struct umtxq_chain *uc; 296 u_int i, j; 297 int clear, error; 298 299 clear = 0; 300 error = sysctl_handle_int(oidp, &clear, 0, req); 301 if (error != 0 || req->newptr == NULL) 302 return (error); 303 304 if (clear != 0) { 305 for (i = 0; i < 2; ++i) { 306 for (j = 0; j < UMTX_CHAINS; ++j) { 307 uc = &umtxq_chains[i][j]; 308 mtx_lock(&uc->uc_lock); 309 uc->length = 0; 310 uc->max_length = 0; 311 mtx_unlock(&uc->uc_lock); 312 } 313 } 314 } 315 return (0); 316 } 317 318 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 319 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 320 sysctl_debug_umtx_chains_clear, "I", 321 "Clear umtx chains statistics"); 322 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 323 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 324 sysctl_debug_umtx_chains_peaks, "A", 325 "Highest peaks in chains max length"); 326 #endif 327 328 static void 329 umtxq_sysinit(void *arg __unused) 330 { 331 int i, j; 332 333 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 334 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 335 for (i = 0; i < 2; ++i) { 336 for (j = 0; j < UMTX_CHAINS; ++j) { 337 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 338 MTX_DEF | MTX_DUPOK); 339 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 340 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 341 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 342 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 343 umtxq_chains[i][j].uc_busy = 0; 344 umtxq_chains[i][j].uc_waiters = 0; 345 #ifdef UMTX_PROFILING 346 umtxq_chains[i][j].length = 0; 347 umtxq_chains[i][j].max_length = 0; 348 #endif 349 } 350 } 351 #ifdef UMTX_PROFILING 352 umtx_init_profiling(); 353 #endif 354 mtx_init(&umtx_lock, 
"umtx lock", NULL, MTX_DEF); 355 umtx_shm_init(); 356 } 357 358 struct umtx_q * 359 umtxq_alloc(void) 360 { 361 struct umtx_q *uq; 362 363 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 364 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 365 M_WAITOK | M_ZERO); 366 TAILQ_INIT(&uq->uq_spare_queue->head); 367 TAILQ_INIT(&uq->uq_pi_contested); 368 uq->uq_inherited_pri = PRI_MAX; 369 return (uq); 370 } 371 372 void 373 umtxq_free(struct umtx_q *uq) 374 { 375 376 MPASS(uq->uq_spare_queue != NULL); 377 free(uq->uq_spare_queue, M_UMTX); 378 free(uq, M_UMTX); 379 } 380 381 static inline void 382 umtxq_hash(struct umtx_key *key) 383 { 384 unsigned n; 385 386 n = (uintptr_t)key->info.both.a + key->info.both.b; 387 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 388 } 389 390 struct umtxq_chain * 391 umtxq_getchain(struct umtx_key *key) 392 { 393 394 if (key->type <= TYPE_SEM) 395 return (&umtxq_chains[1][key->hash]); 396 return (&umtxq_chains[0][key->hash]); 397 } 398 399 /* 400 * Set chain to busy state when following operation 401 * may be blocked (kernel mutex can not be used). 402 */ 403 void 404 umtxq_busy(struct umtx_key *key) 405 { 406 struct umtxq_chain *uc; 407 408 uc = umtxq_getchain(key); 409 mtx_assert(&uc->uc_lock, MA_OWNED); 410 if (uc->uc_busy) { 411 #ifdef SMP 412 if (smp_cpus > 1) { 413 int count = BUSY_SPINS; 414 if (count > 0) { 415 umtxq_unlock(key); 416 while (uc->uc_busy && --count > 0) 417 cpu_spinwait(); 418 umtxq_lock(key); 419 } 420 } 421 #endif 422 while (uc->uc_busy) { 423 uc->uc_waiters++; 424 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 425 uc->uc_waiters--; 426 } 427 } 428 uc->uc_busy = 1; 429 } 430 431 /* 432 * Unbusy a chain. 
433 */ 434 void 435 umtxq_unbusy(struct umtx_key *key) 436 { 437 struct umtxq_chain *uc; 438 439 uc = umtxq_getchain(key); 440 mtx_assert(&uc->uc_lock, MA_OWNED); 441 KASSERT(uc->uc_busy != 0, ("not busy")); 442 uc->uc_busy = 0; 443 if (uc->uc_waiters) 444 wakeup_one(uc); 445 } 446 447 void 448 umtxq_unbusy_unlocked(struct umtx_key *key) 449 { 450 451 umtxq_lock(key); 452 umtxq_unbusy(key); 453 umtxq_unlock(key); 454 } 455 456 static struct umtxq_queue * 457 umtxq_queue_lookup(struct umtx_key *key, int q) 458 { 459 struct umtxq_queue *uh; 460 struct umtxq_chain *uc; 461 462 uc = umtxq_getchain(key); 463 UMTXQ_LOCKED_ASSERT(uc); 464 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 465 if (umtx_key_match(&uh->key, key)) 466 return (uh); 467 } 468 469 return (NULL); 470 } 471 472 void 473 umtxq_insert_queue(struct umtx_q *uq, int q) 474 { 475 struct umtxq_queue *uh; 476 struct umtxq_chain *uc; 477 478 uc = umtxq_getchain(&uq->uq_key); 479 UMTXQ_LOCKED_ASSERT(uc); 480 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 481 uh = umtxq_queue_lookup(&uq->uq_key, q); 482 if (uh != NULL) { 483 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 484 } else { 485 uh = uq->uq_spare_queue; 486 uh->key = uq->uq_key; 487 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 488 #ifdef UMTX_PROFILING 489 uc->length++; 490 if (uc->length > uc->max_length) { 491 uc->max_length = uc->length; 492 if (uc->max_length > max_length) 493 max_length = uc->max_length; 494 } 495 #endif 496 } 497 uq->uq_spare_queue = NULL; 498 499 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 500 uh->length++; 501 uq->uq_flags |= UQF_UMTXQ; 502 uq->uq_cur_queue = uh; 503 return; 504 } 505 506 void 507 umtxq_remove_queue(struct umtx_q *uq, int q) 508 { 509 struct umtxq_chain *uc; 510 struct umtxq_queue *uh; 511 512 uc = umtxq_getchain(&uq->uq_key); 513 UMTXQ_LOCKED_ASSERT(uc); 514 if (uq->uq_flags & UQF_UMTXQ) { 515 uh = uq->uq_cur_queue; 516 TAILQ_REMOVE(&uh->head, uq, uq_link); 517 
uh->length--; 518 uq->uq_flags &= ~UQF_UMTXQ; 519 if (TAILQ_EMPTY(&uh->head)) { 520 KASSERT(uh->length == 0, 521 ("inconsistent umtxq_queue length")); 522 #ifdef UMTX_PROFILING 523 uc->length--; 524 #endif 525 LIST_REMOVE(uh, link); 526 } else { 527 uh = LIST_FIRST(&uc->uc_spare_queue); 528 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 529 LIST_REMOVE(uh, link); 530 } 531 uq->uq_spare_queue = uh; 532 uq->uq_cur_queue = NULL; 533 } 534 } 535 536 /* 537 * Check if there are multiple waiters 538 */ 539 int 540 umtxq_count(struct umtx_key *key) 541 { 542 struct umtxq_queue *uh; 543 544 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 545 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 546 if (uh != NULL) 547 return (uh->length); 548 return (0); 549 } 550 551 /* 552 * Check if there are multiple PI waiters and returns first 553 * waiter. 554 */ 555 static int 556 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 557 { 558 struct umtxq_queue *uh; 559 560 *first = NULL; 561 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 562 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 563 if (uh != NULL) { 564 *first = TAILQ_FIRST(&uh->head); 565 return (uh->length); 566 } 567 return (0); 568 } 569 570 /* 571 * Wake up threads waiting on an userland object by a bit mask. 572 */ 573 int 574 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 575 { 576 struct umtxq_queue *uh; 577 struct umtx_q *uq, *uq_temp; 578 int ret; 579 580 ret = 0; 581 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 582 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 583 if (uh == NULL) 584 return (0); 585 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 586 if ((uq->uq_bitset & bitset) == 0) 587 continue; 588 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 589 wakeup_one(uq); 590 if (++ret >= n_wake) 591 break; 592 } 593 return (ret); 594 } 595 596 /* 597 * Wake up threads waiting on an userland object. 
598 */ 599 600 static int 601 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 602 { 603 struct umtxq_queue *uh; 604 struct umtx_q *uq; 605 int ret; 606 607 ret = 0; 608 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 609 uh = umtxq_queue_lookup(key, q); 610 if (uh != NULL) { 611 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 612 umtxq_remove_queue(uq, q); 613 wakeup(uq); 614 if (++ret >= n_wake) 615 return (ret); 616 } 617 } 618 return (ret); 619 } 620 621 /* 622 * Wake up specified thread. 623 */ 624 static inline void 625 umtxq_signal_thread(struct umtx_q *uq) 626 { 627 628 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 629 umtxq_remove(uq); 630 wakeup(uq); 631 } 632 633 /* 634 * Wake up a maximum of n_wake threads that are waiting on an userland 635 * object identified by key. The remaining threads are removed from queue 636 * identified by key and added to the queue identified by key2 (requeued). 637 * The n_requeue specifies an upper limit on the number of threads that 638 * are requeued to the second queue. 
639 */ 640 int 641 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 642 int n_requeue) 643 { 644 struct umtxq_queue *uh; 645 struct umtx_q *uq, *uq_temp; 646 int ret; 647 648 ret = 0; 649 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 650 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 651 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 652 if (uh == NULL) 653 return (0); 654 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 655 if (++ret <= n_wake) { 656 umtxq_remove(uq); 657 wakeup_one(uq); 658 } else { 659 umtxq_remove(uq); 660 uq->uq_key = *key2; 661 umtxq_insert(uq); 662 if (ret - n_wake == n_requeue) 663 break; 664 } 665 } 666 return (ret); 667 } 668 669 static inline int 670 tstohz(const struct timespec *tsp) 671 { 672 struct timeval tv; 673 674 TIMESPEC_TO_TIMEVAL(&tv, tsp); 675 return tvtohz(&tv); 676 } 677 678 void 679 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 680 int absolute, const struct timespec *timeout) 681 { 682 683 timo->clockid = clockid; 684 if (!absolute) { 685 timo->is_abs_real = false; 686 umtx_abs_timeout_update(timo); 687 timespecadd(&timo->cur, timeout, &timo->end); 688 } else { 689 timo->end = *timeout; 690 timo->is_abs_real = clockid == CLOCK_REALTIME || 691 clockid == CLOCK_REALTIME_FAST || 692 clockid == CLOCK_REALTIME_PRECISE; 693 /* 694 * If is_abs_real, umtxq_sleep will read the clock 695 * after setting td_rtcgen; otherwise, read it here. 
696 */ 697 if (!timo->is_abs_real) { 698 umtx_abs_timeout_update(timo); 699 } 700 } 701 } 702 703 static void 704 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 705 const struct _umtx_time *umtxtime) 706 { 707 708 umtx_abs_timeout_init(timo, umtxtime->_clockid, 709 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 710 } 711 712 static void 713 umtx_abs_timeout_update(struct umtx_abs_timeout *timo) 714 { 715 716 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 717 } 718 719 static int 720 umtx_abs_timeout_gethz(struct umtx_abs_timeout *timo) 721 { 722 struct timespec tts; 723 724 if (timespeccmp(&timo->end, &timo->cur, <=)) 725 return (-1); 726 timespecsub(&timo->end, &timo->cur, &tts); 727 return (tstohz(&tts)); 728 } 729 730 static uint32_t 731 umtx_unlock_val(uint32_t flags, bool rb) 732 { 733 734 if (rb) 735 return (UMUTEX_RB_OWNERDEAD); 736 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 737 return (UMUTEX_RB_NOTRECOV); 738 else 739 return (UMUTEX_UNOWNED); 740 741 } 742 743 /* 744 * Put thread into sleep state, before sleeping, check if 745 * thread was removed from umtx queue. 
*/
/*
 * uq:      waiter already inserted on its queue; the chain lock of
 *          uq->uq_key must be held on entry and is held on return.
 * wmesg:   wait message handed to msleep().
 * abstime: deadline descriptor, or NULL to sleep without a timeout.
 *
 * Returns 0 once uq is found off the queue (UQF_UMTXQ clear), ETIMEDOUT
 * once the deadline has passed, or EINTR/ERESTART as returned by msleep().
 */
int
umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct umtx_abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	/*
	 * For absolute real-time deadlines, record the current
	 * rtc_generation in td_rtcgen before reading the clock.
	 */
	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		umtx_abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		/* Already dequeued by a waker: nothing to wait for. */
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = umtx_abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		/*
		 * PDROP is passed, so the chain lock is not held after
		 * msleep() returns; each path below re-takes it with
		 * umtxq_lock().
		 */
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		/* Re-sample the clock (without the lock) before re-checking. */
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			umtx_abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert userspace address into unique logical address.
*/
/*
 * addr:  userspace address of the synchronization object.
 * type:  key type, stored in key->type.
 * share: THREAD_SHARE, PROCESS_SHARE or AUTO_SHARE.
 * key:   filled in on success.
 *
 * A private key is (vmspace, address).  A shared key is (VM object,
 * offset) and holds a reference on the object that umtx_key_release()
 * drops.  With AUTO_SHARE the key is shared only when the map entry has
 * VM_INHERIT_SHARE inheritance.
 *
 * Returns 0, or EFAULT when the address cannot be looked up for writing.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		    VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Offset of addr within the backing object. */
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		/* Pairs with the successful vm_map_lookup() above. */
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 *
 * Drops the VM object reference taken by umtx_key_get() for a shared
 * key; a private key holds nothing to release.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

#ifdef COMPAT_FREEBSD10
/*
 * Lock a umtx object.
 *
 * td:      calling thread.
 * umtx:    userspace lock word.
 * id:      owner id to store in the lock word.
 * timeout: relative timeout, or NULL to wait indefinitely.
 *
 * Returns 0 on acquisition, EFAULT on a bad address, otherwise the error
 * that ended the wait.  Without a timeout EINTR is turned into ERESTART;
 * with a timeout ERESTART is turned into EINTR.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
			    &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 *
 * Returns EFAULT on a bad address, EPERM when the lock word does not
 * carry id as its owner, EINVAL when the lock word changed underneath
 * the final update, 0 otherwise.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* The word changed under us; continue with the new value. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	/* The chain stays busy across the user-memory update below. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
	    count <= 1 ?
UMTX_UNOWNED : UMTX_CONTESTED); 1016 umtxq_lock(&key); 1017 umtxq_signal(&key,1); 1018 umtxq_unbusy(&key); 1019 umtxq_unlock(&key); 1020 umtx_key_release(&key); 1021 if (old == -1) 1022 return (EFAULT); 1023 if (old != owner) 1024 return (EINVAL); 1025 return (0); 1026 } 1027 1028 #ifdef COMPAT_FREEBSD32 1029 1030 /* 1031 * Lock a umtx object. 1032 */ 1033 static int 1034 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1035 const struct timespec *timeout) 1036 { 1037 struct umtx_abs_timeout timo; 1038 struct umtx_q *uq; 1039 uint32_t owner; 1040 uint32_t old; 1041 int error = 0; 1042 1043 uq = td->td_umtxq; 1044 1045 if (timeout != NULL) 1046 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1047 1048 /* 1049 * Care must be exercised when dealing with umtx structure. It 1050 * can fault on any access. 1051 */ 1052 for (;;) { 1053 /* 1054 * Try the uncontested case. This should be done in userland. 1055 */ 1056 owner = casuword32(m, UMUTEX_UNOWNED, id); 1057 1058 /* The acquire succeeded. */ 1059 if (owner == UMUTEX_UNOWNED) 1060 return (0); 1061 1062 /* The address was invalid. */ 1063 if (owner == -1) 1064 return (EFAULT); 1065 1066 /* If no one owns it but it is contested try to acquire it. */ 1067 if (owner == UMUTEX_CONTESTED) { 1068 owner = casuword32(m, 1069 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1070 if (owner == UMUTEX_CONTESTED) 1071 return (0); 1072 1073 /* The address was invalid. */ 1074 if (owner == -1) 1075 return (EFAULT); 1076 1077 error = thread_check_susp(td, false); 1078 if (error != 0) 1079 break; 1080 1081 /* If this failed the lock has changed, restart. */ 1082 continue; 1083 } 1084 1085 /* 1086 * If we caught a signal, we have retried and now 1087 * exit immediately. 
1088 */ 1089 if (error != 0) 1090 return (error); 1091 1092 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1093 AUTO_SHARE, &uq->uq_key)) != 0) 1094 return (error); 1095 1096 umtxq_lock(&uq->uq_key); 1097 umtxq_busy(&uq->uq_key); 1098 umtxq_insert(uq); 1099 umtxq_unbusy(&uq->uq_key); 1100 umtxq_unlock(&uq->uq_key); 1101 1102 /* 1103 * Set the contested bit so that a release in user space 1104 * knows to use the system call for unlock. If this fails 1105 * either some one else has acquired the lock or it has been 1106 * released. 1107 */ 1108 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1109 1110 /* The address was invalid. */ 1111 if (old == -1) { 1112 umtxq_lock(&uq->uq_key); 1113 umtxq_remove(uq); 1114 umtxq_unlock(&uq->uq_key); 1115 umtx_key_release(&uq->uq_key); 1116 return (EFAULT); 1117 } 1118 1119 /* 1120 * We set the contested bit, sleep. Otherwise the lock changed 1121 * and we need to retry or we lost a race to the thread 1122 * unlocking the umtx. 1123 */ 1124 umtxq_lock(&uq->uq_key); 1125 if (old == owner) 1126 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1127 NULL : &timo); 1128 umtxq_remove(uq); 1129 umtxq_unlock(&uq->uq_key); 1130 umtx_key_release(&uq->uq_key); 1131 1132 if (error == 0) 1133 error = thread_check_susp(td, false); 1134 } 1135 1136 if (timeout == NULL) { 1137 /* Mutex locking is restarted if it is interrupted. */ 1138 if (error == EINTR) 1139 error = ERESTART; 1140 } else { 1141 /* Timed-locking is not restarted. */ 1142 if (error == ERESTART) 1143 error = EINTR; 1144 } 1145 return (error); 1146 } 1147 1148 /* 1149 * Unlock a umtx object. 1150 */ 1151 static int 1152 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1153 { 1154 struct umtx_key key; 1155 uint32_t owner; 1156 uint32_t old; 1157 int error; 1158 int count; 1159 1160 /* 1161 * Make sure we own this mtx. 
*/
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* The word changed under us; continue with the new value. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* The chain stays busy across the user-memory update below. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	/* Wake one waiter and end the busy section started above. */
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif	/* COMPAT_FREEBSD32 */
#endif	/* COMPAT_FREEBSD10 */

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * td:         calling thread.
 * addr:       userspace address of the word to watch.
 * id:         value the word is expected to hold.
 * timeout:    optional timeout description, or NULL.
 * compat32:   non-zero to read the word as a 32-bit quantity.
 * is_private: non-zero to use a THREAD_SHARE key instead of AUTO_SHARE.
 *
 * Returns 0 when the word differs from id or when the thread was taken
 * off the queue by a waker, EFAULT on a bad address, otherwise the error
 * from umtxq_sleep() with ERESTART reported as EINTR.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Get on the queue before reading the word, so that a wake issued
	 * between the read and the sleep still finds this thread queued.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		/* Being dequeued by a waker overrides any sleep error. */
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 *
 * Wakes up to n_wake waiters.  Returns 0, or the error from
 * umtx_key_get().
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
1305 */ 1306 for (;;) { 1307 rv = fueword32(&m->m_owner, &owner); 1308 if (rv == -1) 1309 return (EFAULT); 1310 if (mode == _UMUTEX_WAIT) { 1311 if (owner == UMUTEX_UNOWNED || 1312 owner == UMUTEX_CONTESTED || 1313 owner == UMUTEX_RB_OWNERDEAD || 1314 owner == UMUTEX_RB_NOTRECOV) 1315 return (0); 1316 } else { 1317 /* 1318 * Robust mutex terminated. Kernel duty is to 1319 * return EOWNERDEAD to the userspace. The 1320 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1321 * by the common userspace code. 1322 */ 1323 if (owner == UMUTEX_RB_OWNERDEAD) { 1324 rv = casueword32(&m->m_owner, 1325 UMUTEX_RB_OWNERDEAD, &owner, 1326 id | UMUTEX_CONTESTED); 1327 if (rv == -1) 1328 return (EFAULT); 1329 if (rv == 0) { 1330 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1331 return (EOWNERDEAD); /* success */ 1332 } 1333 MPASS(rv == 1); 1334 rv = thread_check_susp(td, false); 1335 if (rv != 0) 1336 return (rv); 1337 continue; 1338 } 1339 if (owner == UMUTEX_RB_NOTRECOV) 1340 return (ENOTRECOVERABLE); 1341 1342 /* 1343 * Try the uncontested case. This should be 1344 * done in userland. 1345 */ 1346 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1347 &owner, id); 1348 /* The address was invalid. */ 1349 if (rv == -1) 1350 return (EFAULT); 1351 1352 /* The acquire succeeded. */ 1353 if (rv == 0) { 1354 MPASS(owner == UMUTEX_UNOWNED); 1355 return (0); 1356 } 1357 1358 /* 1359 * If no one owns it but it is contested try 1360 * to acquire it. 1361 */ 1362 MPASS(rv == 1); 1363 if (owner == UMUTEX_CONTESTED) { 1364 rv = casueword32(&m->m_owner, 1365 UMUTEX_CONTESTED, &owner, 1366 id | UMUTEX_CONTESTED); 1367 /* The address was invalid. */ 1368 if (rv == -1) 1369 return (EFAULT); 1370 if (rv == 0) { 1371 MPASS(owner == UMUTEX_CONTESTED); 1372 return (0); 1373 } 1374 if (rv == 1) { 1375 rv = thread_check_susp(td, false); 1376 if (rv != 0) 1377 return (rv); 1378 } 1379 1380 /* 1381 * If this failed the lock has 1382 * changed, restart. 
1383 */ 1384 continue; 1385 } 1386 1387 /* rv == 1 but not contested, likely store failure */ 1388 rv = thread_check_susp(td, false); 1389 if (rv != 0) 1390 return (rv); 1391 } 1392 1393 if (mode == _UMUTEX_TRY) 1394 return (EBUSY); 1395 1396 /* 1397 * If we caught a signal, we have retried and now 1398 * exit immediately. 1399 */ 1400 if (error != 0) 1401 return (error); 1402 1403 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1404 GET_SHARE(flags), &uq->uq_key)) != 0) 1405 return (error); 1406 1407 umtxq_lock(&uq->uq_key); 1408 umtxq_busy(&uq->uq_key); 1409 umtxq_insert(uq); 1410 umtxq_unlock(&uq->uq_key); 1411 1412 /* 1413 * Set the contested bit so that a release in user space 1414 * knows to use the system call for unlock. If this fails 1415 * either some one else has acquired the lock or it has been 1416 * released. 1417 */ 1418 rv = casueword32(&m->m_owner, owner, &old, 1419 owner | UMUTEX_CONTESTED); 1420 1421 /* The address was invalid or casueword failed to store. */ 1422 if (rv == -1 || rv == 1) { 1423 umtxq_lock(&uq->uq_key); 1424 umtxq_remove(uq); 1425 umtxq_unbusy(&uq->uq_key); 1426 umtxq_unlock(&uq->uq_key); 1427 umtx_key_release(&uq->uq_key); 1428 if (rv == -1) 1429 return (EFAULT); 1430 if (rv == 1) { 1431 rv = thread_check_susp(td, false); 1432 if (rv != 0) 1433 return (rv); 1434 } 1435 continue; 1436 } 1437 1438 /* 1439 * We set the contested bit, sleep. Otherwise the lock changed 1440 * and we need to retry or we lost a race to the thread 1441 * unlocking the umtx. 1442 */ 1443 umtxq_lock(&uq->uq_key); 1444 umtxq_unbusy(&uq->uq_key); 1445 MPASS(old == owner); 1446 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1447 NULL : &timo); 1448 umtxq_remove(uq); 1449 umtxq_unlock(&uq->uq_key); 1450 umtx_key_release(&uq->uq_key); 1451 1452 if (error == 0) 1453 error = thread_check_susp(td, false); 1454 } 1455 1456 return (0); 1457 } 1458 1459 /* 1460 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 
1461 */ 1462 static int 1463 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1464 { 1465 struct umtx_key key; 1466 uint32_t owner, old, id, newlock; 1467 int error, count; 1468 1469 id = td->td_tid; 1470 1471 again: 1472 /* 1473 * Make sure we own this mtx. 1474 */ 1475 error = fueword32(&m->m_owner, &owner); 1476 if (error == -1) 1477 return (EFAULT); 1478 1479 if ((owner & ~UMUTEX_CONTESTED) != id) 1480 return (EPERM); 1481 1482 newlock = umtx_unlock_val(flags, rb); 1483 if ((owner & UMUTEX_CONTESTED) == 0) { 1484 error = casueword32(&m->m_owner, owner, &old, newlock); 1485 if (error == -1) 1486 return (EFAULT); 1487 if (error == 1) { 1488 error = thread_check_susp(td, false); 1489 if (error != 0) 1490 return (error); 1491 goto again; 1492 } 1493 MPASS(old == owner); 1494 return (0); 1495 } 1496 1497 /* We should only ever be in here for contested locks */ 1498 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1499 &key)) != 0) 1500 return (error); 1501 1502 umtxq_lock(&key); 1503 umtxq_busy(&key); 1504 count = umtxq_count(&key); 1505 umtxq_unlock(&key); 1506 1507 /* 1508 * When unlocking the umtx, it must be marked as unowned if 1509 * there is zero or one thread only waiting for it. 1510 * Otherwise, it must be marked as contested. 1511 */ 1512 if (count > 1) 1513 newlock |= UMUTEX_CONTESTED; 1514 error = casueword32(&m->m_owner, owner, &old, newlock); 1515 umtxq_lock(&key); 1516 umtxq_signal(&key, 1); 1517 umtxq_unbusy(&key); 1518 umtxq_unlock(&key); 1519 umtx_key_release(&key); 1520 if (error == -1) 1521 return (EFAULT); 1522 if (error == 1) { 1523 if (old != owner) 1524 return (EINVAL); 1525 error = thread_check_susp(td, false); 1526 if (error != 0) 1527 return (error); 1528 goto again; 1529 } 1530 return (0); 1531 } 1532 1533 /* 1534 * Check if the mutex is available and wake up a waiter, 1535 * only for simple mutex. 
1536 */ 1537 static int 1538 do_wake_umutex(struct thread *td, struct umutex *m) 1539 { 1540 struct umtx_key key; 1541 uint32_t owner; 1542 uint32_t flags; 1543 int error; 1544 int count; 1545 1546 again: 1547 error = fueword32(&m->m_owner, &owner); 1548 if (error == -1) 1549 return (EFAULT); 1550 1551 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1552 owner != UMUTEX_RB_NOTRECOV) 1553 return (0); 1554 1555 error = fueword32(&m->m_flags, &flags); 1556 if (error == -1) 1557 return (EFAULT); 1558 1559 /* We should only ever be in here for contested locks */ 1560 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1561 &key)) != 0) 1562 return (error); 1563 1564 umtxq_lock(&key); 1565 umtxq_busy(&key); 1566 count = umtxq_count(&key); 1567 umtxq_unlock(&key); 1568 1569 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1570 owner != UMUTEX_RB_NOTRECOV) { 1571 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1572 UMUTEX_UNOWNED); 1573 if (error == -1) { 1574 error = EFAULT; 1575 } else if (error == 1) { 1576 umtxq_lock(&key); 1577 umtxq_unbusy(&key); 1578 umtxq_unlock(&key); 1579 umtx_key_release(&key); 1580 error = thread_check_susp(td, false); 1581 if (error != 0) 1582 return (error); 1583 goto again; 1584 } 1585 } 1586 1587 umtxq_lock(&key); 1588 if (error == 0 && count != 0) { 1589 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1590 owner == UMUTEX_RB_OWNERDEAD || 1591 owner == UMUTEX_RB_NOTRECOV); 1592 umtxq_signal(&key, 1); 1593 } 1594 umtxq_unbusy(&key); 1595 umtxq_unlock(&key); 1596 umtx_key_release(&key); 1597 return (error); 1598 } 1599 1600 /* 1601 * Check if the mutex has waiters and tries to fix contention bit. 
1602 */ 1603 static int 1604 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1605 { 1606 struct umtx_key key; 1607 uint32_t owner, old; 1608 int type; 1609 int error; 1610 int count; 1611 1612 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1613 UMUTEX_ROBUST)) { 1614 case 0: 1615 case UMUTEX_ROBUST: 1616 type = TYPE_NORMAL_UMUTEX; 1617 break; 1618 case UMUTEX_PRIO_INHERIT: 1619 type = TYPE_PI_UMUTEX; 1620 break; 1621 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1622 type = TYPE_PI_ROBUST_UMUTEX; 1623 break; 1624 case UMUTEX_PRIO_PROTECT: 1625 type = TYPE_PP_UMUTEX; 1626 break; 1627 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1628 type = TYPE_PP_ROBUST_UMUTEX; 1629 break; 1630 default: 1631 return (EINVAL); 1632 } 1633 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1634 return (error); 1635 1636 owner = 0; 1637 umtxq_lock(&key); 1638 umtxq_busy(&key); 1639 count = umtxq_count(&key); 1640 umtxq_unlock(&key); 1641 1642 error = fueword32(&m->m_owner, &owner); 1643 if (error == -1) 1644 error = EFAULT; 1645 1646 /* 1647 * Only repair contention bit if there is a waiter, this means 1648 * the mutex is still being referenced by userland code, 1649 * otherwise don't update any memory. 
1650 */ 1651 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1652 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1653 error = casueword32(&m->m_owner, owner, &old, 1654 owner | UMUTEX_CONTESTED); 1655 if (error == -1) { 1656 error = EFAULT; 1657 break; 1658 } 1659 if (error == 0) { 1660 MPASS(old == owner); 1661 break; 1662 } 1663 owner = old; 1664 error = thread_check_susp(td, false); 1665 } 1666 1667 umtxq_lock(&key); 1668 if (error == EFAULT) { 1669 umtxq_signal(&key, INT_MAX); 1670 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1671 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1672 umtxq_signal(&key, 1); 1673 umtxq_unbusy(&key); 1674 umtxq_unlock(&key); 1675 umtx_key_release(&key); 1676 return (error); 1677 } 1678 1679 struct umtx_pi * 1680 umtx_pi_alloc(int flags) 1681 { 1682 struct umtx_pi *pi; 1683 1684 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1685 TAILQ_INIT(&pi->pi_blocked); 1686 atomic_add_int(&umtx_pi_allocated, 1); 1687 return (pi); 1688 } 1689 1690 void 1691 umtx_pi_free(struct umtx_pi *pi) 1692 { 1693 uma_zfree(umtx_pi_zone, pi); 1694 atomic_add_int(&umtx_pi_allocated, -1); 1695 } 1696 1697 /* 1698 * Adjust the thread's position on a pi_state after its priority has been 1699 * changed. 1700 */ 1701 static int 1702 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1703 { 1704 struct umtx_q *uq, *uq1, *uq2; 1705 struct thread *td1; 1706 1707 mtx_assert(&umtx_lock, MA_OWNED); 1708 if (pi == NULL) 1709 return (0); 1710 1711 uq = td->td_umtxq; 1712 1713 /* 1714 * Check if the thread needs to be moved on the blocked chain. 1715 * It needs to be moved if either its priority is lower than 1716 * the previous thread or higher than the next thread. 
1717 */ 1718 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1719 uq2 = TAILQ_NEXT(uq, uq_lockq); 1720 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1721 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1722 /* 1723 * Remove thread from blocked chain and determine where 1724 * it should be moved to. 1725 */ 1726 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1727 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1728 td1 = uq1->uq_thread; 1729 MPASS(td1->td_proc->p_magic == P_MAGIC); 1730 if (UPRI(td1) > UPRI(td)) 1731 break; 1732 } 1733 1734 if (uq1 == NULL) 1735 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1736 else 1737 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1738 } 1739 return (1); 1740 } 1741 1742 static struct umtx_pi * 1743 umtx_pi_next(struct umtx_pi *pi) 1744 { 1745 struct umtx_q *uq_owner; 1746 1747 if (pi->pi_owner == NULL) 1748 return (NULL); 1749 uq_owner = pi->pi_owner->td_umtxq; 1750 if (uq_owner == NULL) 1751 return (NULL); 1752 return (uq_owner->uq_pi_blocked); 1753 } 1754 1755 /* 1756 * Floyd's Cycle-Finding Algorithm. 1757 */ 1758 static bool 1759 umtx_pi_check_loop(struct umtx_pi *pi) 1760 { 1761 struct umtx_pi *pi1; /* fast iterator */ 1762 1763 mtx_assert(&umtx_lock, MA_OWNED); 1764 if (pi == NULL) 1765 return (false); 1766 pi1 = pi; 1767 for (;;) { 1768 pi = umtx_pi_next(pi); 1769 if (pi == NULL) 1770 break; 1771 pi1 = umtx_pi_next(pi1); 1772 if (pi1 == NULL) 1773 break; 1774 pi1 = umtx_pi_next(pi1); 1775 if (pi1 == NULL) 1776 break; 1777 if (pi == pi1) 1778 return (true); 1779 } 1780 return (false); 1781 } 1782 1783 /* 1784 * Propagate priority when a thread is blocked on POSIX 1785 * PI mutex. 
1786 */ 1787 static void 1788 umtx_propagate_priority(struct thread *td) 1789 { 1790 struct umtx_q *uq; 1791 struct umtx_pi *pi; 1792 int pri; 1793 1794 mtx_assert(&umtx_lock, MA_OWNED); 1795 pri = UPRI(td); 1796 uq = td->td_umtxq; 1797 pi = uq->uq_pi_blocked; 1798 if (pi == NULL) 1799 return; 1800 if (umtx_pi_check_loop(pi)) 1801 return; 1802 1803 for (;;) { 1804 td = pi->pi_owner; 1805 if (td == NULL || td == curthread) 1806 return; 1807 1808 MPASS(td->td_proc != NULL); 1809 MPASS(td->td_proc->p_magic == P_MAGIC); 1810 1811 thread_lock(td); 1812 if (td->td_lend_user_pri > pri) 1813 sched_lend_user_prio(td, pri); 1814 else { 1815 thread_unlock(td); 1816 break; 1817 } 1818 thread_unlock(td); 1819 1820 /* 1821 * Pick up the lock that td is blocked on. 1822 */ 1823 uq = td->td_umtxq; 1824 pi = uq->uq_pi_blocked; 1825 if (pi == NULL) 1826 break; 1827 /* Resort td on the list if needed. */ 1828 umtx_pi_adjust_thread(pi, td); 1829 } 1830 } 1831 1832 /* 1833 * Unpropagate priority for a PI mutex when a thread blocked on 1834 * it is interrupted by signal or resumed by others. 
*/
/*
 * For 'pi' and then for each umtx_pi further up the blocking chain, the
 * owner's lent priority is recomputed as the numerically smallest
 * UPRI() among the first waiters of all umtx_pi's the owner holds,
 * capped by the owner's uq_inherited_pri.  Nothing is done when the
 * chain contains a loop.  Must be called with umtx_lock held.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/* Only the head of each blocked list is examined. */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue with the lock the owner itself is blocked on. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into owned list.
 *
 * Records 'owner' as the owner of 'pi' and appends 'pi' to the owner's
 * uq_pi_contested list.  'pi' must be unowned and umtx_lock must be
 * held.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 *
 * 'pi' must currently have an owner (pi_owner is dereferenced) and
 * umtx_lock must be held.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 *
 * Returns 0 when 'owner' already owns 'pi' or has just become its
 * owner, and EPERM when another thread is recorded as the owner.  On a
 * fresh claim the first blocked waiter's priority is lent to 'owner'
 * if its UPRI() value is numerically smaller than the owner's.
 * Acquires and releases umtx_lock internally.
 */
int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already corrupted the mutex state.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of the PI mutex it is
 * waiting for; this may start a new round of priority propagation.
 *
 * 'oldpri' is not used by this function.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 *
 * Called with the umtxq chain of uq->uq_key locked and the chain busy
 * (both asserted below); the busy state is dropped before sleeping and
 * the chain lock is released before returning.  'owner' is the thread
 * id taken from the mutex word; it is looked up only when 'pi' has no
 * recorded owner yet.  'shared' selects a system-wide lookup (pid -1)
 * instead of one restricted to the caller's process.
 *
 * Returns the result of umtxq_sleep().
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/* tdfind() is called with umtx_lock dropped. */
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			/* Re-check: the owner may have been set meanwhile. */
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Find the first waiter with a numerically larger UPRI(). */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/* Undo the blocked state and recompute the lent priorities. */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 *
 * The umtxq chain of pi->pi_key must be locked.
 */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
2036 */ 2037 void 2038 umtx_pi_unref(struct umtx_pi *pi) 2039 { 2040 struct umtxq_chain *uc; 2041 2042 uc = umtxq_getchain(&pi->pi_key); 2043 UMTXQ_LOCKED_ASSERT(uc); 2044 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2045 if (--pi->pi_refcount == 0) { 2046 mtx_lock(&umtx_lock); 2047 if (pi->pi_owner != NULL) 2048 umtx_pi_disown(pi); 2049 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2050 ("blocked queue not empty")); 2051 mtx_unlock(&umtx_lock); 2052 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2053 umtx_pi_free(pi); 2054 } 2055 } 2056 2057 /* 2058 * Find a PI mutex in hash table. 2059 */ 2060 struct umtx_pi * 2061 umtx_pi_lookup(struct umtx_key *key) 2062 { 2063 struct umtxq_chain *uc; 2064 struct umtx_pi *pi; 2065 2066 uc = umtxq_getchain(key); 2067 UMTXQ_LOCKED_ASSERT(uc); 2068 2069 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2070 if (umtx_key_match(&pi->pi_key, key)) { 2071 return (pi); 2072 } 2073 } 2074 return (NULL); 2075 } 2076 2077 /* 2078 * Insert a PI mutex into hash table. 2079 */ 2080 void 2081 umtx_pi_insert(struct umtx_pi *pi) 2082 { 2083 struct umtxq_chain *uc; 2084 2085 uc = umtxq_getchain(&pi->pi_key); 2086 UMTXQ_LOCKED_ASSERT(uc); 2087 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2088 } 2089 2090 /* 2091 * Drop a PI mutex and wakeup a top waiter. 
2092 */ 2093 int 2094 umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count) 2095 { 2096 struct umtx_q *uq_first, *uq_first2, *uq_me; 2097 struct umtx_pi *pi, *pi2; 2098 int pri; 2099 2100 UMTXQ_ASSERT_LOCKED_BUSY(key); 2101 *count = umtxq_count_pi(key, &uq_first); 2102 if (uq_first != NULL) { 2103 mtx_lock(&umtx_lock); 2104 pi = uq_first->uq_pi_blocked; 2105 KASSERT(pi != NULL, ("pi == NULL?")); 2106 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2107 mtx_unlock(&umtx_lock); 2108 /* userland messed the mutex */ 2109 return (EPERM); 2110 } 2111 uq_me = td->td_umtxq; 2112 if (pi->pi_owner == td) 2113 umtx_pi_disown(pi); 2114 /* get highest priority thread which is still sleeping. */ 2115 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2116 while (uq_first != NULL && 2117 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2118 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2119 } 2120 pri = PRI_MAX; 2121 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2122 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2123 if (uq_first2 != NULL) { 2124 if (pri > UPRI(uq_first2->uq_thread)) 2125 pri = UPRI(uq_first2->uq_thread); 2126 } 2127 } 2128 thread_lock(td); 2129 sched_lend_user_prio(td, pri); 2130 thread_unlock(td); 2131 mtx_unlock(&umtx_lock); 2132 if (uq_first) 2133 umtxq_signal_thread(uq_first); 2134 } else { 2135 pi = umtx_pi_lookup(key); 2136 /* 2137 * A umtx_pi can exist if a signal or timeout removed the 2138 * last waiter from the umtxq, but there is still 2139 * a thread in do_lock_pi() holding the umtx_pi. 2140 */ 2141 if (pi != NULL) { 2142 /* 2143 * The umtx_pi can be unowned, such as when a thread 2144 * has just entered do_lock_pi(), allocated the 2145 * umtx_pi, and unlocked the umtxq. 2146 * If the current thread owns it, it must disown it. 2147 */ 2148 mtx_lock(&umtx_lock); 2149 if (pi->pi_owner == td) 2150 umtx_pi_disown(pi); 2151 mtx_unlock(&umtx_lock); 2152 } 2153 } 2154 return (0); 2155 } 2156 2157 /* 2158 * Lock a PI mutex. 
*/
/*
 * 'try' non-zero makes the function return EBUSY instead of sleeping.
 * 'timeout' may be NULL, in which case the sleep has no timeout.
 *
 * Returns 0 on success, EOWNERDEAD when the lock was taken over from
 * the UMUTEX_RB_OWNERDEAD state, ENOTRECOVERABLE, EDEADLK when the
 * caller already owns the mutex, EBUSY, EFAULT for user-memory access
 * failures, or an error from umtx_key_get(), umtx_pi_claim(),
 * umtxq_sleep_pi() or thread_check_susp().
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Find the umtx_pi for this key or create one.  The first
	 * allocation attempt is non-sleeping because the chain lock is
	 * held; on failure the lock is dropped for a sleeping
	 * allocation and the lookup is repeated, since another thread
	 * may have inserted a umtx_pi in the meantime.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed the lock could have
				 * changed, restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/* Keep the chain busy across the user-memory CAS below. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		/*
		 * On a sleep error, make one more pass over the owner
		 * word; 'error' stays set and ends the loop afterwards.
		 */
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	/* Drop the reference taken before entering the loop. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 *
 * The calling thread must be the current owner of 'm'.  The value
 * stored on release comes from umtx_unlock_val(flags, rb); the
 * contested bit is added when more than one waiter is reported by
 * umtx_pi_drop().
 *
 * Returns 0 on success, EFAULT when user memory cannot be accessed,
 * EPERM when the caller is not the owner (or umtx_pi_drop() reports
 * so), EINVAL when the final compare-and-swap completed but observed a
 * different owner value, or an error from umtx_key_get() or
 * thread_check_susp().
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t id, new_owner, old, owner;
	int count, error;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	error = umtx_pi_drop(td, &key, rb, &count);
	if (error != 0) {
		umtxq_unbusy(&key);
		umtxq_unlock(&key);
		umtx_key_release(&key);
		/* userland messed the mutex */
		return (error);
	}
	/* The chain stays busy while the owner word is rewritten. */
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		/* Retry the store unless the suspension check fails. */
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}

/*
 * Lock a PP mutex.
 *
 * Each attempt marks the queue chain busy, reads m_ceilings[0] from
 * userland and converts it with "RTP_PRIO_MAX - ceiling".  The attempt
 * fails with EINVAL when the converted value exceeds RTP_PRIO_MAX or when
 * UPRI(td) is numerically below PRI_MIN_REALTIME + ceiling.  If the thread
 * passes the PRIV_SCHED_RTPRIO privilege check, uq_inherited_pri is
 * lowered to the ceiling before the owner word is claimed.
 *
 * The owner word is claimed by swapping UMUTEX_CONTESTED (or
 * UMUTEX_RB_OWNERDEAD) for "tid | UMUTEX_CONTESTED".  When the mutex
 * cannot be claimed the thread sleeps on the queue and retries, unless
 * "try" is non-zero, in which case EBUSY is returned.
 *
 * Returns 0 on success, EOWNERDEAD when the lock was taken over from the
 * UMUTEX_RB_OWNERDEAD state, ENOTRECOVERABLE for UMUTEX_RB_NOTRECOV,
 * EFAULT on a userland access fault, or an error from umtx_key_get() or
 * umtxq_sleep().
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/* su is non-zero when the thread holds PRIV_SCHED_RTPRIO. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		/* Remembered so it can be restored if this attempt fails. */
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		/*
		 * ceiling is unsigned, so a userland value larger than
		 * RTP_PRIO_MAX wraps around and is caught by the range
		 * check below.
		 */
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * have not already caught a signal.  If we
			 * get an error from the check, the same
			 * condition is checked by the umtxq_sleep()
			 * call below, so we should obliterate the
			 * error to not skip the last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Undo the ceiling adjustment made for this attempt and
		 * recompute the lent priority from the PI mutexes this
		 * thread still has contested.
		 */
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	/*
	 * The lock was not acquired: drop the priority adjustment made by
	 * the last attempt, same computation as after a sleep above.
	 */
	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 *
 * Fails with EPERM unless the owner word, ignoring UMUTEX_CONTESTED,
 * equals the calling thread's tid.  m_ceilings[1] supplies the priority
 * to fall back to: the value -1 selects PRI_MAX, any other value is
 * converted like a ceiling and rejected with EINVAL when out of range.
 * The owner word is then overwritten, one waiter is woken and the
 * thread's lent user priority is recomputed.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* rceiling is unsigned; -1 here matches the all-ones value. */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
	 * to lock the mutex.  This is necessary because the thread
	 * priority has to be adjusted for such a mutex.
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock(&umtx_lock);
		/* Only a privileged thread had its inherited priority set. */
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Change the priority ceiling stored in m_ceilings[0] of a PP mutex.
 *
 * Returns EINVAL if the mutex does not have UMUTEX_PRIO_PROTECT set or if
 * "ceiling" exceeds RTP_PRIO_MAX.  The ceiling is written either while
 * this function briefly holds the mutex (it was unowned), or directly when
 * the caller already owns it; otherwise the thread sleeps and retries.
 * On success the previous ceiling is stored through "old_ceiling" when
 * that pointer is not NULL.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (rv == 0) {
			/*
			 * The mutex was unowned and is now held by us:
			 * store the new ceiling, then release it again.
			 */
			MPASS(owner == UMUTEX_CONTESTED);
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		/* The caller already owns the mutex. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	/* Every exit from the loop leaves the chain busy; release it here. */
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 *
 * Reads m_flags from userland and dispatches to the normal,
 * priority-inheritance or priority-protection implementation.  A mutex
 * with both UMUTEX_PRIO_INHERIT and UMUTEX_PRIO_PROTECT set is rejected
 * with EINVAL.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		/*
		 * An untimed lock is restarted after a signal, except in
		 * _UMUTEX_WAIT mode where EINTR is passed through.
		 */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 *
 * Dispatches on the protocol bits of m_flags in the same way as
 * do_lock_umutex(); "rb" is passed through to the protocol-specific
 * unlock routine.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	/* Both protocol bits were set. */
	return (EINVAL);
}

/*
 * Wait on a userland condition variable.
 *
 * The thread is put on the queue for "cv" and c_has_waiters is set before
 * the user mutex "m" is released.  "timeout" may be NULL to wait without a
 * time limit.  When CVWAIT_CLOCKID is set in "wflags" the clock is read
 * from cv->c_clockid, otherwise CLOCK_REALTIME is used; CVWAIT_ABSTIME
 * is forwarded to umtx_abs_timeout_init().
 *
 * Returns 0 if the thread was taken off the queue by someone else,
 * otherwise the error from the unlock or the sleep, with ERESTART
 * converted to EINTR.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing the user mutex; also
	 * don't modify the cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, clockid,
		    (wflags & CVWAIT_ABSTIME) != 0, timeout);

	umtxq_lock(&uq->uq_key);
	/* Only sleep if the mutex was released successfully. */
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	/*
	 * UQF_UMTXQ no longer set: we are off the queue already,
	 * presumably removed by a signal or broadcast, so report success.
	 */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal or
		 * a spurious wakeup; clear the c_has_waiters flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			/* We were the only entry on the queue. */
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 *
 * Wakes at most one waiter.  When the number of queued waiters did not
 * exceed the number woken (cnt <= nwake), c_has_waiters is cleared in
 * userland.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/* The chain lock is dropped around the userland store. */
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Broadcast a userland condition variable: wake every queued waiter and
 * clear c_has_waiters.  Returns EFAULT if a userland access faults.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}

/*
 * Acquire a userland rwlock for reading.
 *
 * The fast path increments the reader count in rw_state with a
 * compare-and-set while none of the blocking bits in "wrflags" is set.
 * Otherwise URWLOCK_READ_WAITERS is set, rw_blocked_readers is
 * incremented and the thread sleeps on the queue until the blocking bits
 * clear, after which the whole sequence is retried.
 *
 * Returns 0 on success, EAGAIN when the reader count is already
 * URWLOCK_MAX_READERS, EFAULT on a userland access fault, or the error
 * from the sleep or suspension check (ERESTART is converted to EINTR).
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * A write owner always blocks readers.  Pending writers block them
	 * too, unless reader preference is requested through either the
	 * call flags or the lock's own flags.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * Contention bit is set, before sleeping, increase
		 * read waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			/* Re-take the busy marker before leaving the queue. */
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		/* We were the last blocked reader: clear READ_WAITERS. */
		if (blocked_readers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				/*
				 * Keep an earlier error from the sleep; only
				 * report the suspension error if none is
				 * pending.
				 */
				error1 = thread_check_susp(td, false);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Acquire a userland rwlock for writing.
 *
 * The fast path sets URWLOCK_WRITE_OWNER in rw_state with a
 * compare-and-set while there is neither a write owner nor any reader.
 * Otherwise URWLOCK_WRITE_WAITERS is set, rw_blocked_writers is
 * incremented and the thread sleeps on the exclusive queue until the lock
 * becomes free, after which the whole sequence is retried.
 *
 * Returns 0 on success, EFAULT on a userland access fault, or the error
 * from the sleep or suspension check (ERESTART is converted to EINTR).
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		/* Try to take the lock while it is completely free. */
		while ((state & URWLOCK_WRITE_OWNER) == 0 &&
		    URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * We are giving up.  If no writer owns or waits
			 * for the lock and the previous pass saw blocked
			 * readers, wake all of them on the shared queue.
			 */
			if ((state & (URWLOCK_WRITE_OWNER |
			    URWLOCK_WRITE_WAITERS)) == 0 &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX,
				    UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* Set the write contention bit while the lock is held. */
		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* The lock became free in the meantime; restart. */
		if ((state & URWLOCK_WRITE_OWNER) == 0 &&
		    URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
			continue;
		}
sleep:
		/* Increase the write waiter count before sleeping. */
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

		while ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			/* Re-take the busy marker before leaving the queue. */
			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* Decrease the write waiter count again. */
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		/* We were the last blocked writer: clear WRITE_WAITERS. */
		if (blocked_writers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			/*
			 * Remember whether readers are blocked; the error
			 * path at the top of the loop uses this to decide
			 * whether to wake them.
			 */
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Release a userland rwlock.
 *
 * Clears URWLOCK_WRITE_OWNER when it is set, otherwise decrements the
 * reader count.  Returns EPERM when the lock is not held in the state
 * being released.  After a successful release, waiters are woken
 * according to the waiter bits seen in the state word: one writer from
 * the exclusive queue or all readers from the shared queue, with the
 * order of preference selected by URWLOCK_PREFER_READER in rw_flags.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write unlock: clear the owner bit. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (rv == 1) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = thread_check_susp(td, true);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read unlock: drop one reader. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (rv == 1) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = thread_check_susp(td, true);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	/* q is only assigned, and only read, when count is non-zero. */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/*
 * Wait on a userland semaphore (struct _usem layout, compat only).
 *
 * The thread queues itself, sets _has_waiters and then re-reads _count.
 * It returns without sleeping when _count is non-zero or when a userland
 * access faults (EFAULT); otherwise it sleeps until woken, interrupted or
 * timed out.
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv, rv1;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

again:
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	/* rv1 and count are only valid, and only examined, when rv == 0. */
	if (rv == 0)
		rv1 = fueword32(&sem->_count, &count);
	if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) ||
	    (rv == 1 && count1 == 0)) {
		/* Not going to sleep: leave the queue again. */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		if (rv == 1) {
			rv = thread_check_susp(td, true);
			if (rv == 0)
				goto again;
			error = rv;
			goto out;
		}
		if (rv == 0)
			rv = rv1;
		error = rv == -1 ? EFAULT : 0;
		goto out;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	/*
	 * UQF_UMTXQ no longer set: we are off the queue already,
	 * presumably removed by a wake-up, so report success.
	 */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 *
 * Wakes one waiter if any is queued; when it is the only one,
 * _has_waiters is cleared first.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * Check if count is greater than 0, this means the memory is
		 * still being referenced by user code, so we can safely
		 * update _has_waiters flag.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
#endif

/*
 * Wait on a userland semaphore (struct _usem2 layout).
 *
 * Returns 0 immediately when USEM_COUNT(_count) is non-zero.  Otherwise
 * USEM_HAS_WAITERS is set in _count, if it is not already the stored
 * value, and the thread sleeps.  When a relative timeout is interrupted
 * with EINTR, "timeout->_timeout" is overwritten with timo.end - timo.cur
 * as computed after umtx_abs_timeout_update().
 */
static int
do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t count, flags;
	int error, rv;

	uq = td->td_umtxq;
	/*
	 * NOTE(review): fuword32() gives no separate fault indication
	 * here; a faulting address is presumably caught by the later
	 * userland accesses -- confirm.
	 */
	flags = fuword32(&sem->_flags);
	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

again:
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = fueword32(&sem->_count, &count);
	if (rv == -1) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (EFAULT);
	}
	for (;;) {
		/* The semaphore is available: no need to sleep. */
		if (USEM_COUNT(count) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (0);
		}
		/* The waiters flag is already set: go to sleep. */
		if (count == USEM_HAS_WAITERS)
			break;
		rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
		if (rv == 0)
			break;
		/*
		 * The flag was not stored.  Back out completely, including
		 * the key reference, and start over from "again".
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		if (rv == -1)
			return (EFAULT);
		rv = thread_check_susp(td, true);
		if (rv != 0)
			return (rv);
		goto again;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	/*
	 * UQF_UMTXQ no longer set: we are off the queue already,
	 * presumably removed by a wake-up, so report success.
	 */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
			/* A relative timeout cannot be restarted. */
			if (error == ERESTART)
				error = EINTR;
			if (error == EINTR) {
				umtx_abs_timeout_update(&timo);
				timespecsub(&timo.end, &timo.cur,
				    &timeout->_timeout);
			}
		}
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 *
 * Wakes one waiter if any is queued; when it is the only one,
 * USEM_HAS_WAITERS is cleared from _count first.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			rv = fueword32(&sem->_count, &count);
			/*
			 * casueword32() refreshes "count" with the value it
			 * found, so the loop retries against the current
			 * word until the flag is gone.
			 */
			while (rv != -1 && count & USEM_HAS_WAITERS) {
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
				if (rv == 1) {
					rv = thread_check_susp(td, true);
					if (rv != 0)
						break;
				}
			}
			if (rv == -1)
				error = EFAULT;
			else if (rv > 0) {
				/* Error from thread_check_susp(). */
				error = rv;
			}
			umtxq_lock(&key);
		}

		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

#ifdef COMPAT_FREEBSD10
/*
 * Compat system call: lock uap->umtx on behalf of the calling thread,
 * identified by its tid, with no timeout argument (0).
 */
int
freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap)
{
	return (do_lock_umtx(td, uap->umtx, td->td_tid, 0));
}

/*
 * Compat system call: unlock uap->umtx on behalf of the calling thread.
 */
int
freebsd10__umtx_unlock(struct thread *td,
    struct freebsd10__umtx_unlock_args *uap)
{
	return (do_unlock_umtx(td, uap->umtx, td->td_tid));
}
#endif

/*
 * Copy a struct timespec in from userland and validate it.
 *
 * Returns the copyin() error, or EINVAL when tv_sec is negative or
 * tv_nsec is outside [0, 1000000000).
 */
inline int
umtx_copyin_timeout(const void *uaddr, struct timespec *tsp)
{
	int error;

	error = copyin(uaddr, tsp, sizeof(*tsp));
	if (error == 0) {
		if (tsp->tv_sec < 0 ||
		    tsp->tv_nsec >= 1000000000 ||
		    tsp->tv_nsec < 0)
			error = EINVAL;
	}
	return (error);
}

/*
 * Copy a struct _umtx_time in from userland and validate its timespec.
 *
 * When "size" does not exceed sizeof(tp->_timeout), only the timespec is
 * copied and the clock id and flags default to CLOCK_REALTIME and 0.
 * Returns the copyin() error, or EINVAL for an out-of-range timespec.
 */
static inline int
umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	int error;

	if (size <= sizeof(tp->_timeout)) {
		tp->_clockid = CLOCK_REALTIME;
		tp->_flags = 0;
		error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout));
	} else
		error = copyin(uaddr, tp, sizeof(*tp));
	if (error != 0)
		return (error);
	if (tp->_timeout.tv_sec < 0 ||
	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
		return (EINVAL);
	return (0);
}

/*
 * Copy up to sizeof(*rb) bytes of robust-list parameters in from
 * userland.  A larger "size" is rejected with EINVAL.
 *
 * NOTE(review): when size < sizeof(*rb) the remainder of *rb is left
 * untouched; presumably the caller pre-zeroes it -- confirm.
 */
static int
umtx_copyin_robust_lists(const void *uaddr, size_t size,
    struct umtx_robust_lists_params *rb)
{

	if (size > sizeof(*rb))
		return (EINVAL);
	return (copyin(uaddr, rb, size));
}

/*
 * Copy a struct timespec out to userland.  "sz" is only used for the
 * sanity assertion below.
 */
static int
umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp)
{

	/*
	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(*tsp),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(tsp, uaddr, sizeof(*tsp)));
}

#ifdef COMPAT_FREEBSD10
/*
 * _umtx_op handler: lock a umtx, with an optional timeout at uaddr2.
 * The 32-bit variant is used when the copyops are marked compat32.
 */
static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = ops->copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
#ifdef COMPAT_FREEBSD32
	if (ops->compat32)
		return (do_lock_umtx32(td, uap->obj, uap->val, ts));
#endif
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

/*
 * _umtx_op handler: unlock a umtx.  The 32-bit variant is used when the
 * copyops are marked compat32.
 */
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
#ifdef COMPAT_FREEBSD32
	if (ops->compat32)
		return (do_unlock_umtx32(td, uap->obj, uap->val));
#endif
	return (do_unlock_umtx(td, uap->obj, uap->val));
}
#endif	/* COMPAT_FREEBSD10 */

#if !defined(COMPAT_FREEBSD10)
/*
 * _umtx_op handler for operations that are not compiled in; always
 * fails with EOPNOTSUPP.
 */
static int
__umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused,
    const struct umtx_copyops *ops __unused)
{
	return (EOPNOTSUPP);
}
#endif	/* !COMPAT_FREEBSD10 */

/*
 * _umtx_op handler: wait on the word at uap->obj.  An optional
 * struct _umtx_time is read from uaddr2, with its size passed in uaddr1.
 *
 * NOTE(review): the last two do_wait() arguments are presumably the
 * "32-bit word" and "process-private" selectors -- confirm against
 * do_wait().
 */
static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0));
}

/*
 * _umtx_op handler: like __umtx_op_wait(), but always passes 1 as the
 * fifth do_wait() argument instead of ops->compat32.
 */
static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

/*
 * _umtx_op handler: like __umtx_op_wait_uint(), but passes 1 as the last
 * do_wait() argument as well.
 */
static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

/*
 * _umtx_op handler: wake up to uap->val threads waiting on uap->obj via
 * kern_umtx_wake(), passing 0 as its last argument.
 */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

#define BATCH_SIZE	128
static int
__umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap)
{
	char *uaddrs[BATCH_SIZE], **upp;
	int count, error, i, pos, tocopy;

	upp = (char **)uap->obj;
	error = 0;
	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
	    pos += tocopy) {
		tocopy = MIN(count, BATCH_SIZE);
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i) {
kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3918 } 3919 maybe_yield(); 3920 } 3921 return (error); 3922 } 3923 3924 static int 3925 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3926 { 3927 uint32_t uaddrs[BATCH_SIZE], *upp; 3928 int count, error, i, pos, tocopy; 3929 3930 upp = (uint32_t *)uap->obj; 3931 error = 0; 3932 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3933 pos += tocopy) { 3934 tocopy = MIN(count, BATCH_SIZE); 3935 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3936 if (error != 0) 3937 break; 3938 for (i = 0; i < tocopy; ++i) { 3939 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3940 INT_MAX, 1); 3941 } 3942 maybe_yield(); 3943 } 3944 return (error); 3945 } 3946 3947 static int 3948 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3949 const struct umtx_copyops *ops) 3950 { 3951 3952 if (ops->compat32) 3953 return (__umtx_op_nwake_private_compat32(td, uap)); 3954 return (__umtx_op_nwake_private_native(td, uap)); 3955 } 3956 3957 static int 3958 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 3959 const struct umtx_copyops *ops __unused) 3960 { 3961 3962 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3963 } 3964 3965 static int 3966 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 3967 const struct umtx_copyops *ops) 3968 { 3969 struct _umtx_time *tm_p, timeout; 3970 int error; 3971 3972 /* Allow a null timespec (wait forever). 
*/ 3973 if (uap->uaddr2 == NULL) 3974 tm_p = NULL; 3975 else { 3976 error = ops->copyin_umtx_time( 3977 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3978 if (error != 0) 3979 return (error); 3980 tm_p = &timeout; 3981 } 3982 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3983 } 3984 3985 static int 3986 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 3987 const struct umtx_copyops *ops __unused) 3988 { 3989 3990 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3991 } 3992 3993 static int 3994 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 3995 const struct umtx_copyops *ops) 3996 { 3997 struct _umtx_time *tm_p, timeout; 3998 int error; 3999 4000 /* Allow a null timespec (wait forever). */ 4001 if (uap->uaddr2 == NULL) 4002 tm_p = NULL; 4003 else { 4004 error = ops->copyin_umtx_time( 4005 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4006 if (error != 0) 4007 return (error); 4008 tm_p = &timeout; 4009 } 4010 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4011 } 4012 4013 static int 4014 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4015 const struct umtx_copyops *ops __unused) 4016 { 4017 4018 return (do_wake_umutex(td, uap->obj)); 4019 } 4020 4021 static int 4022 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4023 const struct umtx_copyops *ops __unused) 4024 { 4025 4026 return (do_unlock_umutex(td, uap->obj, false)); 4027 } 4028 4029 static int 4030 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4031 const struct umtx_copyops *ops __unused) 4032 { 4033 4034 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4035 } 4036 4037 static int 4038 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4039 const struct umtx_copyops *ops) 4040 { 4041 struct timespec *ts, timeout; 4042 int error; 4043 4044 /* Allow a null timespec (wait forever). 
*/ 4045 if (uap->uaddr2 == NULL) 4046 ts = NULL; 4047 else { 4048 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4049 if (error != 0) 4050 return (error); 4051 ts = &timeout; 4052 } 4053 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4054 } 4055 4056 static int 4057 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4058 const struct umtx_copyops *ops __unused) 4059 { 4060 4061 return (do_cv_signal(td, uap->obj)); 4062 } 4063 4064 static int 4065 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4066 const struct umtx_copyops *ops __unused) 4067 { 4068 4069 return (do_cv_broadcast(td, uap->obj)); 4070 } 4071 4072 static int 4073 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4074 const struct umtx_copyops *ops) 4075 { 4076 struct _umtx_time timeout; 4077 int error; 4078 4079 /* Allow a null timespec (wait forever). */ 4080 if (uap->uaddr2 == NULL) { 4081 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4082 } else { 4083 error = ops->copyin_umtx_time(uap->uaddr2, 4084 (size_t)uap->uaddr1, &timeout); 4085 if (error != 0) 4086 return (error); 4087 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4088 } 4089 return (error); 4090 } 4091 4092 static int 4093 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4094 const struct umtx_copyops *ops) 4095 { 4096 struct _umtx_time timeout; 4097 int error; 4098 4099 /* Allow a null timespec (wait forever). 
*/ 4100 if (uap->uaddr2 == NULL) { 4101 error = do_rw_wrlock(td, uap->obj, 0); 4102 } else { 4103 error = ops->copyin_umtx_time(uap->uaddr2, 4104 (size_t)uap->uaddr1, &timeout); 4105 if (error != 0) 4106 return (error); 4107 4108 error = do_rw_wrlock(td, uap->obj, &timeout); 4109 } 4110 return (error); 4111 } 4112 4113 static int 4114 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4115 const struct umtx_copyops *ops __unused) 4116 { 4117 4118 return (do_rw_unlock(td, uap->obj)); 4119 } 4120 4121 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4122 static int 4123 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4124 const struct umtx_copyops *ops) 4125 { 4126 struct _umtx_time *tm_p, timeout; 4127 int error; 4128 4129 /* Allow a null timespec (wait forever). */ 4130 if (uap->uaddr2 == NULL) 4131 tm_p = NULL; 4132 else { 4133 error = ops->copyin_umtx_time( 4134 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4135 if (error != 0) 4136 return (error); 4137 tm_p = &timeout; 4138 } 4139 return (do_sem_wait(td, uap->obj, tm_p)); 4140 } 4141 4142 static int 4143 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4144 const struct umtx_copyops *ops __unused) 4145 { 4146 4147 return (do_sem_wake(td, uap->obj)); 4148 } 4149 #endif 4150 4151 static int 4152 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4153 const struct umtx_copyops *ops __unused) 4154 { 4155 4156 return (do_wake2_umutex(td, uap->obj, uap->val)); 4157 } 4158 4159 static int 4160 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4161 const struct umtx_copyops *ops) 4162 { 4163 struct _umtx_time *tm_p, timeout; 4164 size_t uasize; 4165 int error; 4166 4167 /* Allow a null timespec (wait forever). 
*/ 4168 if (uap->uaddr2 == NULL) { 4169 uasize = 0; 4170 tm_p = NULL; 4171 } else { 4172 uasize = (size_t)uap->uaddr1; 4173 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4174 if (error != 0) 4175 return (error); 4176 tm_p = &timeout; 4177 } 4178 error = do_sem2_wait(td, uap->obj, tm_p); 4179 if (error == EINTR && uap->uaddr2 != NULL && 4180 (timeout._flags & UMTX_ABSTIME) == 0 && 4181 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4182 error = ops->copyout_timeout( 4183 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4184 uasize - ops->umtx_time_sz, &timeout._timeout); 4185 if (error == 0) { 4186 error = EINTR; 4187 } 4188 } 4189 4190 return (error); 4191 } 4192 4193 static int 4194 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4195 const struct umtx_copyops *ops __unused) 4196 { 4197 4198 return (do_sem2_wake(td, uap->obj)); 4199 } 4200 4201 #define USHM_OBJ_UMTX(o) \ 4202 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4203 4204 #define USHMF_REG_LINKED 0x0001 4205 #define USHMF_OBJ_LINKED 0x0002 4206 struct umtx_shm_reg { 4207 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4208 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4209 struct umtx_key ushm_key; 4210 struct ucred *ushm_cred; 4211 struct shmfd *ushm_obj; 4212 u_int ushm_refcnt; 4213 u_int ushm_flags; 4214 }; 4215 4216 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4217 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4218 4219 static uma_zone_t umtx_shm_reg_zone; 4220 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4221 static struct mtx umtx_shm_lock; 4222 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4223 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4224 4225 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4226 4227 static void 4228 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4229 { 4230 struct umtx_shm_reg_head d; 4231 struct umtx_shm_reg *reg, *reg1; 4232 4233 TAILQ_INIT(&d); 4234 mtx_lock(&umtx_shm_lock); 4235 
TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4236 mtx_unlock(&umtx_shm_lock); 4237 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4238 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4239 umtx_shm_free_reg(reg); 4240 } 4241 } 4242 4243 static struct task umtx_shm_reg_delfree_task = 4244 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4245 4246 static struct umtx_shm_reg * 4247 umtx_shm_find_reg_locked(const struct umtx_key *key) 4248 { 4249 struct umtx_shm_reg *reg; 4250 struct umtx_shm_reg_head *reg_head; 4251 4252 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4253 mtx_assert(&umtx_shm_lock, MA_OWNED); 4254 reg_head = &umtx_shm_registry[key->hash]; 4255 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4256 KASSERT(reg->ushm_key.shared, 4257 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4258 if (reg->ushm_key.info.shared.object == 4259 key->info.shared.object && 4260 reg->ushm_key.info.shared.offset == 4261 key->info.shared.offset) { 4262 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4263 KASSERT(reg->ushm_refcnt > 0, 4264 ("reg %p refcnt 0 onlist", reg)); 4265 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4266 ("reg %p not linked", reg)); 4267 reg->ushm_refcnt++; 4268 return (reg); 4269 } 4270 } 4271 return (NULL); 4272 } 4273 4274 static struct umtx_shm_reg * 4275 umtx_shm_find_reg(const struct umtx_key *key) 4276 { 4277 struct umtx_shm_reg *reg; 4278 4279 mtx_lock(&umtx_shm_lock); 4280 reg = umtx_shm_find_reg_locked(key); 4281 mtx_unlock(&umtx_shm_lock); 4282 return (reg); 4283 } 4284 4285 static void 4286 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4287 { 4288 4289 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4290 crfree(reg->ushm_cred); 4291 shm_drop(reg->ushm_obj); 4292 uma_zfree(umtx_shm_reg_zone, reg); 4293 } 4294 4295 static bool 4296 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 4297 { 4298 bool res; 4299 4300 mtx_assert(&umtx_shm_lock, MA_OWNED); 4301 KASSERT(reg->ushm_refcnt > 0, 
("ushm_reg %p refcnt 0", reg)); 4302 reg->ushm_refcnt--; 4303 res = reg->ushm_refcnt == 0; 4304 if (res || force) { 4305 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 4306 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 4307 reg, ushm_reg_link); 4308 reg->ushm_flags &= ~USHMF_REG_LINKED; 4309 } 4310 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 4311 LIST_REMOVE(reg, ushm_obj_link); 4312 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 4313 } 4314 } 4315 return (res); 4316 } 4317 4318 static void 4319 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 4320 { 4321 vm_object_t object; 4322 bool dofree; 4323 4324 if (force) { 4325 object = reg->ushm_obj->shm_object; 4326 VM_OBJECT_WLOCK(object); 4327 object->flags |= OBJ_UMTXDEAD; 4328 VM_OBJECT_WUNLOCK(object); 4329 } 4330 mtx_lock(&umtx_shm_lock); 4331 dofree = umtx_shm_unref_reg_locked(reg, force); 4332 mtx_unlock(&umtx_shm_lock); 4333 if (dofree) 4334 umtx_shm_free_reg(reg); 4335 } 4336 4337 void 4338 umtx_shm_object_init(vm_object_t object) 4339 { 4340 4341 LIST_INIT(USHM_OBJ_UMTX(object)); 4342 } 4343 4344 void 4345 umtx_shm_object_terminated(vm_object_t object) 4346 { 4347 struct umtx_shm_reg *reg, *reg1; 4348 bool dofree; 4349 4350 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4351 return; 4352 4353 dofree = false; 4354 mtx_lock(&umtx_shm_lock); 4355 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4356 if (umtx_shm_unref_reg_locked(reg, true)) { 4357 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4358 ushm_reg_link); 4359 dofree = true; 4360 } 4361 } 4362 mtx_unlock(&umtx_shm_lock); 4363 if (dofree) 4364 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4365 } 4366 4367 static int 4368 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4369 struct umtx_shm_reg **res) 4370 { 4371 struct umtx_shm_reg *reg, *reg1; 4372 struct ucred *cred; 4373 int error; 4374 4375 reg = umtx_shm_find_reg(key); 4376 if (reg != NULL) { 4377 *res = reg; 4378 return (0); 4379 } 4380 
cred = td->td_ucred; 4381 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4382 return (ENOMEM); 4383 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4384 reg->ushm_refcnt = 1; 4385 bcopy(key, ®->ushm_key, sizeof(*key)); 4386 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4387 reg->ushm_cred = crhold(cred); 4388 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4389 if (error != 0) { 4390 umtx_shm_free_reg(reg); 4391 return (error); 4392 } 4393 mtx_lock(&umtx_shm_lock); 4394 reg1 = umtx_shm_find_reg_locked(key); 4395 if (reg1 != NULL) { 4396 mtx_unlock(&umtx_shm_lock); 4397 umtx_shm_free_reg(reg); 4398 *res = reg1; 4399 return (0); 4400 } 4401 reg->ushm_refcnt++; 4402 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4403 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4404 ushm_obj_link); 4405 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4406 mtx_unlock(&umtx_shm_lock); 4407 *res = reg; 4408 return (0); 4409 } 4410 4411 static int 4412 umtx_shm_alive(struct thread *td, void *addr) 4413 { 4414 vm_map_t map; 4415 vm_map_entry_t entry; 4416 vm_object_t object; 4417 vm_pindex_t pindex; 4418 vm_prot_t prot; 4419 int res, ret; 4420 boolean_t wired; 4421 4422 map = &td->td_proc->p_vmspace->vm_map; 4423 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4424 &object, &pindex, &prot, &wired); 4425 if (res != KERN_SUCCESS) 4426 return (EFAULT); 4427 if (object == NULL) 4428 ret = EINVAL; 4429 else 4430 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? 
ENOTTY : 0; 4431 vm_map_lookup_done(map, entry); 4432 return (ret); 4433 } 4434 4435 static void 4436 umtx_shm_init(void) 4437 { 4438 int i; 4439 4440 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4441 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4442 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4443 for (i = 0; i < nitems(umtx_shm_registry); i++) 4444 TAILQ_INIT(&umtx_shm_registry[i]); 4445 } 4446 4447 static int 4448 umtx_shm(struct thread *td, void *addr, u_int flags) 4449 { 4450 struct umtx_key key; 4451 struct umtx_shm_reg *reg; 4452 struct file *fp; 4453 int error, fd; 4454 4455 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4456 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4457 return (EINVAL); 4458 if ((flags & UMTX_SHM_ALIVE) != 0) 4459 return (umtx_shm_alive(td, addr)); 4460 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4461 if (error != 0) 4462 return (error); 4463 KASSERT(key.shared == 1, ("non-shared key")); 4464 if ((flags & UMTX_SHM_CREAT) != 0) { 4465 error = umtx_shm_create_reg(td, &key, ®); 4466 } else { 4467 reg = umtx_shm_find_reg(&key); 4468 if (reg == NULL) 4469 error = ESRCH; 4470 } 4471 umtx_key_release(&key); 4472 if (error != 0) 4473 return (error); 4474 KASSERT(reg != NULL, ("no reg")); 4475 if ((flags & UMTX_SHM_DESTROY) != 0) { 4476 umtx_shm_unref_reg(reg, true); 4477 } else { 4478 #if 0 4479 #ifdef MAC 4480 error = mac_posixshm_check_open(td->td_ucred, 4481 reg->ushm_obj, FFLAGS(O_RDWR)); 4482 if (error == 0) 4483 #endif 4484 error = shm_access(reg->ushm_obj, td->td_ucred, 4485 FFLAGS(O_RDWR)); 4486 if (error == 0) 4487 #endif 4488 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4489 if (error == 0) { 4490 shm_hold(reg->ushm_obj); 4491 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4492 &shm_ops); 4493 td->td_retval[0] = fd; 4494 fdrop(fp, td); 4495 } 4496 } 4497 umtx_shm_unref_reg(reg, false); 4498 return (error); 4499 } 4500 4501 static int 4502 __umtx_op_shm(struct thread 
*td, struct _umtx_op_args *uap, 4503 const struct umtx_copyops *ops __unused) 4504 { 4505 4506 return (umtx_shm(td, uap->uaddr1, uap->val)); 4507 } 4508 4509 static int 4510 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4511 const struct umtx_copyops *ops) 4512 { 4513 struct umtx_robust_lists_params rb; 4514 int error; 4515 4516 if (ops->compat32) { 4517 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4518 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4519 td->td_rb_inact != 0)) 4520 return (EBUSY); 4521 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4522 return (EBUSY); 4523 } 4524 4525 bzero(&rb, sizeof(rb)); 4526 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4527 if (error != 0) 4528 return (error); 4529 4530 if (ops->compat32) 4531 td->td_pflags2 |= TDP2_COMPAT32RB; 4532 4533 td->td_rb_list = rb.robust_list_offset; 4534 td->td_rbp_list = rb.robust_priv_list_offset; 4535 td->td_rb_inact = rb.robust_inact_offset; 4536 return (0); 4537 } 4538 4539 #if defined(__i386__) || defined(__amd64__) 4540 /* 4541 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4542 * 32-bit time_t there. Other architectures just need the i386 definitions 4543 * along with their standard compat32. 4544 */ 4545 struct timespecx32 { 4546 int64_t tv_sec; 4547 int32_t tv_nsec; 4548 }; 4549 4550 struct umtx_timex32 { 4551 struct timespecx32 _timeout; 4552 uint32_t _flags; 4553 uint32_t _clockid; 4554 }; 4555 4556 #ifndef __i386__ 4557 #define timespeci386 timespec32 4558 #define umtx_timei386 umtx_time32 4559 #endif 4560 #else /* !__i386__ && !__amd64__ */ 4561 /* 32-bit architectures can emulate i386, so define these almost everywhere. 
*/ 4562 struct timespeci386 { 4563 int32_t tv_sec; 4564 int32_t tv_nsec; 4565 }; 4566 4567 struct umtx_timei386 { 4568 struct timespeci386 _timeout; 4569 uint32_t _flags; 4570 uint32_t _clockid; 4571 }; 4572 4573 #if defined(__LP64__) 4574 #define timespecx32 timespec32 4575 #define umtx_timex32 umtx_time32 4576 #endif 4577 #endif 4578 4579 static int 4580 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4581 struct umtx_robust_lists_params *rbp) 4582 { 4583 struct umtx_robust_lists_params_compat32 rb32; 4584 int error; 4585 4586 if (size > sizeof(rb32)) 4587 return (EINVAL); 4588 bzero(&rb32, sizeof(rb32)); 4589 error = copyin(uaddr, &rb32, size); 4590 if (error != 0) 4591 return (error); 4592 CP(rb32, *rbp, robust_list_offset); 4593 CP(rb32, *rbp, robust_priv_list_offset); 4594 CP(rb32, *rbp, robust_inact_offset); 4595 return (0); 4596 } 4597 4598 #ifndef __i386__ 4599 static inline int 4600 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4601 { 4602 struct timespeci386 ts32; 4603 int error; 4604 4605 error = copyin(uaddr, &ts32, sizeof(ts32)); 4606 if (error == 0) { 4607 if (ts32.tv_sec < 0 || 4608 ts32.tv_nsec >= 1000000000 || 4609 ts32.tv_nsec < 0) 4610 error = EINVAL; 4611 else { 4612 CP(ts32, *tsp, tv_sec); 4613 CP(ts32, *tsp, tv_nsec); 4614 } 4615 } 4616 return (error); 4617 } 4618 4619 static inline int 4620 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4621 { 4622 struct umtx_timei386 t32; 4623 int error; 4624 4625 t32._clockid = CLOCK_REALTIME; 4626 t32._flags = 0; 4627 if (size <= sizeof(t32._timeout)) 4628 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4629 else 4630 error = copyin(uaddr, &t32, sizeof(t32)); 4631 if (error != 0) 4632 return (error); 4633 if (t32._timeout.tv_sec < 0 || 4634 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4635 return (EINVAL); 4636 TS_CP(t32, *tp, _timeout); 4637 CP(t32, *tp, _flags); 4638 CP(t32, *tp, _clockid); 4639 return (0); 
4640 } 4641 4642 static int 4643 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4644 { 4645 struct timespeci386 remain32 = { 4646 .tv_sec = tsp->tv_sec, 4647 .tv_nsec = tsp->tv_nsec, 4648 }; 4649 4650 /* 4651 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4652 * and we're only called if sz >= sizeof(timespec) as supplied in the 4653 * copyops. 4654 */ 4655 KASSERT(sz >= sizeof(remain32), 4656 ("umtx_copyops specifies incorrect sizes")); 4657 4658 return (copyout(&remain32, uaddr, sizeof(remain32))); 4659 } 4660 #endif /* !__i386__ */ 4661 4662 #if defined(__i386__) || defined(__LP64__) 4663 static inline int 4664 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4665 { 4666 struct timespecx32 ts32; 4667 int error; 4668 4669 error = copyin(uaddr, &ts32, sizeof(ts32)); 4670 if (error == 0) { 4671 if (ts32.tv_sec < 0 || 4672 ts32.tv_nsec >= 1000000000 || 4673 ts32.tv_nsec < 0) 4674 error = EINVAL; 4675 else { 4676 CP(ts32, *tsp, tv_sec); 4677 CP(ts32, *tsp, tv_nsec); 4678 } 4679 } 4680 return (error); 4681 } 4682 4683 static inline int 4684 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4685 { 4686 struct umtx_timex32 t32; 4687 int error; 4688 4689 t32._clockid = CLOCK_REALTIME; 4690 t32._flags = 0; 4691 if (size <= sizeof(t32._timeout)) 4692 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4693 else 4694 error = copyin(uaddr, &t32, sizeof(t32)); 4695 if (error != 0) 4696 return (error); 4697 if (t32._timeout.tv_sec < 0 || 4698 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4699 return (EINVAL); 4700 TS_CP(t32, *tp, _timeout); 4701 CP(t32, *tp, _flags); 4702 CP(t32, *tp, _clockid); 4703 return (0); 4704 } 4705 4706 static int 4707 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4708 { 4709 struct timespecx32 remain32 = { 4710 .tv_sec = tsp->tv_sec, 4711 .tv_nsec = tsp->tv_nsec, 4712 }; 4713 4714 /* 4715 * Should be 
guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4716 * and we're only called if sz >= sizeof(timespec) as supplied in the 4717 * copyops. 4718 */ 4719 KASSERT(sz >= sizeof(remain32), 4720 ("umtx_copyops specifies incorrect sizes")); 4721 4722 return (copyout(&remain32, uaddr, sizeof(remain32))); 4723 } 4724 #endif /* __i386__ || __LP64__ */ 4725 4726 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4727 const struct umtx_copyops *umtx_ops); 4728 4729 static const _umtx_op_func op_table[] = { 4730 #ifdef COMPAT_FREEBSD10 4731 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4732 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4733 #else 4734 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4735 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4736 #endif 4737 [UMTX_OP_WAIT] = __umtx_op_wait, 4738 [UMTX_OP_WAKE] = __umtx_op_wake, 4739 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4740 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4741 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4742 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4743 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4744 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4745 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4746 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4747 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4748 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4749 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4750 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4751 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4752 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4753 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4754 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4755 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4756 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4757 #else 4758 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4759 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4760 #endif 4761 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4762 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4763 
[UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4764 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4765 [UMTX_OP_SHM] = __umtx_op_shm, 4766 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4767 }; 4768 4769 static const struct umtx_copyops umtx_native_ops = { 4770 .copyin_timeout = umtx_copyin_timeout, 4771 .copyin_umtx_time = umtx_copyin_umtx_time, 4772 .copyin_robust_lists = umtx_copyin_robust_lists, 4773 .copyout_timeout = umtx_copyout_timeout, 4774 .timespec_sz = sizeof(struct timespec), 4775 .umtx_time_sz = sizeof(struct _umtx_time), 4776 }; 4777 4778 #ifndef __i386__ 4779 static const struct umtx_copyops umtx_native_opsi386 = { 4780 .copyin_timeout = umtx_copyin_timeouti386, 4781 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4782 .copyin_robust_lists = umtx_copyin_robust_lists32, 4783 .copyout_timeout = umtx_copyout_timeouti386, 4784 .timespec_sz = sizeof(struct timespeci386), 4785 .umtx_time_sz = sizeof(struct umtx_timei386), 4786 .compat32 = true, 4787 }; 4788 #endif 4789 4790 #if defined(__i386__) || defined(__LP64__) 4791 /* i386 can emulate other 32-bit archs, too! 
*/ 4792 static const struct umtx_copyops umtx_native_opsx32 = { 4793 .copyin_timeout = umtx_copyin_timeoutx32, 4794 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4795 .copyin_robust_lists = umtx_copyin_robust_lists32, 4796 .copyout_timeout = umtx_copyout_timeoutx32, 4797 .timespec_sz = sizeof(struct timespecx32), 4798 .umtx_time_sz = sizeof(struct umtx_timex32), 4799 .compat32 = true, 4800 }; 4801 4802 #ifdef COMPAT_FREEBSD32 4803 #ifdef __amd64__ 4804 #define umtx_native_ops32 umtx_native_opsi386 4805 #else 4806 #define umtx_native_ops32 umtx_native_opsx32 4807 #endif 4808 #endif /* COMPAT_FREEBSD32 */ 4809 #endif /* __i386__ || __LP64__ */ 4810 4811 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4812 4813 static int 4814 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4815 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4816 { 4817 struct _umtx_op_args uap = { 4818 .obj = obj, 4819 .op = op & ~UMTX_OP__FLAGS, 4820 .val = val, 4821 .uaddr1 = uaddr1, 4822 .uaddr2 = uaddr2 4823 }; 4824 4825 if ((uap.op >= nitems(op_table))) 4826 return (EINVAL); 4827 return ((*op_table[uap.op])(td, &uap, ops)); 4828 } 4829 4830 int 4831 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4832 { 4833 static const struct umtx_copyops *umtx_ops; 4834 4835 umtx_ops = &umtx_native_ops; 4836 #ifdef __LP64__ 4837 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 4838 if ((uap->op & UMTX_OP__I386) != 0) 4839 umtx_ops = &umtx_native_opsi386; 4840 else 4841 umtx_ops = &umtx_native_opsx32; 4842 } 4843 #elif !defined(__i386__) 4844 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 4845 if ((uap->op & UMTX_OP__I386) != 0) 4846 umtx_ops = &umtx_native_opsi386; 4847 #else 4848 /* Likewise, UMTX_OP__I386 is a nop on i386. 
*/ 4849 if ((uap->op & UMTX_OP__32BIT) != 0) 4850 umtx_ops = &umtx_native_opsx32; 4851 #endif 4852 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4853 uap->uaddr2, umtx_ops)); 4854 } 4855 4856 #ifdef COMPAT_FREEBSD32 4857 #ifdef COMPAT_FREEBSD10 4858 int 4859 freebsd10_freebsd32__umtx_lock(struct thread *td, 4860 struct freebsd10_freebsd32__umtx_lock_args *uap) 4861 { 4862 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 4863 } 4864 4865 int 4866 freebsd10_freebsd32__umtx_unlock(struct thread *td, 4867 struct freebsd10_freebsd32__umtx_unlock_args *uap) 4868 { 4869 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 4870 } 4871 #endif /* COMPAT_FREEBSD10 */ 4872 4873 int 4874 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4875 { 4876 4877 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4878 uap->uaddr2, &umtx_native_ops32)); 4879 } 4880 #endif /* COMPAT_FREEBSD32 */ 4881 4882 void 4883 umtx_thread_init(struct thread *td) 4884 { 4885 4886 td->td_umtxq = umtxq_alloc(); 4887 td->td_umtxq->uq_thread = td; 4888 } 4889 4890 void 4891 umtx_thread_fini(struct thread *td) 4892 { 4893 4894 umtxq_free(td->td_umtxq); 4895 } 4896 4897 /* 4898 * It will be called when new thread is created, e.g fork(). 4899 */ 4900 void 4901 umtx_thread_alloc(struct thread *td) 4902 { 4903 struct umtx_q *uq; 4904 4905 uq = td->td_umtxq; 4906 uq->uq_inherited_pri = PRI_MAX; 4907 4908 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4909 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4910 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4911 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4912 } 4913 4914 /* 4915 * exec() hook. 4916 * 4917 * Clear robust lists for all process' threads, not delaying the 4918 * cleanup to thread exit, since the relevant address space is 4919 * destroyed right now. 
4920 */ 4921 void 4922 umtx_exec(struct proc *p) 4923 { 4924 struct thread *td; 4925 4926 KASSERT(p == curproc, ("need curproc")); 4927 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4928 (p->p_flag & P_STOPPED_SINGLE) != 0, 4929 ("curproc must be single-threaded")); 4930 /* 4931 * There is no need to lock the list as only this thread can be 4932 * running. 4933 */ 4934 FOREACH_THREAD_IN_PROC(p, td) { 4935 KASSERT(td == curthread || 4936 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4937 ("running thread %p %p", p, td)); 4938 umtx_thread_cleanup(td); 4939 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4940 } 4941 } 4942 4943 /* 4944 * thread exit hook. 4945 */ 4946 void 4947 umtx_thread_exit(struct thread *td) 4948 { 4949 4950 umtx_thread_cleanup(td); 4951 } 4952 4953 static int 4954 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 4955 { 4956 u_long res1; 4957 uint32_t res32; 4958 int error; 4959 4960 if (compat32) { 4961 error = fueword32((void *)ptr, &res32); 4962 if (error == 0) 4963 res1 = res32; 4964 } else { 4965 error = fueword((void *)ptr, &res1); 4966 } 4967 if (error == 0) 4968 *res = res1; 4969 else 4970 error = EFAULT; 4971 return (error); 4972 } 4973 4974 static void 4975 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 4976 bool compat32) 4977 { 4978 struct umutex32 m32; 4979 4980 if (compat32) { 4981 memcpy(&m32, m, sizeof(m32)); 4982 *rb_list = m32.m_rb_lnk; 4983 } else { 4984 *rb_list = m->m_rb_lnk; 4985 } 4986 } 4987 4988 static int 4989 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 4990 bool compat32) 4991 { 4992 struct umutex m; 4993 int error; 4994 4995 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4996 error = copyin((void *)rbp, &m, sizeof(m)); 4997 if (error != 0) 4998 return (error); 4999 if (rb_list != NULL) 5000 umtx_read_rb_list(td, &m, rb_list, compat32); 5001 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5002 return 
(EINVAL); 5003 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5004 /* inact is cleared after unlock, allow the inconsistency */ 5005 return (inact ? 0 : EINVAL); 5006 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5007 } 5008 5009 static void 5010 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5011 const char *name, bool compat32) 5012 { 5013 int error, i; 5014 uintptr_t rbp; 5015 bool inact; 5016 5017 if (rb_list == 0) 5018 return; 5019 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5020 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5021 if (rbp == *rb_inact) { 5022 inact = true; 5023 *rb_inact = 0; 5024 } else 5025 inact = false; 5026 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5027 } 5028 if (i == umtx_max_rb && umtx_verbose_rb) { 5029 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5030 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5031 } 5032 if (error != 0 && umtx_verbose_rb) { 5033 uprintf("comm %s pid %d: handling %srb error %d\n", 5034 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5035 } 5036 } 5037 5038 /* 5039 * Clean up umtx data. 5040 */ 5041 static void 5042 umtx_thread_cleanup(struct thread *td) 5043 { 5044 struct umtx_q *uq; 5045 struct umtx_pi *pi; 5046 uintptr_t rb_inact; 5047 bool compat32; 5048 5049 /* 5050 * Disown pi mutexes. 
5051 */ 5052 uq = td->td_umtxq; 5053 if (uq != NULL) { 5054 if (uq->uq_inherited_pri != PRI_MAX || 5055 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5056 mtx_lock(&umtx_lock); 5057 uq->uq_inherited_pri = PRI_MAX; 5058 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5059 pi->pi_owner = NULL; 5060 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5061 } 5062 mtx_unlock(&umtx_lock); 5063 } 5064 sched_lend_user_prio_cond(td, PRI_MAX); 5065 } 5066 5067 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5068 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5069 5070 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5071 return; 5072 5073 /* 5074 * Handle terminated robust mutexes. Must be done after 5075 * robust pi disown, otherwise unlock could see unowned 5076 * entries. 5077 */ 5078 rb_inact = td->td_rb_inact; 5079 if (rb_inact != 0) 5080 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5081 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5082 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5083 if (rb_inact != 0) 5084 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5085 } 5086