1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_umtx_profiling.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/fcntl.h> 42 #include <sys/file.h> 43 #include <sys/filedesc.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/sched.h> 56 #include <sys/smp.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/systm.h> 60 #include <sys/sysproto.h> 61 #include <sys/syscallsubr.h> 62 #include <sys/taskqueue.h> 63 #include <sys/time.h> 64 #include <sys/eventhandler.h> 65 #include <sys/umtx.h> 66 #include <sys/umtxvar.h> 67 68 #include <security/mac/mac_framework.h> 69 70 #include <vm/vm.h> 71 #include <vm/vm_param.h> 72 #include <vm/pmap.h> 73 #include <vm/vm_map.h> 74 #include <vm/vm_object.h> 75 76 #include <machine/atomic.h> 77 #include <machine/cpu.h> 78 79 #include <compat/freebsd32/freebsd32.h> 80 #ifdef COMPAT_FREEBSD32 81 #include <compat/freebsd32/freebsd32_proto.h> 82 #endif 83 84 #define _UMUTEX_TRY 1 85 #define _UMUTEX_WAIT 2 86 87 #ifdef UMTX_PROFILING 88 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 89 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 90 #endif 91 92 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 93 #ifdef INVARIANTS 94 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do { \ 95 struct umtxq_chain *uc; \ 96 \ 97 uc = umtxq_getchain(key); \ 98 mtx_assert(&uc->uc_lock, MA_OWNED); \ 99 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); \ 100 } while (0) 101 #else 102 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0) 103 #endif 104 105 /* 106 * Don't propagate time-sharing priority, there is a security reason, 107 * a user can simply introduce PI-mutex, let thread A lock the mutex, 108 * and let 
another thread B block on the mutex, because B is 109 * sleeping, its priority will be boosted, this causes A's priority to 110 * be boosted via priority propagating too and will never be lowered even 111 * if it is using 100%CPU, this is unfair to other processes. 112 */ 113 114 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 115 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 116 PRI_MAX_TIMESHARE : (td)->td_user_pri) 117 118 #define GOLDEN_RATIO_PRIME 2654404609U 119 #ifndef UMTX_CHAINS 120 #define UMTX_CHAINS 512 121 #endif 122 #define UMTX_SHIFTS (__WORD_BIT - 9) 123 124 #define GET_SHARE(flags) \ 125 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 126 127 #define BUSY_SPINS 200 128 129 struct umtx_copyops { 130 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); 131 int (*copyin_umtx_time)(const void *uaddr, size_t size, 132 struct _umtx_time *tp); 133 int (*copyin_robust_lists)(const void *uaddr, size_t size, 134 struct umtx_robust_lists_params *rbp); 135 int (*copyout_timeout)(void *uaddr, size_t size, 136 struct timespec *tsp); 137 const size_t timespec_sz; 138 const size_t umtx_time_sz; 139 const bool compat32; 140 }; 141 142 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 143 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 144 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 145 146 int umtx_shm_vnobj_persistent = 0; 147 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 148 &umtx_shm_vnobj_persistent, 0, 149 "False forces destruction of umtx attached to file, on last close"); 150 static int umtx_max_rb = 1000; 151 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 152 &umtx_max_rb, 0, 153 "Maximum number of robust mutexes allowed for each thread"); 154 155 static uma_zone_t umtx_pi_zone; 156 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 157 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 158 static int 
umtx_pi_allocated; 159 160 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 161 "umtx debug"); 162 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 163 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 164 static int umtx_verbose_rb = 1; 165 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 166 &umtx_verbose_rb, 0, 167 ""); 168 169 #ifdef UMTX_PROFILING 170 static long max_length; 171 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 172 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 173 "umtx chain stats"); 174 #endif 175 176 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 177 const struct _umtx_time *umtxtime); 178 179 static void umtx_shm_init(void); 180 static void umtxq_sysinit(void *); 181 static void umtxq_hash(struct umtx_key *key); 182 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 183 bool rb); 184 static void umtx_thread_cleanup(struct thread *td); 185 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 186 187 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 188 189 static struct mtx umtx_lock; 190 191 #ifdef UMTX_PROFILING 192 static void 193 umtx_init_profiling(void) 194 { 195 struct sysctl_oid *chain_oid; 196 char chain_name[10]; 197 int i; 198 199 for (i = 0; i < UMTX_CHAINS; ++i) { 200 snprintf(chain_name, sizeof(chain_name), "%d", i); 201 chain_oid = SYSCTL_ADD_NODE(NULL, 202 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 203 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 204 "umtx hash stats"); 205 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 206 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 207 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 208 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 209 } 210 } 211 212 static int 
213 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 214 { 215 char buf[512]; 216 struct sbuf sb; 217 struct umtxq_chain *uc; 218 u_int fract, i, j, tot, whole; 219 u_int sf0, sf1, sf2, sf3, sf4; 220 u_int si0, si1, si2, si3, si4; 221 u_int sw0, sw1, sw2, sw3, sw4; 222 223 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 224 for (i = 0; i < 2; i++) { 225 tot = 0; 226 for (j = 0; j < UMTX_CHAINS; ++j) { 227 uc = &umtxq_chains[i][j]; 228 mtx_lock(&uc->uc_lock); 229 tot += uc->max_length; 230 mtx_unlock(&uc->uc_lock); 231 } 232 if (tot == 0) 233 sbuf_printf(&sb, "%u) Empty ", i); 234 else { 235 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 236 si0 = si1 = si2 = si3 = si4 = 0; 237 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 238 for (j = 0; j < UMTX_CHAINS; j++) { 239 uc = &umtxq_chains[i][j]; 240 mtx_lock(&uc->uc_lock); 241 whole = uc->max_length * 100; 242 mtx_unlock(&uc->uc_lock); 243 fract = (whole % tot) * 100; 244 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 245 sf0 = fract; 246 si0 = j; 247 sw0 = whole; 248 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 249 sf1)) { 250 sf1 = fract; 251 si1 = j; 252 sw1 = whole; 253 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 254 sf2)) { 255 sf2 = fract; 256 si2 = j; 257 sw2 = whole; 258 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 259 sf3)) { 260 sf3 = fract; 261 si3 = j; 262 sw3 = whole; 263 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 264 sf4)) { 265 sf4 = fract; 266 si4 = j; 267 sw4 = whole; 268 } 269 } 270 sbuf_printf(&sb, "queue %u:\n", i); 271 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 272 sf0 / tot, si0); 273 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 274 sf1 / tot, si1); 275 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 276 sf2 / tot, si2); 277 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 278 sf3 / tot, si3); 279 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 280 sf4 / tot, si4); 281 } 282 } 283 sbuf_trim(&sb); 284 sbuf_finish(&sb); 285 sysctl_handle_string(oidp, sbuf_data(&sb), 
sbuf_len(&sb), req); 286 sbuf_delete(&sb); 287 return (0); 288 } 289 290 static int 291 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 292 { 293 struct umtxq_chain *uc; 294 u_int i, j; 295 int clear, error; 296 297 clear = 0; 298 error = sysctl_handle_int(oidp, &clear, 0, req); 299 if (error != 0 || req->newptr == NULL) 300 return (error); 301 302 if (clear != 0) { 303 for (i = 0; i < 2; ++i) { 304 for (j = 0; j < UMTX_CHAINS; ++j) { 305 uc = &umtxq_chains[i][j]; 306 mtx_lock(&uc->uc_lock); 307 uc->length = 0; 308 uc->max_length = 0; 309 mtx_unlock(&uc->uc_lock); 310 } 311 } 312 } 313 return (0); 314 } 315 316 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 317 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 318 sysctl_debug_umtx_chains_clear, "I", 319 "Clear umtx chains statistics"); 320 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 321 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 322 sysctl_debug_umtx_chains_peaks, "A", 323 "Highest peaks in chains max length"); 324 #endif 325 326 static void 327 umtxq_sysinit(void *arg __unused) 328 { 329 int i, j; 330 331 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 332 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 333 for (i = 0; i < 2; ++i) { 334 for (j = 0; j < UMTX_CHAINS; ++j) { 335 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 336 MTX_DEF | MTX_DUPOK); 337 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 338 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 339 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 340 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 341 umtxq_chains[i][j].uc_busy = 0; 342 umtxq_chains[i][j].uc_waiters = 0; 343 #ifdef UMTX_PROFILING 344 umtxq_chains[i][j].length = 0; 345 umtxq_chains[i][j].max_length = 0; 346 #endif 347 } 348 } 349 #ifdef UMTX_PROFILING 350 umtx_init_profiling(); 351 #endif 352 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 353 umtx_shm_init(); 354 } 355 356 struct umtx_q * 357 umtxq_alloc(void) 358 { 359 struct umtx_q *uq; 360 361 uq = 
malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 362 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 363 M_WAITOK | M_ZERO); 364 TAILQ_INIT(&uq->uq_spare_queue->head); 365 TAILQ_INIT(&uq->uq_pi_contested); 366 uq->uq_inherited_pri = PRI_MAX; 367 return (uq); 368 } 369 370 void 371 umtxq_free(struct umtx_q *uq) 372 { 373 374 MPASS(uq->uq_spare_queue != NULL); 375 free(uq->uq_spare_queue, M_UMTX); 376 free(uq, M_UMTX); 377 } 378 379 static inline void 380 umtxq_hash(struct umtx_key *key) 381 { 382 unsigned n; 383 384 n = (uintptr_t)key->info.both.a + key->info.both.b; 385 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 386 } 387 388 struct umtxq_chain * 389 umtxq_getchain(struct umtx_key *key) 390 { 391 392 if (key->type <= TYPE_SEM) 393 return (&umtxq_chains[1][key->hash]); 394 return (&umtxq_chains[0][key->hash]); 395 } 396 397 /* 398 * Set chain to busy state when following operation 399 * may be blocked (kernel mutex can not be used). 400 */ 401 void 402 umtxq_busy(struct umtx_key *key) 403 { 404 struct umtxq_chain *uc; 405 406 uc = umtxq_getchain(key); 407 mtx_assert(&uc->uc_lock, MA_OWNED); 408 if (uc->uc_busy) { 409 #ifdef SMP 410 if (smp_cpus > 1) { 411 int count = BUSY_SPINS; 412 if (count > 0) { 413 umtxq_unlock(key); 414 while (uc->uc_busy && --count > 0) 415 cpu_spinwait(); 416 umtxq_lock(key); 417 } 418 } 419 #endif 420 while (uc->uc_busy) { 421 uc->uc_waiters++; 422 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 423 uc->uc_waiters--; 424 } 425 } 426 uc->uc_busy = 1; 427 } 428 429 /* 430 * Unbusy a chain. 
431 */ 432 void 433 umtxq_unbusy(struct umtx_key *key) 434 { 435 struct umtxq_chain *uc; 436 437 uc = umtxq_getchain(key); 438 mtx_assert(&uc->uc_lock, MA_OWNED); 439 KASSERT(uc->uc_busy != 0, ("not busy")); 440 uc->uc_busy = 0; 441 if (uc->uc_waiters) 442 wakeup_one(uc); 443 } 444 445 void 446 umtxq_unbusy_unlocked(struct umtx_key *key) 447 { 448 449 umtxq_lock(key); 450 umtxq_unbusy(key); 451 umtxq_unlock(key); 452 } 453 454 static struct umtxq_queue * 455 umtxq_queue_lookup(struct umtx_key *key, int q) 456 { 457 struct umtxq_queue *uh; 458 struct umtxq_chain *uc; 459 460 uc = umtxq_getchain(key); 461 UMTXQ_LOCKED_ASSERT(uc); 462 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 463 if (umtx_key_match(&uh->key, key)) 464 return (uh); 465 } 466 467 return (NULL); 468 } 469 470 void 471 umtxq_insert_queue(struct umtx_q *uq, int q) 472 { 473 struct umtxq_queue *uh; 474 struct umtxq_chain *uc; 475 476 uc = umtxq_getchain(&uq->uq_key); 477 UMTXQ_LOCKED_ASSERT(uc); 478 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 479 uh = umtxq_queue_lookup(&uq->uq_key, q); 480 if (uh != NULL) { 481 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 482 } else { 483 uh = uq->uq_spare_queue; 484 uh->key = uq->uq_key; 485 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 486 #ifdef UMTX_PROFILING 487 uc->length++; 488 if (uc->length > uc->max_length) { 489 uc->max_length = uc->length; 490 if (uc->max_length > max_length) 491 max_length = uc->max_length; 492 } 493 #endif 494 } 495 uq->uq_spare_queue = NULL; 496 497 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 498 uh->length++; 499 uq->uq_flags |= UQF_UMTXQ; 500 uq->uq_cur_queue = uh; 501 return; 502 } 503 504 void 505 umtxq_remove_queue(struct umtx_q *uq, int q) 506 { 507 struct umtxq_chain *uc; 508 struct umtxq_queue *uh; 509 510 uc = umtxq_getchain(&uq->uq_key); 511 UMTXQ_LOCKED_ASSERT(uc); 512 if (uq->uq_flags & UQF_UMTXQ) { 513 uh = uq->uq_cur_queue; 514 TAILQ_REMOVE(&uh->head, uq, uq_link); 515 
uh->length--; 516 uq->uq_flags &= ~UQF_UMTXQ; 517 if (TAILQ_EMPTY(&uh->head)) { 518 KASSERT(uh->length == 0, 519 ("inconsistent umtxq_queue length")); 520 #ifdef UMTX_PROFILING 521 uc->length--; 522 #endif 523 LIST_REMOVE(uh, link); 524 } else { 525 uh = LIST_FIRST(&uc->uc_spare_queue); 526 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 527 LIST_REMOVE(uh, link); 528 } 529 uq->uq_spare_queue = uh; 530 uq->uq_cur_queue = NULL; 531 } 532 } 533 534 /* 535 * Check if there are multiple waiters 536 */ 537 int 538 umtxq_count(struct umtx_key *key) 539 { 540 struct umtxq_queue *uh; 541 542 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 543 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 544 if (uh != NULL) 545 return (uh->length); 546 return (0); 547 } 548 549 /* 550 * Check if there are multiple PI waiters and returns first 551 * waiter. 552 */ 553 static int 554 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 555 { 556 struct umtxq_queue *uh; 557 558 *first = NULL; 559 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 560 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 561 if (uh != NULL) { 562 *first = TAILQ_FIRST(&uh->head); 563 return (uh->length); 564 } 565 return (0); 566 } 567 568 /* 569 * Wake up threads waiting on an userland object by a bit mask. 570 */ 571 int 572 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 573 { 574 struct umtxq_queue *uh; 575 struct umtx_q *uq, *uq_temp; 576 int ret; 577 578 ret = 0; 579 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 580 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 581 if (uh == NULL) 582 return (0); 583 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 584 if ((uq->uq_bitset & bitset) == 0) 585 continue; 586 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 587 wakeup_one(uq); 588 if (++ret >= n_wake) 589 break; 590 } 591 return (ret); 592 } 593 594 /* 595 * Wake up threads waiting on an userland object. 
596 */ 597 598 static int 599 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 600 { 601 struct umtxq_queue *uh; 602 struct umtx_q *uq; 603 int ret; 604 605 ret = 0; 606 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 607 uh = umtxq_queue_lookup(key, q); 608 if (uh != NULL) { 609 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 610 umtxq_remove_queue(uq, q); 611 wakeup(uq); 612 if (++ret >= n_wake) 613 return (ret); 614 } 615 } 616 return (ret); 617 } 618 619 /* 620 * Wake up specified thread. 621 */ 622 static inline void 623 umtxq_signal_thread(struct umtx_q *uq) 624 { 625 626 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 627 umtxq_remove(uq); 628 wakeup(uq); 629 } 630 631 /* 632 * Wake up a maximum of n_wake threads that are waiting on an userland 633 * object identified by key. The remaining threads are removed from queue 634 * identified by key and added to the queue identified by key2 (requeued). 635 * The n_requeue specifies an upper limit on the number of threads that 636 * are requeued to the second queue. 
637 */ 638 int 639 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 640 int n_requeue) 641 { 642 struct umtxq_queue *uh; 643 struct umtx_q *uq, *uq_temp; 644 int ret; 645 646 ret = 0; 647 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 648 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 649 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 650 if (uh == NULL) 651 return (0); 652 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 653 if (++ret <= n_wake) { 654 umtxq_remove(uq); 655 wakeup_one(uq); 656 } else { 657 umtxq_remove(uq); 658 uq->uq_key = *key2; 659 umtxq_insert(uq); 660 if (ret - n_wake == n_requeue) 661 break; 662 } 663 } 664 return (ret); 665 } 666 667 static inline int 668 tstohz(const struct timespec *tsp) 669 { 670 struct timeval tv; 671 672 TIMESPEC_TO_TIMEVAL(&tv, tsp); 673 return tvtohz(&tv); 674 } 675 676 void 677 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 678 int absolute, const struct timespec *timeout) 679 { 680 681 timo->clockid = clockid; 682 if (!absolute) { 683 timo->is_abs_real = false; 684 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 685 timespecadd(&timo->cur, timeout, &timo->end); 686 } else { 687 timo->end = *timeout; 688 timo->is_abs_real = clockid == CLOCK_REALTIME || 689 clockid == CLOCK_REALTIME_FAST || 690 clockid == CLOCK_REALTIME_PRECISE || 691 clockid == CLOCK_SECOND; 692 } 693 } 694 695 static void 696 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 697 const struct _umtx_time *umtxtime) 698 { 699 700 umtx_abs_timeout_init(timo, umtxtime->_clockid, 701 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 702 } 703 704 static int 705 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt, 706 int *flags) 707 { 708 struct bintime bt, bbt; 709 struct timespec tts; 710 711 switch (timo->clockid) { 712 713 /* Clocks that can be converted into absolute time. 
*/ 714 case CLOCK_REALTIME: 715 case CLOCK_REALTIME_PRECISE: 716 case CLOCK_REALTIME_FAST: 717 case CLOCK_MONOTONIC: 718 case CLOCK_MONOTONIC_PRECISE: 719 case CLOCK_MONOTONIC_FAST: 720 case CLOCK_UPTIME: 721 case CLOCK_UPTIME_PRECISE: 722 case CLOCK_UPTIME_FAST: 723 case CLOCK_SECOND: 724 timespec2bintime(&timo->end, &bt); 725 switch (timo->clockid) { 726 case CLOCK_REALTIME: 727 case CLOCK_REALTIME_PRECISE: 728 case CLOCK_REALTIME_FAST: 729 case CLOCK_SECOND: 730 getboottimebin(&bbt); 731 bintime_sub(&bt, &bbt); 732 break; 733 } 734 if (bt.sec < 0) 735 return (ETIMEDOUT); 736 if (bt.sec >= (SBT_MAX >> 32)) { 737 *sbt = 0; 738 *flags = 0; 739 return (0); 740 } 741 *sbt = bttosbt(bt); 742 switch (timo->clockid) { 743 case CLOCK_REALTIME_FAST: 744 case CLOCK_MONOTONIC_FAST: 745 case CLOCK_UPTIME_FAST: 746 *sbt += tc_tick_sbt; 747 break; 748 case CLOCK_SECOND: 749 *sbt += SBT_1S; 750 break; 751 } 752 *flags = C_ABSOLUTE; 753 return (0); 754 755 /* Clocks that has to be periodically polled. */ 756 case CLOCK_VIRTUAL: 757 case CLOCK_PROF: 758 case CLOCK_THREAD_CPUTIME_ID: 759 case CLOCK_PROCESS_CPUTIME_ID: 760 default: 761 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 762 if (timespeccmp(&timo->end, &timo->cur, <=)) 763 return (ETIMEDOUT); 764 timespecsub(&timo->end, &timo->cur, &tts); 765 *sbt = tick_sbt * tstohz(&tts); 766 *flags = C_HARDCLOCK; 767 return (0); 768 } 769 } 770 771 static uint32_t 772 umtx_unlock_val(uint32_t flags, bool rb) 773 { 774 775 if (rb) 776 return (UMUTEX_RB_OWNERDEAD); 777 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 778 return (UMUTEX_RB_NOTRECOV); 779 else 780 return (UMUTEX_UNOWNED); 781 782 } 783 784 /* 785 * Put thread into sleep state, before sleeping, check if 786 * thread was removed from umtx queue. 
787 */ 788 int 789 umtxq_sleep(struct umtx_q *uq, const char *wmesg, 790 struct umtx_abs_timeout *timo) 791 { 792 struct umtxq_chain *uc; 793 sbintime_t sbt = 0; 794 int error, flags = 0; 795 796 uc = umtxq_getchain(&uq->uq_key); 797 UMTXQ_LOCKED_ASSERT(uc); 798 for (;;) { 799 if (!(uq->uq_flags & UQF_UMTXQ)) { 800 error = 0; 801 break; 802 } 803 if (timo != NULL) { 804 if (timo->is_abs_real) 805 curthread->td_rtcgen = 806 atomic_load_acq_int(&rtc_generation); 807 error = umtx_abs_timeout_getsbt(timo, &sbt, &flags); 808 if (error != 0) 809 break; 810 } 811 error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, 812 sbt, 0, flags); 813 uc = umtxq_getchain(&uq->uq_key); 814 mtx_lock(&uc->uc_lock); 815 if (error == EINTR || error == ERESTART) 816 break; 817 if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) { 818 error = ETIMEDOUT; 819 break; 820 } 821 } 822 823 curthread->td_rtcgen = 0; 824 return (error); 825 } 826 827 /* 828 * Convert userspace address into unique logical address. 
829 */ 830 int 831 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 832 { 833 struct thread *td = curthread; 834 vm_map_t map; 835 vm_map_entry_t entry; 836 vm_pindex_t pindex; 837 vm_prot_t prot; 838 boolean_t wired; 839 840 key->type = type; 841 if (share == THREAD_SHARE) { 842 key->shared = 0; 843 key->info.private.vs = td->td_proc->p_vmspace; 844 key->info.private.addr = (uintptr_t)addr; 845 } else { 846 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 847 map = &td->td_proc->p_vmspace->vm_map; 848 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 849 &entry, &key->info.shared.object, &pindex, &prot, 850 &wired) != KERN_SUCCESS) { 851 return (EFAULT); 852 } 853 854 if ((share == PROCESS_SHARE) || 855 (share == AUTO_SHARE && 856 VM_INHERIT_SHARE == entry->inheritance)) { 857 key->shared = 1; 858 key->info.shared.offset = (vm_offset_t)addr - 859 entry->start + entry->offset; 860 vm_object_reference(key->info.shared.object); 861 } else { 862 key->shared = 0; 863 key->info.private.vs = td->td_proc->p_vmspace; 864 key->info.private.addr = (uintptr_t)addr; 865 } 866 vm_map_lookup_done(map, entry); 867 } 868 869 umtxq_hash(key); 870 return (0); 871 } 872 873 /* 874 * Release key. 875 */ 876 void 877 umtx_key_release(struct umtx_key *key) 878 { 879 if (key->shared) 880 vm_object_deallocate(key->info.shared.object); 881 } 882 883 #ifdef COMPAT_FREEBSD10 884 /* 885 * Lock a umtx object. 886 */ 887 static int 888 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 889 const struct timespec *timeout) 890 { 891 struct umtx_abs_timeout timo; 892 struct umtx_q *uq; 893 u_long owner; 894 u_long old; 895 int error = 0; 896 897 uq = td->td_umtxq; 898 if (timeout != NULL) 899 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 900 901 /* 902 * Care must be exercised when dealing with umtx structure. It 903 * can fault on any access. 904 */ 905 for (;;) { 906 /* 907 * Try the uncontested case. This should be done in userland. 
908 */ 909 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 910 911 /* The acquire succeeded. */ 912 if (owner == UMTX_UNOWNED) 913 return (0); 914 915 /* The address was invalid. */ 916 if (owner == -1) 917 return (EFAULT); 918 919 /* If no one owns it but it is contested try to acquire it. */ 920 if (owner == UMTX_CONTESTED) { 921 owner = casuword(&umtx->u_owner, 922 UMTX_CONTESTED, id | UMTX_CONTESTED); 923 924 if (owner == UMTX_CONTESTED) 925 return (0); 926 927 /* The address was invalid. */ 928 if (owner == -1) 929 return (EFAULT); 930 931 error = thread_check_susp(td, false); 932 if (error != 0) 933 break; 934 935 /* If this failed the lock has changed, restart. */ 936 continue; 937 } 938 939 /* 940 * If we caught a signal, we have retried and now 941 * exit immediately. 942 */ 943 if (error != 0) 944 break; 945 946 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 947 AUTO_SHARE, &uq->uq_key)) != 0) 948 return (error); 949 950 umtxq_lock(&uq->uq_key); 951 umtxq_busy(&uq->uq_key); 952 umtxq_insert(uq); 953 umtxq_unbusy(&uq->uq_key); 954 umtxq_unlock(&uq->uq_key); 955 956 /* 957 * Set the contested bit so that a release in user space 958 * knows to use the system call for unlock. If this fails 959 * either some one else has acquired the lock or it has been 960 * released. 961 */ 962 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 963 964 /* The address was invalid. */ 965 if (old == -1) { 966 umtxq_lock(&uq->uq_key); 967 umtxq_remove(uq); 968 umtxq_unlock(&uq->uq_key); 969 umtx_key_release(&uq->uq_key); 970 return (EFAULT); 971 } 972 973 /* 974 * We set the contested bit, sleep. Otherwise the lock changed 975 * and we need to retry or we lost a race to the thread 976 * unlocking the umtx. 977 */ 978 umtxq_lock(&uq->uq_key); 979 if (old == owner) 980 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 
NULL : 981 &timo); 982 umtxq_remove(uq); 983 umtxq_unlock(&uq->uq_key); 984 umtx_key_release(&uq->uq_key); 985 986 if (error == 0) 987 error = thread_check_susp(td, false); 988 } 989 990 if (timeout == NULL) { 991 /* Mutex locking is restarted if it is interrupted. */ 992 if (error == EINTR) 993 error = ERESTART; 994 } else { 995 /* Timed-locking is not restarted. */ 996 if (error == ERESTART) 997 error = EINTR; 998 } 999 return (error); 1000 } 1001 1002 /* 1003 * Unlock a umtx object. 1004 */ 1005 static int 1006 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 1007 { 1008 struct umtx_key key; 1009 u_long owner; 1010 u_long old; 1011 int error; 1012 int count; 1013 1014 /* 1015 * Make sure we own this mtx. 1016 */ 1017 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 1018 if (owner == -1) 1019 return (EFAULT); 1020 1021 if ((owner & ~UMTX_CONTESTED) != id) 1022 return (EPERM); 1023 1024 /* This should be done in userland */ 1025 if ((owner & UMTX_CONTESTED) == 0) { 1026 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 1027 if (old == -1) 1028 return (EFAULT); 1029 if (old == owner) 1030 return (0); 1031 owner = old; 1032 } 1033 1034 /* We should only ever be in here for contested locks */ 1035 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1036 &key)) != 0) 1037 return (error); 1038 1039 umtxq_lock(&key); 1040 umtxq_busy(&key); 1041 count = umtxq_count(&key); 1042 umtxq_unlock(&key); 1043 1044 /* 1045 * When unlocking the umtx, it must be marked as unowned if 1046 * there is zero or one thread only waiting for it. 1047 * Otherwise, it must be marked as contested. 1048 */ 1049 old = casuword(&umtx->u_owner, owner, 1050 count <= 1 ? 
UMTX_UNOWNED : UMTX_CONTESTED); 1051 umtxq_lock(&key); 1052 umtxq_signal(&key,1); 1053 umtxq_unbusy(&key); 1054 umtxq_unlock(&key); 1055 umtx_key_release(&key); 1056 if (old == -1) 1057 return (EFAULT); 1058 if (old != owner) 1059 return (EINVAL); 1060 return (0); 1061 } 1062 1063 #ifdef COMPAT_FREEBSD32 1064 1065 /* 1066 * Lock a umtx object. 1067 */ 1068 static int 1069 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1070 const struct timespec *timeout) 1071 { 1072 struct umtx_abs_timeout timo; 1073 struct umtx_q *uq; 1074 uint32_t owner; 1075 uint32_t old; 1076 int error = 0; 1077 1078 uq = td->td_umtxq; 1079 1080 if (timeout != NULL) 1081 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1082 1083 /* 1084 * Care must be exercised when dealing with umtx structure. It 1085 * can fault on any access. 1086 */ 1087 for (;;) { 1088 /* 1089 * Try the uncontested case. This should be done in userland. 1090 */ 1091 owner = casuword32(m, UMUTEX_UNOWNED, id); 1092 1093 /* The acquire succeeded. */ 1094 if (owner == UMUTEX_UNOWNED) 1095 return (0); 1096 1097 /* The address was invalid. */ 1098 if (owner == -1) 1099 return (EFAULT); 1100 1101 /* If no one owns it but it is contested try to acquire it. */ 1102 if (owner == UMUTEX_CONTESTED) { 1103 owner = casuword32(m, 1104 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1105 if (owner == UMUTEX_CONTESTED) 1106 return (0); 1107 1108 /* The address was invalid. */ 1109 if (owner == -1) 1110 return (EFAULT); 1111 1112 error = thread_check_susp(td, false); 1113 if (error != 0) 1114 break; 1115 1116 /* If this failed the lock has changed, restart. */ 1117 continue; 1118 } 1119 1120 /* 1121 * If we caught a signal, we have retried and now 1122 * exit immediately. 
1123 */ 1124 if (error != 0) 1125 return (error); 1126 1127 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1128 AUTO_SHARE, &uq->uq_key)) != 0) 1129 return (error); 1130 1131 umtxq_lock(&uq->uq_key); 1132 umtxq_busy(&uq->uq_key); 1133 umtxq_insert(uq); 1134 umtxq_unbusy(&uq->uq_key); 1135 umtxq_unlock(&uq->uq_key); 1136 1137 /* 1138 * Set the contested bit so that a release in user space 1139 * knows to use the system call for unlock. If this fails 1140 * either some one else has acquired the lock or it has been 1141 * released. 1142 */ 1143 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1144 1145 /* The address was invalid. */ 1146 if (old == -1) { 1147 umtxq_lock(&uq->uq_key); 1148 umtxq_remove(uq); 1149 umtxq_unlock(&uq->uq_key); 1150 umtx_key_release(&uq->uq_key); 1151 return (EFAULT); 1152 } 1153 1154 /* 1155 * We set the contested bit, sleep. Otherwise the lock changed 1156 * and we need to retry or we lost a race to the thread 1157 * unlocking the umtx. 1158 */ 1159 umtxq_lock(&uq->uq_key); 1160 if (old == owner) 1161 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1162 NULL : &timo); 1163 umtxq_remove(uq); 1164 umtxq_unlock(&uq->uq_key); 1165 umtx_key_release(&uq->uq_key); 1166 1167 if (error == 0) 1168 error = thread_check_susp(td, false); 1169 } 1170 1171 if (timeout == NULL) { 1172 /* Mutex locking is restarted if it is interrupted. */ 1173 if (error == EINTR) 1174 error = ERESTART; 1175 } else { 1176 /* Timed-locking is not restarted. */ 1177 if (error == ERESTART) 1178 error = EINTR; 1179 } 1180 return (error); 1181 } 1182 1183 /* 1184 * Unlock a umtx object. 1185 */ 1186 static int 1187 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1188 { 1189 struct umtx_key key; 1190 uint32_t owner; 1191 uint32_t old; 1192 int error; 1193 int count; 1194 1195 /* 1196 * Make sure we own this mtx. 
1197 */ 1198 owner = fuword32(m); 1199 if (owner == -1) 1200 return (EFAULT); 1201 1202 if ((owner & ~UMUTEX_CONTESTED) != id) 1203 return (EPERM); 1204 1205 /* This should be done in userland */ 1206 if ((owner & UMUTEX_CONTESTED) == 0) { 1207 old = casuword32(m, owner, UMUTEX_UNOWNED); 1208 if (old == -1) 1209 return (EFAULT); 1210 if (old == owner) 1211 return (0); 1212 owner = old; 1213 } 1214 1215 /* We should only ever be in here for contested locks */ 1216 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1217 &key)) != 0) 1218 return (error); 1219 1220 umtxq_lock(&key); 1221 umtxq_busy(&key); 1222 count = umtxq_count(&key); 1223 umtxq_unlock(&key); 1224 1225 /* 1226 * When unlocking the umtx, it must be marked as unowned if 1227 * there is zero or one thread only waiting for it. 1228 * Otherwise, it must be marked as contested. 1229 */ 1230 old = casuword32(m, owner, 1231 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1232 umtxq_lock(&key); 1233 umtxq_signal(&key,1); 1234 umtxq_unbusy(&key); 1235 umtxq_unlock(&key); 1236 umtx_key_release(&key); 1237 if (old == -1) 1238 return (EFAULT); 1239 if (old != owner) 1240 return (EINVAL); 1241 return (0); 1242 } 1243 #endif /* COMPAT_FREEBSD32 */ 1244 #endif /* COMPAT_FREEBSD10 */ 1245 1246 /* 1247 * Fetch and compare value, sleep on the address if value is not changed. 1248 */ 1249 static int 1250 do_wait(struct thread *td, void *addr, u_long id, 1251 struct _umtx_time *timeout, int compat32, int is_private) 1252 { 1253 struct umtx_abs_timeout timo; 1254 struct umtx_q *uq; 1255 u_long tmp; 1256 uint32_t tmp32; 1257 int error = 0; 1258 1259 uq = td->td_umtxq; 1260 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1261 is_private ? 
THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1262 return (error); 1263 1264 if (timeout != NULL) 1265 umtx_abs_timeout_init2(&timo, timeout); 1266 1267 umtxq_lock(&uq->uq_key); 1268 umtxq_insert(uq); 1269 umtxq_unlock(&uq->uq_key); 1270 if (compat32 == 0) { 1271 error = fueword(addr, &tmp); 1272 if (error != 0) 1273 error = EFAULT; 1274 } else { 1275 error = fueword32(addr, &tmp32); 1276 if (error == 0) 1277 tmp = tmp32; 1278 else 1279 error = EFAULT; 1280 } 1281 umtxq_lock(&uq->uq_key); 1282 if (error == 0) { 1283 if (tmp == id) 1284 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 1285 NULL : &timo); 1286 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1287 error = 0; 1288 else 1289 umtxq_remove(uq); 1290 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 1291 umtxq_remove(uq); 1292 } 1293 umtxq_unlock(&uq->uq_key); 1294 umtx_key_release(&uq->uq_key); 1295 if (error == ERESTART) 1296 error = EINTR; 1297 return (error); 1298 } 1299 1300 /* 1301 * Wake up threads sleeping on the specified address. 1302 */ 1303 int 1304 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1305 { 1306 struct umtx_key key; 1307 int ret; 1308 1309 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1310 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1311 return (ret); 1312 umtxq_lock(&key); 1313 umtxq_signal(&key, n_wake); 1314 umtxq_unlock(&key); 1315 umtx_key_release(&key); 1316 return (0); 1317 } 1318 1319 /* 1320 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1321 */ 1322 static int 1323 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1324 struct _umtx_time *timeout, int mode) 1325 { 1326 struct umtx_abs_timeout timo; 1327 struct umtx_q *uq; 1328 uint32_t owner, old, id; 1329 int error, rv; 1330 1331 id = td->td_tid; 1332 uq = td->td_umtxq; 1333 error = 0; 1334 if (timeout != NULL) 1335 umtx_abs_timeout_init2(&timo, timeout); 1336 1337 /* 1338 * Care must be exercised when dealing with umtx structure. It 1339 * can fault on any access. 
1340 */ 1341 for (;;) { 1342 rv = fueword32(&m->m_owner, &owner); 1343 if (rv == -1) 1344 return (EFAULT); 1345 if (mode == _UMUTEX_WAIT) { 1346 if (owner == UMUTEX_UNOWNED || 1347 owner == UMUTEX_CONTESTED || 1348 owner == UMUTEX_RB_OWNERDEAD || 1349 owner == UMUTEX_RB_NOTRECOV) 1350 return (0); 1351 } else { 1352 /* 1353 * Robust mutex terminated. Kernel duty is to 1354 * return EOWNERDEAD to the userspace. The 1355 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1356 * by the common userspace code. 1357 */ 1358 if (owner == UMUTEX_RB_OWNERDEAD) { 1359 rv = casueword32(&m->m_owner, 1360 UMUTEX_RB_OWNERDEAD, &owner, 1361 id | UMUTEX_CONTESTED); 1362 if (rv == -1) 1363 return (EFAULT); 1364 if (rv == 0) { 1365 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1366 return (EOWNERDEAD); /* success */ 1367 } 1368 MPASS(rv == 1); 1369 rv = thread_check_susp(td, false); 1370 if (rv != 0) 1371 return (rv); 1372 continue; 1373 } 1374 if (owner == UMUTEX_RB_NOTRECOV) 1375 return (ENOTRECOVERABLE); 1376 1377 /* 1378 * Try the uncontested case. This should be 1379 * done in userland. 1380 */ 1381 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1382 &owner, id); 1383 /* The address was invalid. */ 1384 if (rv == -1) 1385 return (EFAULT); 1386 1387 /* The acquire succeeded. */ 1388 if (rv == 0) { 1389 MPASS(owner == UMUTEX_UNOWNED); 1390 return (0); 1391 } 1392 1393 /* 1394 * If no one owns it but it is contested try 1395 * to acquire it. 1396 */ 1397 MPASS(rv == 1); 1398 if (owner == UMUTEX_CONTESTED) { 1399 rv = casueword32(&m->m_owner, 1400 UMUTEX_CONTESTED, &owner, 1401 id | UMUTEX_CONTESTED); 1402 /* The address was invalid. */ 1403 if (rv == -1) 1404 return (EFAULT); 1405 if (rv == 0) { 1406 MPASS(owner == UMUTEX_CONTESTED); 1407 return (0); 1408 } 1409 if (rv == 1) { 1410 rv = thread_check_susp(td, false); 1411 if (rv != 0) 1412 return (rv); 1413 } 1414 1415 /* 1416 * If this failed the lock has 1417 * changed, restart. 
1418 */ 1419 continue; 1420 } 1421 1422 /* rv == 1 but not contested, likely store failure */ 1423 rv = thread_check_susp(td, false); 1424 if (rv != 0) 1425 return (rv); 1426 } 1427 1428 if (mode == _UMUTEX_TRY) 1429 return (EBUSY); 1430 1431 /* 1432 * If we caught a signal, we have retried and now 1433 * exit immediately. 1434 */ 1435 if (error != 0) 1436 return (error); 1437 1438 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1439 GET_SHARE(flags), &uq->uq_key)) != 0) 1440 return (error); 1441 1442 umtxq_lock(&uq->uq_key); 1443 umtxq_busy(&uq->uq_key); 1444 umtxq_insert(uq); 1445 umtxq_unlock(&uq->uq_key); 1446 1447 /* 1448 * Set the contested bit so that a release in user space 1449 * knows to use the system call for unlock. If this fails 1450 * either some one else has acquired the lock or it has been 1451 * released. 1452 */ 1453 rv = casueword32(&m->m_owner, owner, &old, 1454 owner | UMUTEX_CONTESTED); 1455 1456 /* The address was invalid or casueword failed to store. */ 1457 if (rv == -1 || rv == 1) { 1458 umtxq_lock(&uq->uq_key); 1459 umtxq_remove(uq); 1460 umtxq_unbusy(&uq->uq_key); 1461 umtxq_unlock(&uq->uq_key); 1462 umtx_key_release(&uq->uq_key); 1463 if (rv == -1) 1464 return (EFAULT); 1465 if (rv == 1) { 1466 rv = thread_check_susp(td, false); 1467 if (rv != 0) 1468 return (rv); 1469 } 1470 continue; 1471 } 1472 1473 /* 1474 * We set the contested bit, sleep. Otherwise the lock changed 1475 * and we need to retry or we lost a race to the thread 1476 * unlocking the umtx. 1477 */ 1478 umtxq_lock(&uq->uq_key); 1479 umtxq_unbusy(&uq->uq_key); 1480 MPASS(old == owner); 1481 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1482 NULL : &timo); 1483 umtxq_remove(uq); 1484 umtxq_unlock(&uq->uq_key); 1485 umtx_key_release(&uq->uq_key); 1486 1487 if (error == 0) 1488 error = thread_check_susp(td, false); 1489 } 1490 1491 return (0); 1492 } 1493 1494 /* 1495 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 
1496 */ 1497 static int 1498 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1499 { 1500 struct umtx_key key; 1501 uint32_t owner, old, id, newlock; 1502 int error, count; 1503 1504 id = td->td_tid; 1505 1506 again: 1507 /* 1508 * Make sure we own this mtx. 1509 */ 1510 error = fueword32(&m->m_owner, &owner); 1511 if (error == -1) 1512 return (EFAULT); 1513 1514 if ((owner & ~UMUTEX_CONTESTED) != id) 1515 return (EPERM); 1516 1517 newlock = umtx_unlock_val(flags, rb); 1518 if ((owner & UMUTEX_CONTESTED) == 0) { 1519 error = casueword32(&m->m_owner, owner, &old, newlock); 1520 if (error == -1) 1521 return (EFAULT); 1522 if (error == 1) { 1523 error = thread_check_susp(td, false); 1524 if (error != 0) 1525 return (error); 1526 goto again; 1527 } 1528 MPASS(old == owner); 1529 return (0); 1530 } 1531 1532 /* We should only ever be in here for contested locks */ 1533 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1534 &key)) != 0) 1535 return (error); 1536 1537 umtxq_lock(&key); 1538 umtxq_busy(&key); 1539 count = umtxq_count(&key); 1540 umtxq_unlock(&key); 1541 1542 /* 1543 * When unlocking the umtx, it must be marked as unowned if 1544 * there is zero or one thread only waiting for it. 1545 * Otherwise, it must be marked as contested. 1546 */ 1547 if (count > 1) 1548 newlock |= UMUTEX_CONTESTED; 1549 error = casueword32(&m->m_owner, owner, &old, newlock); 1550 umtxq_lock(&key); 1551 umtxq_signal(&key, 1); 1552 umtxq_unbusy(&key); 1553 umtxq_unlock(&key); 1554 umtx_key_release(&key); 1555 if (error == -1) 1556 return (EFAULT); 1557 if (error == 1) { 1558 if (old != owner) 1559 return (EINVAL); 1560 error = thread_check_susp(td, false); 1561 if (error != 0) 1562 return (error); 1563 goto again; 1564 } 1565 return (0); 1566 } 1567 1568 /* 1569 * Check if the mutex is available and wake up a waiter, 1570 * only for simple mutex. 
1571 */ 1572 static int 1573 do_wake_umutex(struct thread *td, struct umutex *m) 1574 { 1575 struct umtx_key key; 1576 uint32_t owner; 1577 uint32_t flags; 1578 int error; 1579 int count; 1580 1581 again: 1582 error = fueword32(&m->m_owner, &owner); 1583 if (error == -1) 1584 return (EFAULT); 1585 1586 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1587 owner != UMUTEX_RB_NOTRECOV) 1588 return (0); 1589 1590 error = fueword32(&m->m_flags, &flags); 1591 if (error == -1) 1592 return (EFAULT); 1593 1594 /* We should only ever be in here for contested locks */ 1595 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1596 &key)) != 0) 1597 return (error); 1598 1599 umtxq_lock(&key); 1600 umtxq_busy(&key); 1601 count = umtxq_count(&key); 1602 umtxq_unlock(&key); 1603 1604 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1605 owner != UMUTEX_RB_NOTRECOV) { 1606 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1607 UMUTEX_UNOWNED); 1608 if (error == -1) { 1609 error = EFAULT; 1610 } else if (error == 1) { 1611 umtxq_lock(&key); 1612 umtxq_unbusy(&key); 1613 umtxq_unlock(&key); 1614 umtx_key_release(&key); 1615 error = thread_check_susp(td, false); 1616 if (error != 0) 1617 return (error); 1618 goto again; 1619 } 1620 } 1621 1622 umtxq_lock(&key); 1623 if (error == 0 && count != 0) { 1624 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1625 owner == UMUTEX_RB_OWNERDEAD || 1626 owner == UMUTEX_RB_NOTRECOV); 1627 umtxq_signal(&key, 1); 1628 } 1629 umtxq_unbusy(&key); 1630 umtxq_unlock(&key); 1631 umtx_key_release(&key); 1632 return (error); 1633 } 1634 1635 /* 1636 * Check if the mutex has waiters and tries to fix contention bit. 
1637 */ 1638 static int 1639 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1640 { 1641 struct umtx_key key; 1642 uint32_t owner, old; 1643 int type; 1644 int error; 1645 int count; 1646 1647 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1648 UMUTEX_ROBUST)) { 1649 case 0: 1650 case UMUTEX_ROBUST: 1651 type = TYPE_NORMAL_UMUTEX; 1652 break; 1653 case UMUTEX_PRIO_INHERIT: 1654 type = TYPE_PI_UMUTEX; 1655 break; 1656 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1657 type = TYPE_PI_ROBUST_UMUTEX; 1658 break; 1659 case UMUTEX_PRIO_PROTECT: 1660 type = TYPE_PP_UMUTEX; 1661 break; 1662 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1663 type = TYPE_PP_ROBUST_UMUTEX; 1664 break; 1665 default: 1666 return (EINVAL); 1667 } 1668 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1669 return (error); 1670 1671 owner = 0; 1672 umtxq_lock(&key); 1673 umtxq_busy(&key); 1674 count = umtxq_count(&key); 1675 umtxq_unlock(&key); 1676 1677 error = fueword32(&m->m_owner, &owner); 1678 if (error == -1) 1679 error = EFAULT; 1680 1681 /* 1682 * Only repair contention bit if there is a waiter, this means 1683 * the mutex is still being referenced by userland code, 1684 * otherwise don't update any memory. 
1685 */ 1686 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1687 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1688 error = casueword32(&m->m_owner, owner, &old, 1689 owner | UMUTEX_CONTESTED); 1690 if (error == -1) { 1691 error = EFAULT; 1692 break; 1693 } 1694 if (error == 0) { 1695 MPASS(old == owner); 1696 break; 1697 } 1698 owner = old; 1699 error = thread_check_susp(td, false); 1700 } 1701 1702 umtxq_lock(&key); 1703 if (error == EFAULT) { 1704 umtxq_signal(&key, INT_MAX); 1705 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1706 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1707 umtxq_signal(&key, 1); 1708 umtxq_unbusy(&key); 1709 umtxq_unlock(&key); 1710 umtx_key_release(&key); 1711 return (error); 1712 } 1713 1714 struct umtx_pi * 1715 umtx_pi_alloc(int flags) 1716 { 1717 struct umtx_pi *pi; 1718 1719 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1720 TAILQ_INIT(&pi->pi_blocked); 1721 atomic_add_int(&umtx_pi_allocated, 1); 1722 return (pi); 1723 } 1724 1725 void 1726 umtx_pi_free(struct umtx_pi *pi) 1727 { 1728 uma_zfree(umtx_pi_zone, pi); 1729 atomic_add_int(&umtx_pi_allocated, -1); 1730 } 1731 1732 /* 1733 * Adjust the thread's position on a pi_state after its priority has been 1734 * changed. 1735 */ 1736 static int 1737 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1738 { 1739 struct umtx_q *uq, *uq1, *uq2; 1740 struct thread *td1; 1741 1742 mtx_assert(&umtx_lock, MA_OWNED); 1743 if (pi == NULL) 1744 return (0); 1745 1746 uq = td->td_umtxq; 1747 1748 /* 1749 * Check if the thread needs to be moved on the blocked chain. 1750 * It needs to be moved if either its priority is lower than 1751 * the previous thread or higher than the next thread. 
1752 */ 1753 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1754 uq2 = TAILQ_NEXT(uq, uq_lockq); 1755 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1756 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1757 /* 1758 * Remove thread from blocked chain and determine where 1759 * it should be moved to. 1760 */ 1761 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1762 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1763 td1 = uq1->uq_thread; 1764 MPASS(td1->td_proc->p_magic == P_MAGIC); 1765 if (UPRI(td1) > UPRI(td)) 1766 break; 1767 } 1768 1769 if (uq1 == NULL) 1770 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1771 else 1772 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1773 } 1774 return (1); 1775 } 1776 1777 static struct umtx_pi * 1778 umtx_pi_next(struct umtx_pi *pi) 1779 { 1780 struct umtx_q *uq_owner; 1781 1782 if (pi->pi_owner == NULL) 1783 return (NULL); 1784 uq_owner = pi->pi_owner->td_umtxq; 1785 if (uq_owner == NULL) 1786 return (NULL); 1787 return (uq_owner->uq_pi_blocked); 1788 } 1789 1790 /* 1791 * Floyd's Cycle-Finding Algorithm. 1792 */ 1793 static bool 1794 umtx_pi_check_loop(struct umtx_pi *pi) 1795 { 1796 struct umtx_pi *pi1; /* fast iterator */ 1797 1798 mtx_assert(&umtx_lock, MA_OWNED); 1799 if (pi == NULL) 1800 return (false); 1801 pi1 = pi; 1802 for (;;) { 1803 pi = umtx_pi_next(pi); 1804 if (pi == NULL) 1805 break; 1806 pi1 = umtx_pi_next(pi1); 1807 if (pi1 == NULL) 1808 break; 1809 pi1 = umtx_pi_next(pi1); 1810 if (pi1 == NULL) 1811 break; 1812 if (pi == pi1) 1813 return (true); 1814 } 1815 return (false); 1816 } 1817 1818 /* 1819 * Propagate priority when a thread is blocked on POSIX 1820 * PI mutex. 
1821 */ 1822 static void 1823 umtx_propagate_priority(struct thread *td) 1824 { 1825 struct umtx_q *uq; 1826 struct umtx_pi *pi; 1827 int pri; 1828 1829 mtx_assert(&umtx_lock, MA_OWNED); 1830 pri = UPRI(td); 1831 uq = td->td_umtxq; 1832 pi = uq->uq_pi_blocked; 1833 if (pi == NULL) 1834 return; 1835 if (umtx_pi_check_loop(pi)) 1836 return; 1837 1838 for (;;) { 1839 td = pi->pi_owner; 1840 if (td == NULL || td == curthread) 1841 return; 1842 1843 MPASS(td->td_proc != NULL); 1844 MPASS(td->td_proc->p_magic == P_MAGIC); 1845 1846 thread_lock(td); 1847 if (td->td_lend_user_pri > pri) 1848 sched_lend_user_prio(td, pri); 1849 else { 1850 thread_unlock(td); 1851 break; 1852 } 1853 thread_unlock(td); 1854 1855 /* 1856 * Pick up the lock that td is blocked on. 1857 */ 1858 uq = td->td_umtxq; 1859 pi = uq->uq_pi_blocked; 1860 if (pi == NULL) 1861 break; 1862 /* Resort td on the list if needed. */ 1863 umtx_pi_adjust_thread(pi, td); 1864 } 1865 } 1866 1867 /* 1868 * Unpropagate priority for a PI mutex when a thread blocked on 1869 * it is interrupted by signal or resumed by others. 
1870 */ 1871 static void 1872 umtx_repropagate_priority(struct umtx_pi *pi) 1873 { 1874 struct umtx_q *uq, *uq_owner; 1875 struct umtx_pi *pi2; 1876 int pri; 1877 1878 mtx_assert(&umtx_lock, MA_OWNED); 1879 1880 if (umtx_pi_check_loop(pi)) 1881 return; 1882 while (pi != NULL && pi->pi_owner != NULL) { 1883 pri = PRI_MAX; 1884 uq_owner = pi->pi_owner->td_umtxq; 1885 1886 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1887 uq = TAILQ_FIRST(&pi2->pi_blocked); 1888 if (uq != NULL) { 1889 if (pri > UPRI(uq->uq_thread)) 1890 pri = UPRI(uq->uq_thread); 1891 } 1892 } 1893 1894 if (pri > uq_owner->uq_inherited_pri) 1895 pri = uq_owner->uq_inherited_pri; 1896 thread_lock(pi->pi_owner); 1897 sched_lend_user_prio(pi->pi_owner, pri); 1898 thread_unlock(pi->pi_owner); 1899 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1900 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1901 } 1902 } 1903 1904 /* 1905 * Insert a PI mutex into owned list. 1906 */ 1907 static void 1908 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1909 { 1910 struct umtx_q *uq_owner; 1911 1912 uq_owner = owner->td_umtxq; 1913 mtx_assert(&umtx_lock, MA_OWNED); 1914 MPASS(pi->pi_owner == NULL); 1915 pi->pi_owner = owner; 1916 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1917 } 1918 1919 /* 1920 * Disown a PI mutex, and remove it from the owned list. 1921 */ 1922 static void 1923 umtx_pi_disown(struct umtx_pi *pi) 1924 { 1925 1926 mtx_assert(&umtx_lock, MA_OWNED); 1927 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1928 pi->pi_owner = NULL; 1929 } 1930 1931 /* 1932 * Claim ownership of a PI mutex. 1933 */ 1934 int 1935 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1936 { 1937 struct umtx_q *uq; 1938 int pri; 1939 1940 mtx_lock(&umtx_lock); 1941 if (pi->pi_owner == owner) { 1942 mtx_unlock(&umtx_lock); 1943 return (0); 1944 } 1945 1946 if (pi->pi_owner != NULL) { 1947 /* 1948 * userland may have already messed the mutex, sigh. 
1949 */ 1950 mtx_unlock(&umtx_lock); 1951 return (EPERM); 1952 } 1953 umtx_pi_setowner(pi, owner); 1954 uq = TAILQ_FIRST(&pi->pi_blocked); 1955 if (uq != NULL) { 1956 pri = UPRI(uq->uq_thread); 1957 thread_lock(owner); 1958 if (pri < UPRI(owner)) 1959 sched_lend_user_prio(owner, pri); 1960 thread_unlock(owner); 1961 } 1962 mtx_unlock(&umtx_lock); 1963 return (0); 1964 } 1965 1966 /* 1967 * Adjust a thread's order position in its blocked PI mutex, 1968 * this may result new priority propagating process. 1969 */ 1970 void 1971 umtx_pi_adjust(struct thread *td, u_char oldpri) 1972 { 1973 struct umtx_q *uq; 1974 struct umtx_pi *pi; 1975 1976 uq = td->td_umtxq; 1977 mtx_lock(&umtx_lock); 1978 /* 1979 * Pick up the lock that td is blocked on. 1980 */ 1981 pi = uq->uq_pi_blocked; 1982 if (pi != NULL) { 1983 umtx_pi_adjust_thread(pi, td); 1984 umtx_repropagate_priority(pi); 1985 } 1986 mtx_unlock(&umtx_lock); 1987 } 1988 1989 /* 1990 * Sleep on a PI mutex. 1991 */ 1992 int 1993 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 1994 const char *wmesg, struct umtx_abs_timeout *timo, bool shared) 1995 { 1996 struct thread *td, *td1; 1997 struct umtx_q *uq1; 1998 int error, pri; 1999 #ifdef INVARIANTS 2000 struct umtxq_chain *uc; 2001 2002 uc = umtxq_getchain(&pi->pi_key); 2003 #endif 2004 error = 0; 2005 td = uq->uq_thread; 2006 KASSERT(td == curthread, ("inconsistent uq_thread")); 2007 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 2008 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 2009 umtxq_insert(uq); 2010 mtx_lock(&umtx_lock); 2011 if (pi->pi_owner == NULL) { 2012 mtx_unlock(&umtx_lock); 2013 td1 = tdfind(owner, shared ? 
-1 : td->td_proc->p_pid); 2014 mtx_lock(&umtx_lock); 2015 if (td1 != NULL) { 2016 if (pi->pi_owner == NULL) 2017 umtx_pi_setowner(pi, td1); 2018 PROC_UNLOCK(td1->td_proc); 2019 } 2020 } 2021 2022 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 2023 pri = UPRI(uq1->uq_thread); 2024 if (pri > UPRI(td)) 2025 break; 2026 } 2027 2028 if (uq1 != NULL) 2029 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 2030 else 2031 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 2032 2033 uq->uq_pi_blocked = pi; 2034 thread_lock(td); 2035 td->td_flags |= TDF_UPIBLOCKED; 2036 thread_unlock(td); 2037 umtx_propagate_priority(td); 2038 mtx_unlock(&umtx_lock); 2039 umtxq_unbusy(&uq->uq_key); 2040 2041 error = umtxq_sleep(uq, wmesg, timo); 2042 umtxq_remove(uq); 2043 2044 mtx_lock(&umtx_lock); 2045 uq->uq_pi_blocked = NULL; 2046 thread_lock(td); 2047 td->td_flags &= ~TDF_UPIBLOCKED; 2048 thread_unlock(td); 2049 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 2050 umtx_repropagate_priority(pi); 2051 mtx_unlock(&umtx_lock); 2052 umtxq_unlock(&uq->uq_key); 2053 2054 return (error); 2055 } 2056 2057 /* 2058 * Add reference count for a PI mutex. 2059 */ 2060 void 2061 umtx_pi_ref(struct umtx_pi *pi) 2062 { 2063 2064 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); 2065 pi->pi_refcount++; 2066 } 2067 2068 /* 2069 * Decrease reference count for a PI mutex, if the counter 2070 * is decreased to zero, its memory space is freed. 
2071 */ 2072 void 2073 umtx_pi_unref(struct umtx_pi *pi) 2074 { 2075 struct umtxq_chain *uc; 2076 2077 uc = umtxq_getchain(&pi->pi_key); 2078 UMTXQ_LOCKED_ASSERT(uc); 2079 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2080 if (--pi->pi_refcount == 0) { 2081 mtx_lock(&umtx_lock); 2082 if (pi->pi_owner != NULL) 2083 umtx_pi_disown(pi); 2084 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2085 ("blocked queue not empty")); 2086 mtx_unlock(&umtx_lock); 2087 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2088 umtx_pi_free(pi); 2089 } 2090 } 2091 2092 /* 2093 * Find a PI mutex in hash table. 2094 */ 2095 struct umtx_pi * 2096 umtx_pi_lookup(struct umtx_key *key) 2097 { 2098 struct umtxq_chain *uc; 2099 struct umtx_pi *pi; 2100 2101 uc = umtxq_getchain(key); 2102 UMTXQ_LOCKED_ASSERT(uc); 2103 2104 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2105 if (umtx_key_match(&pi->pi_key, key)) { 2106 return (pi); 2107 } 2108 } 2109 return (NULL); 2110 } 2111 2112 /* 2113 * Insert a PI mutex into hash table. 2114 */ 2115 void 2116 umtx_pi_insert(struct umtx_pi *pi) 2117 { 2118 struct umtxq_chain *uc; 2119 2120 uc = umtxq_getchain(&pi->pi_key); 2121 UMTXQ_LOCKED_ASSERT(uc); 2122 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2123 } 2124 2125 /* 2126 * Drop a PI mutex and wakeup a top waiter. 
2127 */ 2128 int 2129 umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count) 2130 { 2131 struct umtx_q *uq_first, *uq_first2, *uq_me; 2132 struct umtx_pi *pi, *pi2; 2133 int pri; 2134 2135 UMTXQ_ASSERT_LOCKED_BUSY(key); 2136 *count = umtxq_count_pi(key, &uq_first); 2137 if (uq_first != NULL) { 2138 mtx_lock(&umtx_lock); 2139 pi = uq_first->uq_pi_blocked; 2140 KASSERT(pi != NULL, ("pi == NULL?")); 2141 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2142 mtx_unlock(&umtx_lock); 2143 /* userland messed the mutex */ 2144 return (EPERM); 2145 } 2146 uq_me = td->td_umtxq; 2147 if (pi->pi_owner == td) 2148 umtx_pi_disown(pi); 2149 /* get highest priority thread which is still sleeping. */ 2150 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2151 while (uq_first != NULL && 2152 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2153 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2154 } 2155 pri = PRI_MAX; 2156 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2157 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2158 if (uq_first2 != NULL) { 2159 if (pri > UPRI(uq_first2->uq_thread)) 2160 pri = UPRI(uq_first2->uq_thread); 2161 } 2162 } 2163 thread_lock(td); 2164 sched_lend_user_prio(td, pri); 2165 thread_unlock(td); 2166 mtx_unlock(&umtx_lock); 2167 if (uq_first) 2168 umtxq_signal_thread(uq_first); 2169 } else { 2170 pi = umtx_pi_lookup(key); 2171 /* 2172 * A umtx_pi can exist if a signal or timeout removed the 2173 * last waiter from the umtxq, but there is still 2174 * a thread in do_lock_pi() holding the umtx_pi. 2175 */ 2176 if (pi != NULL) { 2177 /* 2178 * The umtx_pi can be unowned, such as when a thread 2179 * has just entered do_lock_pi(), allocated the 2180 * umtx_pi, and unlocked the umtxq. 2181 * If the current thread owns it, it must disown it. 2182 */ 2183 mtx_lock(&umtx_lock); 2184 if (pi->pi_owner == td) 2185 umtx_pi_disown(pi); 2186 mtx_unlock(&umtx_lock); 2187 } 2188 } 2189 return (0); 2190 } 2191 2192 /* 2193 * Lock a PI mutex. 
2194 */ 2195 static int 2196 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 2197 struct _umtx_time *timeout, int try) 2198 { 2199 struct umtx_abs_timeout timo; 2200 struct umtx_q *uq; 2201 struct umtx_pi *pi, *new_pi; 2202 uint32_t id, old_owner, owner, old; 2203 int error, rv; 2204 2205 id = td->td_tid; 2206 uq = td->td_umtxq; 2207 2208 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2209 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2210 &uq->uq_key)) != 0) 2211 return (error); 2212 2213 if (timeout != NULL) 2214 umtx_abs_timeout_init2(&timo, timeout); 2215 2216 umtxq_lock(&uq->uq_key); 2217 pi = umtx_pi_lookup(&uq->uq_key); 2218 if (pi == NULL) { 2219 new_pi = umtx_pi_alloc(M_NOWAIT); 2220 if (new_pi == NULL) { 2221 umtxq_unlock(&uq->uq_key); 2222 new_pi = umtx_pi_alloc(M_WAITOK); 2223 umtxq_lock(&uq->uq_key); 2224 pi = umtx_pi_lookup(&uq->uq_key); 2225 if (pi != NULL) { 2226 umtx_pi_free(new_pi); 2227 new_pi = NULL; 2228 } 2229 } 2230 if (new_pi != NULL) { 2231 new_pi->pi_key = uq->uq_key; 2232 umtx_pi_insert(new_pi); 2233 pi = new_pi; 2234 } 2235 } 2236 umtx_pi_ref(pi); 2237 umtxq_unlock(&uq->uq_key); 2238 2239 /* 2240 * Care must be exercised when dealing with umtx structure. It 2241 * can fault on any access. 2242 */ 2243 for (;;) { 2244 /* 2245 * Try the uncontested case. This should be done in userland. 2246 */ 2247 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 2248 /* The address was invalid. */ 2249 if (rv == -1) { 2250 error = EFAULT; 2251 break; 2252 } 2253 /* The acquire succeeded. */ 2254 if (rv == 0) { 2255 MPASS(owner == UMUTEX_UNOWNED); 2256 error = 0; 2257 break; 2258 } 2259 2260 if (owner == UMUTEX_RB_NOTRECOV) { 2261 error = ENOTRECOVERABLE; 2262 break; 2263 } 2264 2265 /* 2266 * Nobody owns it, but the acquire failed. This can happen 2267 * with ll/sc atomics. 
2268 */ 2269 if (owner == UMUTEX_UNOWNED) { 2270 error = thread_check_susp(td, true); 2271 if (error != 0) 2272 break; 2273 continue; 2274 } 2275 2276 /* 2277 * Avoid overwriting a possible error from sleep due 2278 * to the pending signal with suspension check result. 2279 */ 2280 if (error == 0) { 2281 error = thread_check_susp(td, true); 2282 if (error != 0) 2283 break; 2284 } 2285 2286 /* If no one owns it but it is contested try to acquire it. */ 2287 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2288 old_owner = owner; 2289 rv = casueword32(&m->m_owner, owner, &owner, 2290 id | UMUTEX_CONTESTED); 2291 /* The address was invalid. */ 2292 if (rv == -1) { 2293 error = EFAULT; 2294 break; 2295 } 2296 if (rv == 1) { 2297 if (error == 0) { 2298 error = thread_check_susp(td, true); 2299 if (error != 0) 2300 break; 2301 } 2302 2303 /* 2304 * If this failed the lock could 2305 * changed, restart. 2306 */ 2307 continue; 2308 } 2309 2310 MPASS(rv == 0); 2311 MPASS(owner == old_owner); 2312 umtxq_lock(&uq->uq_key); 2313 umtxq_busy(&uq->uq_key); 2314 error = umtx_pi_claim(pi, td); 2315 umtxq_unbusy(&uq->uq_key); 2316 umtxq_unlock(&uq->uq_key); 2317 if (error != 0) { 2318 /* 2319 * Since we're going to return an 2320 * error, restore the m_owner to its 2321 * previous, unowned state to avoid 2322 * compounding the problem. 2323 */ 2324 (void)casuword32(&m->m_owner, 2325 id | UMUTEX_CONTESTED, old_owner); 2326 } 2327 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2328 error = EOWNERDEAD; 2329 break; 2330 } 2331 2332 if ((owner & ~UMUTEX_CONTESTED) == id) { 2333 error = EDEADLK; 2334 break; 2335 } 2336 2337 if (try != 0) { 2338 error = EBUSY; 2339 break; 2340 } 2341 2342 /* 2343 * If we caught a signal, we have retried and now 2344 * exit immediately. 
2345 */ 2346 if (error != 0) 2347 break; 2348 2349 umtxq_lock(&uq->uq_key); 2350 umtxq_busy(&uq->uq_key); 2351 umtxq_unlock(&uq->uq_key); 2352 2353 /* 2354 * Set the contested bit so that a release in user space 2355 * knows to use the system call for unlock. If this fails 2356 * either some one else has acquired the lock or it has been 2357 * released. 2358 */ 2359 rv = casueword32(&m->m_owner, owner, &old, owner | 2360 UMUTEX_CONTESTED); 2361 2362 /* The address was invalid. */ 2363 if (rv == -1) { 2364 umtxq_unbusy_unlocked(&uq->uq_key); 2365 error = EFAULT; 2366 break; 2367 } 2368 if (rv == 1) { 2369 umtxq_unbusy_unlocked(&uq->uq_key); 2370 error = thread_check_susp(td, true); 2371 if (error != 0) 2372 break; 2373 2374 /* 2375 * The lock changed and we need to retry or we 2376 * lost a race to the thread unlocking the 2377 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2378 * value for owner is impossible there. 2379 */ 2380 continue; 2381 } 2382 2383 umtxq_lock(&uq->uq_key); 2384 2385 /* We set the contested bit, sleep. */ 2386 MPASS(old == owner); 2387 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2388 "umtxpi", timeout == NULL ? NULL : &timo, 2389 (flags & USYNC_PROCESS_SHARED) != 0); 2390 if (error != 0) 2391 continue; 2392 2393 error = thread_check_susp(td, false); 2394 if (error != 0) 2395 break; 2396 } 2397 2398 umtxq_lock(&uq->uq_key); 2399 umtx_pi_unref(pi); 2400 umtxq_unlock(&uq->uq_key); 2401 2402 umtx_key_release(&uq->uq_key); 2403 return (error); 2404 } 2405 2406 /* 2407 * Unlock a PI mutex. 2408 */ 2409 static int 2410 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2411 { 2412 struct umtx_key key; 2413 uint32_t id, new_owner, old, owner; 2414 int count, error; 2415 2416 id = td->td_tid; 2417 2418 usrloop: 2419 /* 2420 * Make sure we own this mtx. 
2421 */ 2422 error = fueword32(&m->m_owner, &owner); 2423 if (error == -1) 2424 return (EFAULT); 2425 2426 if ((owner & ~UMUTEX_CONTESTED) != id) 2427 return (EPERM); 2428 2429 new_owner = umtx_unlock_val(flags, rb); 2430 2431 /* This should be done in userland */ 2432 if ((owner & UMUTEX_CONTESTED) == 0) { 2433 error = casueword32(&m->m_owner, owner, &old, new_owner); 2434 if (error == -1) 2435 return (EFAULT); 2436 if (error == 1) { 2437 error = thread_check_susp(td, true); 2438 if (error != 0) 2439 return (error); 2440 goto usrloop; 2441 } 2442 if (old == owner) 2443 return (0); 2444 owner = old; 2445 } 2446 2447 /* We should only ever be in here for contested locks */ 2448 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2449 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2450 &key)) != 0) 2451 return (error); 2452 2453 umtxq_lock(&key); 2454 umtxq_busy(&key); 2455 error = umtx_pi_drop(td, &key, rb, &count); 2456 if (error != 0) { 2457 umtxq_unbusy(&key); 2458 umtxq_unlock(&key); 2459 umtx_key_release(&key); 2460 /* userland messed the mutex */ 2461 return (error); 2462 } 2463 umtxq_unlock(&key); 2464 2465 /* 2466 * When unlocking the umtx, it must be marked as unowned if 2467 * there is zero or one thread only waiting for it. 2468 * Otherwise, it must be marked as contested. 2469 */ 2470 2471 if (count > 1) 2472 new_owner |= UMUTEX_CONTESTED; 2473 again: 2474 error = casueword32(&m->m_owner, owner, &old, new_owner); 2475 if (error == 1) { 2476 error = thread_check_susp(td, false); 2477 if (error == 0) 2478 goto again; 2479 } 2480 umtxq_unbusy_unlocked(&key); 2481 umtx_key_release(&key); 2482 if (error == -1) 2483 return (EFAULT); 2484 if (error == 0 && old != owner) 2485 return (EINVAL); 2486 return (error); 2487 } 2488 2489 /* 2490 * Lock a PP mutex. 
 */
/*
 * Acquire the priority-protected (PP) umutex 'm' for thread 'td'.
 *
 * 'flags' is the already-fetched m_flags word; it selects the robust or
 * plain key type and the sharing mode.  'timeout', if not NULL, bounds
 * the sleep.  A non-zero 'try' makes the function return EBUSY instead
 * of sleeping when the mutex is held.
 *
 * Each attempt reads the ceiling from m->m_ceilings[0], maps it to the
 * kernel priority PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling), and fails
 * with EINVAL when the ceiling is out of range or when UPRI(td) is
 * numerically below that priority.  When the caller passes the
 * PRIV_SCHED_RTPRIO check, the thread is lent the ceiling priority
 * before the lock word is tried; it is restored if the attempt fails.
 *
 * Returns 0 on success, EOWNERDEAD when the lock was taken over from the
 * UMUTEX_RB_OWNERDEAD state (the lock is held in that case), or an error
 * such as EFAULT, EINVAL, EBUSY, ENOTRECOVERABLE, or whatever
 * umtx_key_get()/umtxq_sleep() reported.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/* su != 0 iff the thread may change real-time priorities. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		/* Remember the value to roll back to if this attempt fails. */
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		/*
		 * 'ceiling' is unsigned, so a user value larger than
		 * RTP_PRIO_MAX wraps around and is caught by the check below.
		 */
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		/* An unlocked PP mutex holds exactly UMUTEX_CONTESTED. */
		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * have not already caught a signal.  If we
			 * get an error from the check, the same
			 * condition is checked by the umtxq_sleep()
			 * call below, so we should obliterate the
			 * error to not skip the last loop iteration.
			 *
			 * NOTE(review): the 'continue' below returns to the
			 * top of the loop, which calls umtxq_busy() again,
			 * and no unbusy is visible on this path -- confirm
			 * that this is intended.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Undo the priority lent for this attempt: restore the saved
		 * inherited priority and recompute the lent priority as the
		 * best (numerically lowest) of it and the first blocked
		 * thread on each PI mutex this thread has contested.
		 */
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	/* The lock was not obtained: roll the lent priority back. */
	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 *
 * Fails with EPERM unless m_owner (ignoring UMUTEX_CONTESTED) equals the
 * caller's tid.  The priority to fall back to is derived from
 * m->m_ceilings[1]: the value -1 maps to PRI_MAX, any other value is
 * validated and mapped like a ceiling (EINVAL when out of range).  The
 * lock word is then rewritten, one waiter is woken, and the lent user
 * priority of 'td' is recomputed.  'rb' is passed through to
 * umtx_unlock_val() to choose the value stored in m_owner.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* rceiling is unsigned; -1 here matches the all-ones bit pattern. */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * The store succeeded: install the new inherited priority
		 * (only for privileged callers) and recompute the lent
		 * priority from it and the contested PI mutexes.
		 */
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Change the priority ceiling of the PP umutex 'm' to 'ceiling'.
 *
 * The mutex must have UMUTEX_PRIO_PROTECT set in m_flags and 'ceiling'
 * must not exceed RTP_PRIO_MAX, otherwise EINVAL is returned.  The new
 * value is stored in m->m_ceilings[0] either after briefly taking the
 * mutex (the lock word is reset to UMUTEX_CONTESTED afterwards) or
 * directly when the caller already owns it; otherwise the thread sleeps
 * until the mutex changes state.  On success all waiters are woken and,
 * if 'old_ceiling' is not NULL, the previous ceiling is copied out to it.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Fetched on every pass so the reported old value is current. */
		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (rv == 0) {
			/* We took the lock: update, then release it again. */
			MPASS(owner == UMUTEX_CONTESTED);
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			/* Already owned by the caller: just update. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	/* Every exit from the loop leaves the chain busy; drop it here. */
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 */
/*
 * Reads m_flags from userland and dispatches to the normal, priority-
 * inheritance, or priority-protect lock routine; having both protocol
 * bits set is rejected with EINVAL.  'mode' is passed through to the
 * protocol routine unchanged.
 *
 * The result is then normalised for system call restart: without a
 * timeout EINTR becomes ERESTART (except in _UMUTEX_WAIT mode); with a
 * timeout ERESTART becomes EINTR.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 *
 * Reads m_flags from userland and dispatches to the unlock routine of
 * the matching protocol.  EINVAL is returned when both protocol bits
 * are set.  'rb' is forwarded unchanged.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	return (EINVAL);
}

/*
 * Wait on the userland condition variable 'cv', releasing the umutex
 * 'm' first.
 *
 * The thread is put on the wait queue and c_has_waiters is set before
 * the mutex is unlocked.  If the unlock fails, the sleep is skipped and
 * the unlock error is returned.  With CVWAIT_CLOCKID in 'wflags' the
 * clock is read from cv->c_clockid, otherwise CLOCK_REALTIME is used;
 * CVWAIT_ABSTIME marks 'timeout' as absolute.
 *
 * Returns 0 whenever the thread is found to be off the queue after the
 * sleep (UQF_UMTXQ clear), regardless of the sleep result.  Otherwise
 * the error is returned with ERESTART converted to EINTR.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, clockid,
		    (wflags & CVWAIT_ABSTIME) != 0, timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal or
		 * a spurious wakeup; clear the c_has_waiters flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		/* Re-check: the flag is tested again after umtxq_busy(). */
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				/* We were the last waiter on this queue. */
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 */
/*
 * Wakes at most one thread sleeping on 'cv'.  When the number of queued
 * waiters did not exceed the number woken, c_has_waiters is cleared in
 * userland; a failed store is reported as EFAULT.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/* The chain lock is dropped around the userland store. */
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Broadcast a userland condition variable: wake every thread sleeping
 * on 'cv' and clear c_has_waiters.  A failed store to c_has_waiters is
 * reported as EFAULT; the waiters have already been woken by then.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}

/*
 * Acquire the userland rwlock 'rwlock' for reading.
 *
 * A reader is blocked by URWLOCK_WRITE_OWNER, and also by
 * URWLOCK_WRITE_WAITERS unless URWLOCK_PREFER_READER is set in either
 * 'fflag' or the lock's own rw_flags.  While not blocked the reader
 * count in rw_state is incremented with a compare-and-set; EAGAIN is
 * returned when the count already equals URWLOCK_MAX_READERS.
 * Otherwise URWLOCK_READ_WAITERS is set, rw_blocked_readers is
 * incremented, and the thread sleeps (bounded by 'timeout' if not NULL)
 * until the blocking bits clear.
 *
 * Returns 0 on success or an error; ERESTART is converted to EINTR.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/* wrflags: the rw_state bits that force a reader to wait. */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * Contention bit is set, before sleeping, increase
		 * read waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				/* Keep an earlier error in preference to this one. */
				error1 = thread_check_susp(td, false);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

static int 3232 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3233 { 3234 struct umtx_abs_timeout timo; 3235 struct umtx_q *uq; 3236 uint32_t flags; 3237 int32_t state, oldstate; 3238 int32_t blocked_writers; 3239 int32_t blocked_readers; 3240 int error, error1, rv; 3241 3242 uq = td->td_umtxq; 3243 error = fueword32(&rwlock->rw_flags, &flags); 3244 if (error == -1) 3245 return (EFAULT); 3246 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3247 if (error != 0) 3248 return (error); 3249 3250 if (timeout != NULL) 3251 umtx_abs_timeout_init2(&timo, timeout); 3252 3253 blocked_readers = 0; 3254 for (;;) { 3255 rv = fueword32(&rwlock->rw_state, &state); 3256 if (rv == -1) { 3257 umtx_key_release(&uq->uq_key); 3258 return (EFAULT); 3259 } 3260 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3261 URWLOCK_READER_COUNT(state) == 0) { 3262 rv = casueword32(&rwlock->rw_state, state, 3263 &oldstate, state | URWLOCK_WRITE_OWNER); 3264 if (rv == -1) { 3265 umtx_key_release(&uq->uq_key); 3266 return (EFAULT); 3267 } 3268 if (rv == 0) { 3269 MPASS(oldstate == state); 3270 umtx_key_release(&uq->uq_key); 3271 return (0); 3272 } 3273 state = oldstate; 3274 error = thread_check_susp(td, true); 3275 if (error != 0) 3276 break; 3277 } 3278 3279 if (error) { 3280 if ((state & (URWLOCK_WRITE_OWNER | 3281 URWLOCK_WRITE_WAITERS)) == 0 && 3282 blocked_readers != 0) { 3283 umtxq_lock(&uq->uq_key); 3284 umtxq_busy(&uq->uq_key); 3285 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3286 UMTX_SHARED_QUEUE); 3287 umtxq_unbusy(&uq->uq_key); 3288 umtxq_unlock(&uq->uq_key); 3289 } 3290 3291 break; 3292 } 3293 3294 /* grab monitor lock */ 3295 umtxq_lock(&uq->uq_key); 3296 umtxq_busy(&uq->uq_key); 3297 umtxq_unlock(&uq->uq_key); 3298 3299 /* 3300 * Re-read the state, in case it changed between the 3301 * try-lock above and the check below. 
3302 */ 3303 rv = fueword32(&rwlock->rw_state, &state); 3304 if (rv == -1) 3305 error = EFAULT; 3306 3307 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3308 URWLOCK_READER_COUNT(state) != 0) && 3309 (state & URWLOCK_WRITE_WAITERS) == 0) { 3310 rv = casueword32(&rwlock->rw_state, state, 3311 &oldstate, state | URWLOCK_WRITE_WAITERS); 3312 if (rv == -1) { 3313 error = EFAULT; 3314 break; 3315 } 3316 if (rv == 0) { 3317 MPASS(oldstate == state); 3318 goto sleep; 3319 } 3320 state = oldstate; 3321 error = thread_check_susp(td, false); 3322 if (error != 0) 3323 break; 3324 } 3325 if (error != 0) { 3326 umtxq_unbusy_unlocked(&uq->uq_key); 3327 break; 3328 } 3329 3330 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3331 URWLOCK_READER_COUNT(state) == 0) { 3332 umtxq_unbusy_unlocked(&uq->uq_key); 3333 error = thread_check_susp(td, false); 3334 if (error != 0) 3335 break; 3336 continue; 3337 } 3338 sleep: 3339 rv = fueword32(&rwlock->rw_blocked_writers, 3340 &blocked_writers); 3341 if (rv == -1) { 3342 umtxq_unbusy_unlocked(&uq->uq_key); 3343 error = EFAULT; 3344 break; 3345 } 3346 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3347 3348 while ((state & URWLOCK_WRITE_OWNER) || 3349 URWLOCK_READER_COUNT(state) != 0) { 3350 umtxq_lock(&uq->uq_key); 3351 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3352 umtxq_unbusy(&uq->uq_key); 3353 3354 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3355 NULL : &timo); 3356 3357 umtxq_busy(&uq->uq_key); 3358 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3359 umtxq_unlock(&uq->uq_key); 3360 if (error) 3361 break; 3362 rv = fueword32(&rwlock->rw_state, &state); 3363 if (rv == -1) { 3364 error = EFAULT; 3365 break; 3366 } 3367 } 3368 3369 rv = fueword32(&rwlock->rw_blocked_writers, 3370 &blocked_writers); 3371 if (rv == -1) { 3372 umtxq_unbusy_unlocked(&uq->uq_key); 3373 error = EFAULT; 3374 break; 3375 } 3376 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3377 if (blocked_writers == 1) { 3378 rv = fueword32(&rwlock->rw_state, &state); 3379 if (rv == -1) { 3380 umtxq_unbusy_unlocked(&uq->uq_key); 3381 error = EFAULT; 3382 break; 3383 } 3384 for (;;) { 3385 rv = casueword32(&rwlock->rw_state, state, 3386 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3387 if (rv == -1) { 3388 error = EFAULT; 3389 break; 3390 } 3391 if (rv == 0) { 3392 MPASS(oldstate == state); 3393 break; 3394 } 3395 state = oldstate; 3396 error1 = thread_check_susp(td, false); 3397 /* 3398 * We are leaving the URWLOCK_WRITE_WAITERS 3399 * behind, but this should not harm the 3400 * correctness. 
3401 */ 3402 if (error1 != 0) { 3403 if (error == 0) 3404 error = error1; 3405 break; 3406 } 3407 } 3408 rv = fueword32(&rwlock->rw_blocked_readers, 3409 &blocked_readers); 3410 if (rv == -1) { 3411 umtxq_unbusy_unlocked(&uq->uq_key); 3412 error = EFAULT; 3413 break; 3414 } 3415 } else 3416 blocked_readers = 0; 3417 3418 umtxq_unbusy_unlocked(&uq->uq_key); 3419 } 3420 3421 umtx_key_release(&uq->uq_key); 3422 if (error == ERESTART) 3423 error = EINTR; 3424 return (error); 3425 } 3426 3427 static int 3428 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3429 { 3430 struct umtx_q *uq; 3431 uint32_t flags; 3432 int32_t state, oldstate; 3433 int error, rv, q, count; 3434 3435 uq = td->td_umtxq; 3436 error = fueword32(&rwlock->rw_flags, &flags); 3437 if (error == -1) 3438 return (EFAULT); 3439 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3440 if (error != 0) 3441 return (error); 3442 3443 error = fueword32(&rwlock->rw_state, &state); 3444 if (error == -1) { 3445 error = EFAULT; 3446 goto out; 3447 } 3448 if (state & URWLOCK_WRITE_OWNER) { 3449 for (;;) { 3450 rv = casueword32(&rwlock->rw_state, state, 3451 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3452 if (rv == -1) { 3453 error = EFAULT; 3454 goto out; 3455 } 3456 if (rv == 1) { 3457 state = oldstate; 3458 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3459 error = EPERM; 3460 goto out; 3461 } 3462 error = thread_check_susp(td, true); 3463 if (error != 0) 3464 goto out; 3465 } else 3466 break; 3467 } 3468 } else if (URWLOCK_READER_COUNT(state) != 0) { 3469 for (;;) { 3470 rv = casueword32(&rwlock->rw_state, state, 3471 &oldstate, state - 1); 3472 if (rv == -1) { 3473 error = EFAULT; 3474 goto out; 3475 } 3476 if (rv == 1) { 3477 state = oldstate; 3478 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3479 error = EPERM; 3480 goto out; 3481 } 3482 error = thread_check_susp(td, true); 3483 if (error != 0) 3484 goto out; 3485 } else 3486 break; 3487 } 3488 } else { 3489 error = EPERM; 3490 goto out; 
3491 } 3492 3493 count = 0; 3494 3495 if (!(flags & URWLOCK_PREFER_READER)) { 3496 if (state & URWLOCK_WRITE_WAITERS) { 3497 count = 1; 3498 q = UMTX_EXCLUSIVE_QUEUE; 3499 } else if (state & URWLOCK_READ_WAITERS) { 3500 count = INT_MAX; 3501 q = UMTX_SHARED_QUEUE; 3502 } 3503 } else { 3504 if (state & URWLOCK_READ_WAITERS) { 3505 count = INT_MAX; 3506 q = UMTX_SHARED_QUEUE; 3507 } else if (state & URWLOCK_WRITE_WAITERS) { 3508 count = 1; 3509 q = UMTX_EXCLUSIVE_QUEUE; 3510 } 3511 } 3512 3513 if (count) { 3514 umtxq_lock(&uq->uq_key); 3515 umtxq_busy(&uq->uq_key); 3516 umtxq_signal_queue(&uq->uq_key, count, q); 3517 umtxq_unbusy(&uq->uq_key); 3518 umtxq_unlock(&uq->uq_key); 3519 } 3520 out: 3521 umtx_key_release(&uq->uq_key); 3522 return (error); 3523 } 3524 3525 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3526 static int 3527 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3528 { 3529 struct umtx_abs_timeout timo; 3530 struct umtx_q *uq; 3531 uint32_t flags, count, count1; 3532 int error, rv, rv1; 3533 3534 uq = td->td_umtxq; 3535 error = fueword32(&sem->_flags, &flags); 3536 if (error == -1) 3537 return (EFAULT); 3538 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3539 if (error != 0) 3540 return (error); 3541 3542 if (timeout != NULL) 3543 umtx_abs_timeout_init2(&timo, timeout); 3544 3545 again: 3546 umtxq_lock(&uq->uq_key); 3547 umtxq_busy(&uq->uq_key); 3548 umtxq_insert(uq); 3549 umtxq_unlock(&uq->uq_key); 3550 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3551 if (rv == 0) 3552 rv1 = fueword32(&sem->_count, &count); 3553 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3554 (rv == 1 && count1 == 0)) { 3555 umtxq_lock(&uq->uq_key); 3556 umtxq_unbusy(&uq->uq_key); 3557 umtxq_remove(uq); 3558 umtxq_unlock(&uq->uq_key); 3559 if (rv == 1) { 3560 rv = thread_check_susp(td, true); 3561 if (rv == 0) 3562 goto again; 3563 error = rv; 3564 goto out; 3565 } 3566 if (rv == 0) 3567 rv = rv1; 
3568 error = rv == -1 ? EFAULT : 0; 3569 goto out; 3570 } 3571 umtxq_lock(&uq->uq_key); 3572 umtxq_unbusy(&uq->uq_key); 3573 3574 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3575 3576 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3577 error = 0; 3578 else { 3579 umtxq_remove(uq); 3580 /* A relative timeout cannot be restarted. */ 3581 if (error == ERESTART && timeout != NULL && 3582 (timeout->_flags & UMTX_ABSTIME) == 0) 3583 error = EINTR; 3584 } 3585 umtxq_unlock(&uq->uq_key); 3586 out: 3587 umtx_key_release(&uq->uq_key); 3588 return (error); 3589 } 3590 3591 /* 3592 * Signal a userland semaphore. 3593 */ 3594 static int 3595 do_sem_wake(struct thread *td, struct _usem *sem) 3596 { 3597 struct umtx_key key; 3598 int error, cnt; 3599 uint32_t flags; 3600 3601 error = fueword32(&sem->_flags, &flags); 3602 if (error == -1) 3603 return (EFAULT); 3604 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3605 return (error); 3606 umtxq_lock(&key); 3607 umtxq_busy(&key); 3608 cnt = umtxq_count(&key); 3609 if (cnt > 0) { 3610 /* 3611 * Check if count is greater than 0, this means the memory is 3612 * still being referenced by user code, so we can safely 3613 * update _has_waiters flag. 
3614 */ 3615 if (cnt == 1) { 3616 umtxq_unlock(&key); 3617 error = suword32(&sem->_has_waiters, 0); 3618 umtxq_lock(&key); 3619 if (error == -1) 3620 error = EFAULT; 3621 } 3622 umtxq_signal(&key, 1); 3623 } 3624 umtxq_unbusy(&key); 3625 umtxq_unlock(&key); 3626 umtx_key_release(&key); 3627 return (error); 3628 } 3629 #endif 3630 3631 static int 3632 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3633 { 3634 struct umtx_abs_timeout timo; 3635 struct umtx_q *uq; 3636 uint32_t count, flags; 3637 int error, rv; 3638 3639 uq = td->td_umtxq; 3640 flags = fuword32(&sem->_flags); 3641 if (timeout != NULL) 3642 umtx_abs_timeout_init2(&timo, timeout); 3643 3644 again: 3645 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3646 if (error != 0) 3647 return (error); 3648 umtxq_lock(&uq->uq_key); 3649 umtxq_busy(&uq->uq_key); 3650 umtxq_insert(uq); 3651 umtxq_unlock(&uq->uq_key); 3652 rv = fueword32(&sem->_count, &count); 3653 if (rv == -1) { 3654 umtxq_lock(&uq->uq_key); 3655 umtxq_unbusy(&uq->uq_key); 3656 umtxq_remove(uq); 3657 umtxq_unlock(&uq->uq_key); 3658 umtx_key_release(&uq->uq_key); 3659 return (EFAULT); 3660 } 3661 for (;;) { 3662 if (USEM_COUNT(count) != 0) { 3663 umtxq_lock(&uq->uq_key); 3664 umtxq_unbusy(&uq->uq_key); 3665 umtxq_remove(uq); 3666 umtxq_unlock(&uq->uq_key); 3667 umtx_key_release(&uq->uq_key); 3668 return (0); 3669 } 3670 if (count == USEM_HAS_WAITERS) 3671 break; 3672 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3673 if (rv == 0) 3674 break; 3675 umtxq_lock(&uq->uq_key); 3676 umtxq_unbusy(&uq->uq_key); 3677 umtxq_remove(uq); 3678 umtxq_unlock(&uq->uq_key); 3679 umtx_key_release(&uq->uq_key); 3680 if (rv == -1) 3681 return (EFAULT); 3682 rv = thread_check_susp(td, true); 3683 if (rv != 0) 3684 return (rv); 3685 goto again; 3686 } 3687 umtxq_lock(&uq->uq_key); 3688 umtxq_unbusy(&uq->uq_key); 3689 3690 error = umtxq_sleep(uq, "usem", timeout == NULL ? 
NULL : &timo); 3691 3692 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3693 error = 0; 3694 else { 3695 umtxq_remove(uq); 3696 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3697 /* A relative timeout cannot be restarted. */ 3698 if (error == ERESTART) 3699 error = EINTR; 3700 if (error == EINTR) { 3701 kern_clock_gettime(curthread, timo.clockid, 3702 &timo.cur); 3703 timespecsub(&timo.end, &timo.cur, 3704 &timeout->_timeout); 3705 } 3706 } 3707 } 3708 umtxq_unlock(&uq->uq_key); 3709 umtx_key_release(&uq->uq_key); 3710 return (error); 3711 } 3712 3713 /* 3714 * Signal a userland semaphore. 3715 */ 3716 static int 3717 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3718 { 3719 struct umtx_key key; 3720 int error, cnt, rv; 3721 uint32_t count, flags; 3722 3723 rv = fueword32(&sem->_flags, &flags); 3724 if (rv == -1) 3725 return (EFAULT); 3726 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3727 return (error); 3728 umtxq_lock(&key); 3729 umtxq_busy(&key); 3730 cnt = umtxq_count(&key); 3731 if (cnt > 0) { 3732 /* 3733 * If this was the last sleeping thread, clear the waiters 3734 * flag in _count. 
3735 */ 3736 if (cnt == 1) { 3737 umtxq_unlock(&key); 3738 rv = fueword32(&sem->_count, &count); 3739 while (rv != -1 && count & USEM_HAS_WAITERS) { 3740 rv = casueword32(&sem->_count, count, &count, 3741 count & ~USEM_HAS_WAITERS); 3742 if (rv == 1) { 3743 rv = thread_check_susp(td, true); 3744 if (rv != 0) 3745 break; 3746 } 3747 } 3748 if (rv == -1) 3749 error = EFAULT; 3750 else if (rv > 0) { 3751 error = rv; 3752 } 3753 umtxq_lock(&key); 3754 } 3755 3756 umtxq_signal(&key, 1); 3757 } 3758 umtxq_unbusy(&key); 3759 umtxq_unlock(&key); 3760 umtx_key_release(&key); 3761 return (error); 3762 } 3763 3764 #ifdef COMPAT_FREEBSD10 3765 int 3766 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3767 { 3768 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3769 } 3770 3771 int 3772 freebsd10__umtx_unlock(struct thread *td, 3773 struct freebsd10__umtx_unlock_args *uap) 3774 { 3775 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3776 } 3777 #endif 3778 3779 inline int 3780 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3781 { 3782 int error; 3783 3784 error = copyin(uaddr, tsp, sizeof(*tsp)); 3785 if (error == 0) { 3786 if (!timespecvalid_interval(tsp)) 3787 error = EINVAL; 3788 } 3789 return (error); 3790 } 3791 3792 static inline int 3793 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3794 { 3795 int error; 3796 3797 if (size <= sizeof(tp->_timeout)) { 3798 tp->_clockid = CLOCK_REALTIME; 3799 tp->_flags = 0; 3800 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3801 } else 3802 error = copyin(uaddr, tp, sizeof(*tp)); 3803 if (error != 0) 3804 return (error); 3805 if (!timespecvalid_interval(&tp->_timeout)) 3806 return (EINVAL); 3807 return (0); 3808 } 3809 3810 static int 3811 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3812 struct umtx_robust_lists_params *rb) 3813 { 3814 3815 if (size > sizeof(*rb)) 3816 return (EINVAL); 3817 return (copyin(uaddr, rb, size)); 3818 } 
3819 3820 static int 3821 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3822 { 3823 3824 /* 3825 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3826 * and we're only called if sz >= sizeof(timespec) as supplied in the 3827 * copyops. 3828 */ 3829 KASSERT(sz >= sizeof(*tsp), 3830 ("umtx_copyops specifies incorrect sizes")); 3831 3832 return (copyout(tsp, uaddr, sizeof(*tsp))); 3833 } 3834 3835 #ifdef COMPAT_FREEBSD10 3836 static int 3837 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3838 const struct umtx_copyops *ops) 3839 { 3840 struct timespec *ts, timeout; 3841 int error; 3842 3843 /* Allow a null timespec (wait forever). */ 3844 if (uap->uaddr2 == NULL) 3845 ts = NULL; 3846 else { 3847 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3848 if (error != 0) 3849 return (error); 3850 ts = &timeout; 3851 } 3852 #ifdef COMPAT_FREEBSD32 3853 if (ops->compat32) 3854 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3855 #endif 3856 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3857 } 3858 3859 static int 3860 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3861 const struct umtx_copyops *ops) 3862 { 3863 #ifdef COMPAT_FREEBSD32 3864 if (ops->compat32) 3865 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3866 #endif 3867 return (do_unlock_umtx(td, uap->obj, uap->val)); 3868 } 3869 #endif /* COMPAT_FREEBSD10 */ 3870 3871 #if !defined(COMPAT_FREEBSD10) 3872 static int 3873 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3874 const struct umtx_copyops *ops __unused) 3875 { 3876 return (EOPNOTSUPP); 3877 } 3878 #endif /* COMPAT_FREEBSD10 */ 3879 3880 static int 3881 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3882 const struct umtx_copyops *ops) 3883 { 3884 struct _umtx_time timeout, *tm_p; 3885 int error; 3886 3887 if (uap->uaddr2 == NULL) 3888 tm_p = NULL; 3889 else { 3890 error = ops->copyin_umtx_time( 3891 uap->uaddr2, 
(size_t)uap->uaddr1, &timeout); 3892 if (error != 0) 3893 return (error); 3894 tm_p = &timeout; 3895 } 3896 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3897 } 3898 3899 static int 3900 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3901 const struct umtx_copyops *ops) 3902 { 3903 struct _umtx_time timeout, *tm_p; 3904 int error; 3905 3906 if (uap->uaddr2 == NULL) 3907 tm_p = NULL; 3908 else { 3909 error = ops->copyin_umtx_time( 3910 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3911 if (error != 0) 3912 return (error); 3913 tm_p = &timeout; 3914 } 3915 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3916 } 3917 3918 static int 3919 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3920 const struct umtx_copyops *ops) 3921 { 3922 struct _umtx_time *tm_p, timeout; 3923 int error; 3924 3925 if (uap->uaddr2 == NULL) 3926 tm_p = NULL; 3927 else { 3928 error = ops->copyin_umtx_time( 3929 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3930 if (error != 0) 3931 return (error); 3932 tm_p = &timeout; 3933 } 3934 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3935 } 3936 3937 static int 3938 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3939 const struct umtx_copyops *ops __unused) 3940 { 3941 3942 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3943 } 3944 3945 #define BATCH_SIZE 128 3946 static int 3947 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3948 { 3949 char *uaddrs[BATCH_SIZE], **upp; 3950 int count, error, i, pos, tocopy; 3951 3952 upp = (char **)uap->obj; 3953 error = 0; 3954 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3955 pos += tocopy) { 3956 tocopy = MIN(count, BATCH_SIZE); 3957 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3958 if (error != 0) 3959 break; 3960 for (i = 0; i < tocopy; ++i) { 3961 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3962 } 3963 maybe_yield(); 3964 } 3965 return (error); 3966 } 3967 
3968 static int 3969 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3970 { 3971 uint32_t uaddrs[BATCH_SIZE], *upp; 3972 int count, error, i, pos, tocopy; 3973 3974 upp = (uint32_t *)uap->obj; 3975 error = 0; 3976 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3977 pos += tocopy) { 3978 tocopy = MIN(count, BATCH_SIZE); 3979 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3980 if (error != 0) 3981 break; 3982 for (i = 0; i < tocopy; ++i) { 3983 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3984 INT_MAX, 1); 3985 } 3986 maybe_yield(); 3987 } 3988 return (error); 3989 } 3990 3991 static int 3992 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3993 const struct umtx_copyops *ops) 3994 { 3995 3996 if (ops->compat32) 3997 return (__umtx_op_nwake_private_compat32(td, uap)); 3998 return (__umtx_op_nwake_private_native(td, uap)); 3999 } 4000 4001 static int 4002 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4003 const struct umtx_copyops *ops __unused) 4004 { 4005 4006 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4007 } 4008 4009 static int 4010 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4011 const struct umtx_copyops *ops) 4012 { 4013 struct _umtx_time *tm_p, timeout; 4014 int error; 4015 4016 /* Allow a null timespec (wait forever). 
*/ 4017 if (uap->uaddr2 == NULL) 4018 tm_p = NULL; 4019 else { 4020 error = ops->copyin_umtx_time( 4021 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4022 if (error != 0) 4023 return (error); 4024 tm_p = &timeout; 4025 } 4026 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4027 } 4028 4029 static int 4030 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4031 const struct umtx_copyops *ops __unused) 4032 { 4033 4034 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4035 } 4036 4037 static int 4038 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4039 const struct umtx_copyops *ops) 4040 { 4041 struct _umtx_time *tm_p, timeout; 4042 int error; 4043 4044 /* Allow a null timespec (wait forever). */ 4045 if (uap->uaddr2 == NULL) 4046 tm_p = NULL; 4047 else { 4048 error = ops->copyin_umtx_time( 4049 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4050 if (error != 0) 4051 return (error); 4052 tm_p = &timeout; 4053 } 4054 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4055 } 4056 4057 static int 4058 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4059 const struct umtx_copyops *ops __unused) 4060 { 4061 4062 return (do_wake_umutex(td, uap->obj)); 4063 } 4064 4065 static int 4066 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4067 const struct umtx_copyops *ops __unused) 4068 { 4069 4070 return (do_unlock_umutex(td, uap->obj, false)); 4071 } 4072 4073 static int 4074 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4075 const struct umtx_copyops *ops __unused) 4076 { 4077 4078 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4079 } 4080 4081 static int 4082 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4083 const struct umtx_copyops *ops) 4084 { 4085 struct timespec *ts, timeout; 4086 int error; 4087 4088 /* Allow a null timespec (wait forever). 
*/ 4089 if (uap->uaddr2 == NULL) 4090 ts = NULL; 4091 else { 4092 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4093 if (error != 0) 4094 return (error); 4095 ts = &timeout; 4096 } 4097 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4098 } 4099 4100 static int 4101 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4102 const struct umtx_copyops *ops __unused) 4103 { 4104 4105 return (do_cv_signal(td, uap->obj)); 4106 } 4107 4108 static int 4109 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4110 const struct umtx_copyops *ops __unused) 4111 { 4112 4113 return (do_cv_broadcast(td, uap->obj)); 4114 } 4115 4116 static int 4117 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4118 const struct umtx_copyops *ops) 4119 { 4120 struct _umtx_time timeout; 4121 int error; 4122 4123 /* Allow a null timespec (wait forever). */ 4124 if (uap->uaddr2 == NULL) { 4125 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4126 } else { 4127 error = ops->copyin_umtx_time(uap->uaddr2, 4128 (size_t)uap->uaddr1, &timeout); 4129 if (error != 0) 4130 return (error); 4131 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4132 } 4133 return (error); 4134 } 4135 4136 static int 4137 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4138 const struct umtx_copyops *ops) 4139 { 4140 struct _umtx_time timeout; 4141 int error; 4142 4143 /* Allow a null timespec (wait forever). 
*/ 4144 if (uap->uaddr2 == NULL) { 4145 error = do_rw_wrlock(td, uap->obj, 0); 4146 } else { 4147 error = ops->copyin_umtx_time(uap->uaddr2, 4148 (size_t)uap->uaddr1, &timeout); 4149 if (error != 0) 4150 return (error); 4151 4152 error = do_rw_wrlock(td, uap->obj, &timeout); 4153 } 4154 return (error); 4155 } 4156 4157 static int 4158 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4159 const struct umtx_copyops *ops __unused) 4160 { 4161 4162 return (do_rw_unlock(td, uap->obj)); 4163 } 4164 4165 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4166 static int 4167 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4168 const struct umtx_copyops *ops) 4169 { 4170 struct _umtx_time *tm_p, timeout; 4171 int error; 4172 4173 /* Allow a null timespec (wait forever). */ 4174 if (uap->uaddr2 == NULL) 4175 tm_p = NULL; 4176 else { 4177 error = ops->copyin_umtx_time( 4178 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4179 if (error != 0) 4180 return (error); 4181 tm_p = &timeout; 4182 } 4183 return (do_sem_wait(td, uap->obj, tm_p)); 4184 } 4185 4186 static int 4187 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4188 const struct umtx_copyops *ops __unused) 4189 { 4190 4191 return (do_sem_wake(td, uap->obj)); 4192 } 4193 #endif 4194 4195 static int 4196 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4197 const struct umtx_copyops *ops __unused) 4198 { 4199 4200 return (do_wake2_umutex(td, uap->obj, uap->val)); 4201 } 4202 4203 static int 4204 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4205 const struct umtx_copyops *ops) 4206 { 4207 struct _umtx_time *tm_p, timeout; 4208 size_t uasize; 4209 int error; 4210 4211 /* Allow a null timespec (wait forever). 
*/ 4212 if (uap->uaddr2 == NULL) { 4213 uasize = 0; 4214 tm_p = NULL; 4215 } else { 4216 uasize = (size_t)uap->uaddr1; 4217 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4218 if (error != 0) 4219 return (error); 4220 tm_p = &timeout; 4221 } 4222 error = do_sem2_wait(td, uap->obj, tm_p); 4223 if (error == EINTR && uap->uaddr2 != NULL && 4224 (timeout._flags & UMTX_ABSTIME) == 0 && 4225 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4226 error = ops->copyout_timeout( 4227 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4228 uasize - ops->umtx_time_sz, &timeout._timeout); 4229 if (error == 0) { 4230 error = EINTR; 4231 } 4232 } 4233 4234 return (error); 4235 } 4236 4237 static int 4238 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4239 const struct umtx_copyops *ops __unused) 4240 { 4241 4242 return (do_sem2_wake(td, uap->obj)); 4243 } 4244 4245 #define USHM_OBJ_UMTX(o) \ 4246 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4247 4248 #define USHMF_REG_LINKED 0x0001 4249 #define USHMF_OBJ_LINKED 0x0002 4250 struct umtx_shm_reg { 4251 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4252 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4253 struct umtx_key ushm_key; 4254 struct ucred *ushm_cred; 4255 struct shmfd *ushm_obj; 4256 u_int ushm_refcnt; 4257 u_int ushm_flags; 4258 }; 4259 4260 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4261 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4262 4263 static uma_zone_t umtx_shm_reg_zone; 4264 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4265 static struct mtx umtx_shm_lock; 4266 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4267 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4268 4269 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4270 4271 static void 4272 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4273 { 4274 struct umtx_shm_reg_head d; 4275 struct umtx_shm_reg *reg, *reg1; 4276 4277 TAILQ_INIT(&d); 4278 mtx_lock(&umtx_shm_lock); 4279 
TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4280 mtx_unlock(&umtx_shm_lock); 4281 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4282 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4283 umtx_shm_free_reg(reg); 4284 } 4285 } 4286 4287 static struct task umtx_shm_reg_delfree_task = 4288 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4289 4290 static struct umtx_shm_reg * 4291 umtx_shm_find_reg_locked(const struct umtx_key *key) 4292 { 4293 struct umtx_shm_reg *reg; 4294 struct umtx_shm_reg_head *reg_head; 4295 4296 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4297 mtx_assert(&umtx_shm_lock, MA_OWNED); 4298 reg_head = &umtx_shm_registry[key->hash]; 4299 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4300 KASSERT(reg->ushm_key.shared, 4301 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4302 if (reg->ushm_key.info.shared.object == 4303 key->info.shared.object && 4304 reg->ushm_key.info.shared.offset == 4305 key->info.shared.offset) { 4306 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4307 KASSERT(reg->ushm_refcnt > 0, 4308 ("reg %p refcnt 0 onlist", reg)); 4309 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4310 ("reg %p not linked", reg)); 4311 reg->ushm_refcnt++; 4312 return (reg); 4313 } 4314 } 4315 return (NULL); 4316 } 4317 4318 static struct umtx_shm_reg * 4319 umtx_shm_find_reg(const struct umtx_key *key) 4320 { 4321 struct umtx_shm_reg *reg; 4322 4323 mtx_lock(&umtx_shm_lock); 4324 reg = umtx_shm_find_reg_locked(key); 4325 mtx_unlock(&umtx_shm_lock); 4326 return (reg); 4327 } 4328 4329 static void 4330 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4331 { 4332 4333 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4334 crfree(reg->ushm_cred); 4335 shm_drop(reg->ushm_obj); 4336 uma_zfree(umtx_shm_reg_zone, reg); 4337 } 4338 4339 static bool 4340 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 4341 { 4342 bool res; 4343 4344 mtx_assert(&umtx_shm_lock, MA_OWNED); 4345 KASSERT(reg->ushm_refcnt > 0, 
("ushm_reg %p refcnt 0", reg)); 4346 reg->ushm_refcnt--; 4347 res = reg->ushm_refcnt == 0; 4348 if (res || force) { 4349 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 4350 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 4351 reg, ushm_reg_link); 4352 reg->ushm_flags &= ~USHMF_REG_LINKED; 4353 } 4354 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 4355 LIST_REMOVE(reg, ushm_obj_link); 4356 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 4357 } 4358 } 4359 return (res); 4360 } 4361 4362 static void 4363 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 4364 { 4365 vm_object_t object; 4366 bool dofree; 4367 4368 if (force) { 4369 object = reg->ushm_obj->shm_object; 4370 VM_OBJECT_WLOCK(object); 4371 object->flags |= OBJ_UMTXDEAD; 4372 VM_OBJECT_WUNLOCK(object); 4373 } 4374 mtx_lock(&umtx_shm_lock); 4375 dofree = umtx_shm_unref_reg_locked(reg, force); 4376 mtx_unlock(&umtx_shm_lock); 4377 if (dofree) 4378 umtx_shm_free_reg(reg); 4379 } 4380 4381 void 4382 umtx_shm_object_init(vm_object_t object) 4383 { 4384 4385 LIST_INIT(USHM_OBJ_UMTX(object)); 4386 } 4387 4388 void 4389 umtx_shm_object_terminated(vm_object_t object) 4390 { 4391 struct umtx_shm_reg *reg, *reg1; 4392 bool dofree; 4393 4394 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4395 return; 4396 4397 dofree = false; 4398 mtx_lock(&umtx_shm_lock); 4399 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4400 if (umtx_shm_unref_reg_locked(reg, true)) { 4401 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4402 ushm_reg_link); 4403 dofree = true; 4404 } 4405 } 4406 mtx_unlock(&umtx_shm_lock); 4407 if (dofree) 4408 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4409 } 4410 4411 static int 4412 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4413 struct umtx_shm_reg **res) 4414 { 4415 struct umtx_shm_reg *reg, *reg1; 4416 struct ucred *cred; 4417 int error; 4418 4419 reg = umtx_shm_find_reg(key); 4420 if (reg != NULL) { 4421 *res = reg; 4422 return (0); 4423 } 4424 
cred = td->td_ucred; 4425 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4426 return (ENOMEM); 4427 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4428 reg->ushm_refcnt = 1; 4429 bcopy(key, ®->ushm_key, sizeof(*key)); 4430 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4431 reg->ushm_cred = crhold(cred); 4432 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4433 if (error != 0) { 4434 umtx_shm_free_reg(reg); 4435 return (error); 4436 } 4437 mtx_lock(&umtx_shm_lock); 4438 reg1 = umtx_shm_find_reg_locked(key); 4439 if (reg1 != NULL) { 4440 mtx_unlock(&umtx_shm_lock); 4441 umtx_shm_free_reg(reg); 4442 *res = reg1; 4443 return (0); 4444 } 4445 reg->ushm_refcnt++; 4446 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4447 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4448 ushm_obj_link); 4449 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4450 mtx_unlock(&umtx_shm_lock); 4451 *res = reg; 4452 return (0); 4453 } 4454 4455 static int 4456 umtx_shm_alive(struct thread *td, void *addr) 4457 { 4458 vm_map_t map; 4459 vm_map_entry_t entry; 4460 vm_object_t object; 4461 vm_pindex_t pindex; 4462 vm_prot_t prot; 4463 int res, ret; 4464 boolean_t wired; 4465 4466 map = &td->td_proc->p_vmspace->vm_map; 4467 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4468 &object, &pindex, &prot, &wired); 4469 if (res != KERN_SUCCESS) 4470 return (EFAULT); 4471 if (object == NULL) 4472 ret = EINVAL; 4473 else 4474 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? 
ENOTTY : 0; 4475 vm_map_lookup_done(map, entry); 4476 return (ret); 4477 } 4478 4479 static void 4480 umtx_shm_init(void) 4481 { 4482 int i; 4483 4484 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4485 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4486 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4487 for (i = 0; i < nitems(umtx_shm_registry); i++) 4488 TAILQ_INIT(&umtx_shm_registry[i]); 4489 } 4490 4491 static int 4492 umtx_shm(struct thread *td, void *addr, u_int flags) 4493 { 4494 struct umtx_key key; 4495 struct umtx_shm_reg *reg; 4496 struct file *fp; 4497 int error, fd; 4498 4499 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4500 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4501 return (EINVAL); 4502 if ((flags & UMTX_SHM_ALIVE) != 0) 4503 return (umtx_shm_alive(td, addr)); 4504 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4505 if (error != 0) 4506 return (error); 4507 KASSERT(key.shared == 1, ("non-shared key")); 4508 if ((flags & UMTX_SHM_CREAT) != 0) { 4509 error = umtx_shm_create_reg(td, &key, ®); 4510 } else { 4511 reg = umtx_shm_find_reg(&key); 4512 if (reg == NULL) 4513 error = ESRCH; 4514 } 4515 umtx_key_release(&key); 4516 if (error != 0) 4517 return (error); 4518 KASSERT(reg != NULL, ("no reg")); 4519 if ((flags & UMTX_SHM_DESTROY) != 0) { 4520 umtx_shm_unref_reg(reg, true); 4521 } else { 4522 #if 0 4523 #ifdef MAC 4524 error = mac_posixshm_check_open(td->td_ucred, 4525 reg->ushm_obj, FFLAGS(O_RDWR)); 4526 if (error == 0) 4527 #endif 4528 error = shm_access(reg->ushm_obj, td->td_ucred, 4529 FFLAGS(O_RDWR)); 4530 if (error == 0) 4531 #endif 4532 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4533 if (error == 0) { 4534 shm_hold(reg->ushm_obj); 4535 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4536 &shm_ops); 4537 td->td_retval[0] = fd; 4538 fdrop(fp, td); 4539 } 4540 } 4541 umtx_shm_unref_reg(reg, false); 4542 return (error); 4543 } 4544 4545 static int 4546 __umtx_op_shm(struct thread 
*td, struct _umtx_op_args *uap, 4547 const struct umtx_copyops *ops __unused) 4548 { 4549 4550 return (umtx_shm(td, uap->uaddr1, uap->val)); 4551 } 4552 4553 static int 4554 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4555 const struct umtx_copyops *ops) 4556 { 4557 struct umtx_robust_lists_params rb; 4558 int error; 4559 4560 if (ops->compat32) { 4561 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4562 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4563 td->td_rb_inact != 0)) 4564 return (EBUSY); 4565 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4566 return (EBUSY); 4567 } 4568 4569 bzero(&rb, sizeof(rb)); 4570 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4571 if (error != 0) 4572 return (error); 4573 4574 if (ops->compat32) 4575 td->td_pflags2 |= TDP2_COMPAT32RB; 4576 4577 td->td_rb_list = rb.robust_list_offset; 4578 td->td_rbp_list = rb.robust_priv_list_offset; 4579 td->td_rb_inact = rb.robust_inact_offset; 4580 return (0); 4581 } 4582 4583 #if defined(__i386__) || defined(__amd64__) 4584 /* 4585 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4586 * 32-bit time_t there. Other architectures just need the i386 definitions 4587 * along with their standard compat32. 4588 */ 4589 struct timespecx32 { 4590 int64_t tv_sec; 4591 int32_t tv_nsec; 4592 }; 4593 4594 struct umtx_timex32 { 4595 struct timespecx32 _timeout; 4596 uint32_t _flags; 4597 uint32_t _clockid; 4598 }; 4599 4600 #ifndef __i386__ 4601 #define timespeci386 timespec32 4602 #define umtx_timei386 umtx_time32 4603 #endif 4604 #else /* !__i386__ && !__amd64__ */ 4605 /* 32-bit architectures can emulate i386, so define these almost everywhere. 
*/ 4606 struct timespeci386 { 4607 int32_t tv_sec; 4608 int32_t tv_nsec; 4609 }; 4610 4611 struct umtx_timei386 { 4612 struct timespeci386 _timeout; 4613 uint32_t _flags; 4614 uint32_t _clockid; 4615 }; 4616 4617 #if defined(__LP64__) 4618 #define timespecx32 timespec32 4619 #define umtx_timex32 umtx_time32 4620 #endif 4621 #endif 4622 4623 static int 4624 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4625 struct umtx_robust_lists_params *rbp) 4626 { 4627 struct umtx_robust_lists_params_compat32 rb32; 4628 int error; 4629 4630 if (size > sizeof(rb32)) 4631 return (EINVAL); 4632 bzero(&rb32, sizeof(rb32)); 4633 error = copyin(uaddr, &rb32, size); 4634 if (error != 0) 4635 return (error); 4636 CP(rb32, *rbp, robust_list_offset); 4637 CP(rb32, *rbp, robust_priv_list_offset); 4638 CP(rb32, *rbp, robust_inact_offset); 4639 return (0); 4640 } 4641 4642 #ifndef __i386__ 4643 static inline int 4644 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4645 { 4646 struct timespeci386 ts32; 4647 int error; 4648 4649 error = copyin(uaddr, &ts32, sizeof(ts32)); 4650 if (error == 0) { 4651 if (!timespecvalid_interval(&ts32)) 4652 error = EINVAL; 4653 else { 4654 CP(ts32, *tsp, tv_sec); 4655 CP(ts32, *tsp, tv_nsec); 4656 } 4657 } 4658 return (error); 4659 } 4660 4661 static inline int 4662 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4663 { 4664 struct umtx_timei386 t32; 4665 int error; 4666 4667 t32._clockid = CLOCK_REALTIME; 4668 t32._flags = 0; 4669 if (size <= sizeof(t32._timeout)) 4670 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4671 else 4672 error = copyin(uaddr, &t32, sizeof(t32)); 4673 if (error != 0) 4674 return (error); 4675 if (!timespecvalid_interval(&t32._timeout)) 4676 return (EINVAL); 4677 TS_CP(t32, *tp, _timeout); 4678 CP(t32, *tp, _flags); 4679 CP(t32, *tp, _clockid); 4680 return (0); 4681 } 4682 4683 static int 4684 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec 
*tsp) 4685 { 4686 struct timespeci386 remain32 = { 4687 .tv_sec = tsp->tv_sec, 4688 .tv_nsec = tsp->tv_nsec, 4689 }; 4690 4691 /* 4692 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4693 * and we're only called if sz >= sizeof(timespec) as supplied in the 4694 * copyops. 4695 */ 4696 KASSERT(sz >= sizeof(remain32), 4697 ("umtx_copyops specifies incorrect sizes")); 4698 4699 return (copyout(&remain32, uaddr, sizeof(remain32))); 4700 } 4701 #endif /* !__i386__ */ 4702 4703 #if defined(__i386__) || defined(__LP64__) 4704 static inline int 4705 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4706 { 4707 struct timespecx32 ts32; 4708 int error; 4709 4710 error = copyin(uaddr, &ts32, sizeof(ts32)); 4711 if (error == 0) { 4712 if (!timespecvalid_interval(&ts32)) 4713 error = EINVAL; 4714 else { 4715 CP(ts32, *tsp, tv_sec); 4716 CP(ts32, *tsp, tv_nsec); 4717 } 4718 } 4719 return (error); 4720 } 4721 4722 static inline int 4723 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4724 { 4725 struct umtx_timex32 t32; 4726 int error; 4727 4728 t32._clockid = CLOCK_REALTIME; 4729 t32._flags = 0; 4730 if (size <= sizeof(t32._timeout)) 4731 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4732 else 4733 error = copyin(uaddr, &t32, sizeof(t32)); 4734 if (error != 0) 4735 return (error); 4736 if (!timespecvalid_interval(&t32._timeout)) 4737 return (EINVAL); 4738 TS_CP(t32, *tp, _timeout); 4739 CP(t32, *tp, _flags); 4740 CP(t32, *tp, _clockid); 4741 return (0); 4742 } 4743 4744 static int 4745 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4746 { 4747 struct timespecx32 remain32 = { 4748 .tv_sec = tsp->tv_sec, 4749 .tv_nsec = tsp->tv_nsec, 4750 }; 4751 4752 /* 4753 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4754 * and we're only called if sz >= sizeof(timespec) as supplied in the 4755 * copyops. 
4756 */ 4757 KASSERT(sz >= sizeof(remain32), 4758 ("umtx_copyops specifies incorrect sizes")); 4759 4760 return (copyout(&remain32, uaddr, sizeof(remain32))); 4761 } 4762 #endif /* __i386__ || __LP64__ */ 4763 4764 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4765 const struct umtx_copyops *umtx_ops); 4766 4767 static const _umtx_op_func op_table[] = { 4768 #ifdef COMPAT_FREEBSD10 4769 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4770 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4771 #else 4772 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4773 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4774 #endif 4775 [UMTX_OP_WAIT] = __umtx_op_wait, 4776 [UMTX_OP_WAKE] = __umtx_op_wake, 4777 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4778 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4779 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4780 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4781 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4782 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4783 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4784 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4785 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4786 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4787 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4788 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4789 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4790 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4791 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4792 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4793 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4794 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4795 #else 4796 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4797 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4798 #endif 4799 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4800 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4801 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4802 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4803 [UMTX_OP_SHM] = __umtx_op_shm, 4804 [UMTX_OP_ROBUST_LISTS] = 
__umtx_op_robust_lists, 4805 }; 4806 4807 static const struct umtx_copyops umtx_native_ops = { 4808 .copyin_timeout = umtx_copyin_timeout, 4809 .copyin_umtx_time = umtx_copyin_umtx_time, 4810 .copyin_robust_lists = umtx_copyin_robust_lists, 4811 .copyout_timeout = umtx_copyout_timeout, 4812 .timespec_sz = sizeof(struct timespec), 4813 .umtx_time_sz = sizeof(struct _umtx_time), 4814 }; 4815 4816 #ifndef __i386__ 4817 static const struct umtx_copyops umtx_native_opsi386 = { 4818 .copyin_timeout = umtx_copyin_timeouti386, 4819 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4820 .copyin_robust_lists = umtx_copyin_robust_lists32, 4821 .copyout_timeout = umtx_copyout_timeouti386, 4822 .timespec_sz = sizeof(struct timespeci386), 4823 .umtx_time_sz = sizeof(struct umtx_timei386), 4824 .compat32 = true, 4825 }; 4826 #endif 4827 4828 #if defined(__i386__) || defined(__LP64__) 4829 /* i386 can emulate other 32-bit archs, too! */ 4830 static const struct umtx_copyops umtx_native_opsx32 = { 4831 .copyin_timeout = umtx_copyin_timeoutx32, 4832 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4833 .copyin_robust_lists = umtx_copyin_robust_lists32, 4834 .copyout_timeout = umtx_copyout_timeoutx32, 4835 .timespec_sz = sizeof(struct timespecx32), 4836 .umtx_time_sz = sizeof(struct umtx_timex32), 4837 .compat32 = true, 4838 }; 4839 4840 #ifdef COMPAT_FREEBSD32 4841 #ifdef __amd64__ 4842 #define umtx_native_ops32 umtx_native_opsi386 4843 #else 4844 #define umtx_native_ops32 umtx_native_opsx32 4845 #endif 4846 #endif /* COMPAT_FREEBSD32 */ 4847 #endif /* __i386__ || __LP64__ */ 4848 4849 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4850 4851 static int 4852 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4853 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4854 { 4855 struct _umtx_op_args uap = { 4856 .obj = obj, 4857 .op = op & ~UMTX_OP__FLAGS, 4858 .val = val, 4859 .uaddr1 = uaddr1, 4860 .uaddr2 = uaddr2 4861 }; 4862 4863 if ((uap.op >= 
nitems(op_table))) 4864 return (EINVAL); 4865 return ((*op_table[uap.op])(td, &uap, ops)); 4866 } 4867 4868 int 4869 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4870 { 4871 static const struct umtx_copyops *umtx_ops; 4872 4873 umtx_ops = &umtx_native_ops; 4874 #ifdef __LP64__ 4875 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 4876 if ((uap->op & UMTX_OP__I386) != 0) 4877 umtx_ops = &umtx_native_opsi386; 4878 else 4879 umtx_ops = &umtx_native_opsx32; 4880 } 4881 #elif !defined(__i386__) 4882 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 4883 if ((uap->op & UMTX_OP__I386) != 0) 4884 umtx_ops = &umtx_native_opsi386; 4885 #else 4886 /* Likewise, UMTX_OP__I386 is a nop on i386. */ 4887 if ((uap->op & UMTX_OP__32BIT) != 0) 4888 umtx_ops = &umtx_native_opsx32; 4889 #endif 4890 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4891 uap->uaddr2, umtx_ops)); 4892 } 4893 4894 #ifdef COMPAT_FREEBSD32 4895 #ifdef COMPAT_FREEBSD10 4896 int 4897 freebsd10_freebsd32__umtx_lock(struct thread *td, 4898 struct freebsd10_freebsd32__umtx_lock_args *uap) 4899 { 4900 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 4901 } 4902 4903 int 4904 freebsd10_freebsd32__umtx_unlock(struct thread *td, 4905 struct freebsd10_freebsd32__umtx_unlock_args *uap) 4906 { 4907 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 4908 } 4909 #endif /* COMPAT_FREEBSD10 */ 4910 4911 int 4912 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4913 { 4914 4915 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4916 uap->uaddr2, &umtx_native_ops32)); 4917 } 4918 #endif /* COMPAT_FREEBSD32 */ 4919 4920 void 4921 umtx_thread_init(struct thread *td) 4922 { 4923 4924 td->td_umtxq = umtxq_alloc(); 4925 td->td_umtxq->uq_thread = td; 4926 } 4927 4928 void 4929 umtx_thread_fini(struct thread *td) 4930 { 4931 4932 umtxq_free(td->td_umtxq); 4933 } 4934 4935 /* 4936 * It will be called when new 
thread is created, e.g fork(). 4937 */ 4938 void 4939 umtx_thread_alloc(struct thread *td) 4940 { 4941 struct umtx_q *uq; 4942 4943 uq = td->td_umtxq; 4944 uq->uq_inherited_pri = PRI_MAX; 4945 4946 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4947 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4948 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4949 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4950 } 4951 4952 /* 4953 * exec() hook. 4954 * 4955 * Clear robust lists for all process' threads, not delaying the 4956 * cleanup to thread exit, since the relevant address space is 4957 * destroyed right now. 4958 */ 4959 void 4960 umtx_exec(struct proc *p) 4961 { 4962 struct thread *td; 4963 4964 KASSERT(p == curproc, ("need curproc")); 4965 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4966 (p->p_flag & P_STOPPED_SINGLE) != 0, 4967 ("curproc must be single-threaded")); 4968 /* 4969 * There is no need to lock the list as only this thread can be 4970 * running. 4971 */ 4972 FOREACH_THREAD_IN_PROC(p, td) { 4973 KASSERT(td == curthread || 4974 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4975 ("running thread %p %p", p, td)); 4976 umtx_thread_cleanup(td); 4977 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4978 } 4979 } 4980 4981 /* 4982 * thread exit hook. 
4983 */ 4984 void 4985 umtx_thread_exit(struct thread *td) 4986 { 4987 4988 umtx_thread_cleanup(td); 4989 } 4990 4991 static int 4992 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 4993 { 4994 u_long res1; 4995 uint32_t res32; 4996 int error; 4997 4998 if (compat32) { 4999 error = fueword32((void *)ptr, &res32); 5000 if (error == 0) 5001 res1 = res32; 5002 } else { 5003 error = fueword((void *)ptr, &res1); 5004 } 5005 if (error == 0) 5006 *res = res1; 5007 else 5008 error = EFAULT; 5009 return (error); 5010 } 5011 5012 static void 5013 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 5014 bool compat32) 5015 { 5016 struct umutex32 m32; 5017 5018 if (compat32) { 5019 memcpy(&m32, m, sizeof(m32)); 5020 *rb_list = m32.m_rb_lnk; 5021 } else { 5022 *rb_list = m->m_rb_lnk; 5023 } 5024 } 5025 5026 static int 5027 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 5028 bool compat32) 5029 { 5030 struct umutex m; 5031 int error; 5032 5033 KASSERT(td->td_proc == curproc, ("need current vmspace")); 5034 error = copyin((void *)rbp, &m, sizeof(m)); 5035 if (error != 0) 5036 return (error); 5037 if (rb_list != NULL) 5038 umtx_read_rb_list(td, &m, rb_list, compat32); 5039 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5040 return (EINVAL); 5041 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5042 /* inact is cleared after unlock, allow the inconsistency */ 5043 return (inact ? 
0 : EINVAL); 5044 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5045 } 5046 5047 static void 5048 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5049 const char *name, bool compat32) 5050 { 5051 int error, i; 5052 uintptr_t rbp; 5053 bool inact; 5054 5055 if (rb_list == 0) 5056 return; 5057 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5058 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5059 if (rbp == *rb_inact) { 5060 inact = true; 5061 *rb_inact = 0; 5062 } else 5063 inact = false; 5064 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5065 } 5066 if (i == umtx_max_rb && umtx_verbose_rb) { 5067 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5068 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5069 } 5070 if (error != 0 && umtx_verbose_rb) { 5071 uprintf("comm %s pid %d: handling %srb error %d\n", 5072 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5073 } 5074 } 5075 5076 /* 5077 * Clean up umtx data. 5078 */ 5079 static void 5080 umtx_thread_cleanup(struct thread *td) 5081 { 5082 struct umtx_q *uq; 5083 struct umtx_pi *pi; 5084 uintptr_t rb_inact; 5085 bool compat32; 5086 5087 /* 5088 * Disown pi mutexes. 5089 */ 5090 uq = td->td_umtxq; 5091 if (uq != NULL) { 5092 if (uq->uq_inherited_pri != PRI_MAX || 5093 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5094 mtx_lock(&umtx_lock); 5095 uq->uq_inherited_pri = PRI_MAX; 5096 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5097 pi->pi_owner = NULL; 5098 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5099 } 5100 mtx_unlock(&umtx_lock); 5101 } 5102 sched_lend_user_prio_cond(td, PRI_MAX); 5103 } 5104 5105 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5106 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5107 5108 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5109 return; 5110 5111 /* 5112 * Handle terminated robust mutexes. 
Must be done after 5113 * robust pi disown, otherwise unlock could see unowned 5114 * entries. 5115 */ 5116 rb_inact = td->td_rb_inact; 5117 if (rb_inact != 0) 5118 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5119 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5120 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5121 if (rb_inact != 0) 5122 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5123 } 5124