1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_umtx_profiling.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/fcntl.h> 42 #include <sys/file.h> 43 #include <sys/filedesc.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/sched.h> 56 #include <sys/smp.h> 57 #include <sys/sysctl.h> 58 #include <sys/systm.h> 59 #include <sys/sysproto.h> 60 #include <sys/syscallsubr.h> 61 #include <sys/taskqueue.h> 62 #include <sys/time.h> 63 #include <sys/eventhandler.h> 64 #include <sys/umtx.h> 65 #include <sys/umtxvar.h> 66 67 #include <security/mac/mac_framework.h> 68 69 #include <vm/vm.h> 70 #include <vm/vm_param.h> 71 #include <vm/pmap.h> 72 #include <vm/vm_map.h> 73 #include <vm/vm_object.h> 74 75 #include <machine/atomic.h> 76 #include <machine/cpu.h> 77 78 #include <compat/freebsd32/freebsd32.h> 79 #ifdef COMPAT_FREEBSD32 80 #include <compat/freebsd32/freebsd32_proto.h> 81 #endif 82 83 #define _UMUTEX_TRY 1 84 #define _UMUTEX_WAIT 2 85 86 #ifdef UMTX_PROFILING 87 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 88 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 89 #endif 90 91 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 92 #ifdef INVARIANTS 93 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do { \ 94 struct umtxq_chain *uc; \ 95 \ 96 uc = umtxq_getchain(key); \ 97 mtx_assert(&uc->uc_lock, MA_OWNED); \ 98 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); \ 99 } while (0) 100 #else 101 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0) 102 #endif 103 104 /* 105 * Don't propagate time-sharing priority, there is a security reason, 106 * a user can simply introduce PI-mutex, let thread A lock the mutex, 107 * and let another thread B block on the 
mutex, because B is 108 * sleeping, its priority will be boosted, this causes A's priority to 109 * be boosted via priority propagating too and will never be lowered even 110 * if it is using 100%CPU, this is unfair to other processes. 111 */ 112 113 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 114 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 115 PRI_MAX_TIMESHARE : (td)->td_user_pri) 116 117 #define GOLDEN_RATIO_PRIME 2654404609U 118 #ifndef UMTX_CHAINS 119 #define UMTX_CHAINS 512 120 #endif 121 #define UMTX_SHIFTS (__WORD_BIT - 9) 122 123 #define GET_SHARE(flags) \ 124 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 125 126 #define BUSY_SPINS 200 127 128 struct umtx_copyops { 129 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); 130 int (*copyin_umtx_time)(const void *uaddr, size_t size, 131 struct _umtx_time *tp); 132 int (*copyin_robust_lists)(const void *uaddr, size_t size, 133 struct umtx_robust_lists_params *rbp); 134 int (*copyout_timeout)(void *uaddr, size_t size, 135 struct timespec *tsp); 136 const size_t timespec_sz; 137 const size_t umtx_time_sz; 138 const bool compat32; 139 }; 140 141 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 142 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 143 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 144 145 int umtx_shm_vnobj_persistent = 0; 146 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 147 &umtx_shm_vnobj_persistent, 0, 148 "False forces destruction of umtx attached to file, on last close"); 149 static int umtx_max_rb = 1000; 150 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 151 &umtx_max_rb, 0, 152 "Maximum number of robust mutexes allowed for each thread"); 153 154 static uma_zone_t umtx_pi_zone; 155 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 156 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 157 static int umtx_pi_allocated; 158 159 static 
SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 160 "umtx debug"); 161 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 162 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 163 static int umtx_verbose_rb = 1; 164 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 165 &umtx_verbose_rb, 0, 166 ""); 167 168 #ifdef UMTX_PROFILING 169 static long max_length; 170 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 171 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 172 "umtx chain stats"); 173 #endif 174 175 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 176 const struct _umtx_time *umtxtime); 177 178 static void umtx_shm_init(void); 179 static void umtxq_sysinit(void *); 180 static void umtxq_hash(struct umtx_key *key); 181 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 182 bool rb); 183 static void umtx_thread_cleanup(struct thread *td); 184 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 185 186 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 187 188 static struct mtx umtx_lock; 189 190 #ifdef UMTX_PROFILING 191 static void 192 umtx_init_profiling(void) 193 { 194 struct sysctl_oid *chain_oid; 195 char chain_name[10]; 196 int i; 197 198 for (i = 0; i < UMTX_CHAINS; ++i) { 199 snprintf(chain_name, sizeof(chain_name), "%d", i); 200 chain_oid = SYSCTL_ADD_NODE(NULL, 201 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 202 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 203 "umtx hash stats"); 204 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 205 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 206 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 207 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 208 } 209 } 210 211 static int 212 
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 213 { 214 char buf[512]; 215 struct sbuf sb; 216 struct umtxq_chain *uc; 217 u_int fract, i, j, tot, whole; 218 u_int sf0, sf1, sf2, sf3, sf4; 219 u_int si0, si1, si2, si3, si4; 220 u_int sw0, sw1, sw2, sw3, sw4; 221 222 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 223 for (i = 0; i < 2; i++) { 224 tot = 0; 225 for (j = 0; j < UMTX_CHAINS; ++j) { 226 uc = &umtxq_chains[i][j]; 227 mtx_lock(&uc->uc_lock); 228 tot += uc->max_length; 229 mtx_unlock(&uc->uc_lock); 230 } 231 if (tot == 0) 232 sbuf_printf(&sb, "%u) Empty ", i); 233 else { 234 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 235 si0 = si1 = si2 = si3 = si4 = 0; 236 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 237 for (j = 0; j < UMTX_CHAINS; j++) { 238 uc = &umtxq_chains[i][j]; 239 mtx_lock(&uc->uc_lock); 240 whole = uc->max_length * 100; 241 mtx_unlock(&uc->uc_lock); 242 fract = (whole % tot) * 100; 243 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 244 sf0 = fract; 245 si0 = j; 246 sw0 = whole; 247 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 248 sf1)) { 249 sf1 = fract; 250 si1 = j; 251 sw1 = whole; 252 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 253 sf2)) { 254 sf2 = fract; 255 si2 = j; 256 sw2 = whole; 257 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 258 sf3)) { 259 sf3 = fract; 260 si3 = j; 261 sw3 = whole; 262 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 263 sf4)) { 264 sf4 = fract; 265 si4 = j; 266 sw4 = whole; 267 } 268 } 269 sbuf_printf(&sb, "queue %u:\n", i); 270 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 271 sf0 / tot, si0); 272 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 273 sf1 / tot, si1); 274 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 275 sf2 / tot, si2); 276 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 277 sf3 / tot, si3); 278 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 279 sf4 / tot, si4); 280 } 281 } 282 sbuf_trim(&sb); 283 sbuf_finish(&sb); 284 sysctl_handle_string(oidp, sbuf_data(&sb), 
sbuf_len(&sb), req); 285 sbuf_delete(&sb); 286 return (0); 287 } 288 289 static int 290 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 291 { 292 struct umtxq_chain *uc; 293 u_int i, j; 294 int clear, error; 295 296 clear = 0; 297 error = sysctl_handle_int(oidp, &clear, 0, req); 298 if (error != 0 || req->newptr == NULL) 299 return (error); 300 301 if (clear != 0) { 302 for (i = 0; i < 2; ++i) { 303 for (j = 0; j < UMTX_CHAINS; ++j) { 304 uc = &umtxq_chains[i][j]; 305 mtx_lock(&uc->uc_lock); 306 uc->length = 0; 307 uc->max_length = 0; 308 mtx_unlock(&uc->uc_lock); 309 } 310 } 311 } 312 return (0); 313 } 314 315 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 316 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 317 sysctl_debug_umtx_chains_clear, "I", 318 "Clear umtx chains statistics"); 319 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 320 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 321 sysctl_debug_umtx_chains_peaks, "A", 322 "Highest peaks in chains max length"); 323 #endif 324 325 static void 326 umtxq_sysinit(void *arg __unused) 327 { 328 int i, j; 329 330 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 331 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 332 for (i = 0; i < 2; ++i) { 333 for (j = 0; j < UMTX_CHAINS; ++j) { 334 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 335 MTX_DEF | MTX_DUPOK); 336 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 337 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 338 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 339 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 340 umtxq_chains[i][j].uc_busy = 0; 341 umtxq_chains[i][j].uc_waiters = 0; 342 #ifdef UMTX_PROFILING 343 umtxq_chains[i][j].length = 0; 344 umtxq_chains[i][j].max_length = 0; 345 #endif 346 } 347 } 348 #ifdef UMTX_PROFILING 349 umtx_init_profiling(); 350 #endif 351 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 352 umtx_shm_init(); 353 } 354 355 struct umtx_q * 356 umtxq_alloc(void) 357 { 358 struct umtx_q *uq; 359 360 uq = 
malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 361 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 362 M_WAITOK | M_ZERO); 363 TAILQ_INIT(&uq->uq_spare_queue->head); 364 TAILQ_INIT(&uq->uq_pi_contested); 365 uq->uq_inherited_pri = PRI_MAX; 366 return (uq); 367 } 368 369 void 370 umtxq_free(struct umtx_q *uq) 371 { 372 373 MPASS(uq->uq_spare_queue != NULL); 374 free(uq->uq_spare_queue, M_UMTX); 375 free(uq, M_UMTX); 376 } 377 378 static inline void 379 umtxq_hash(struct umtx_key *key) 380 { 381 unsigned n; 382 383 n = (uintptr_t)key->info.both.a + key->info.both.b; 384 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 385 } 386 387 struct umtxq_chain * 388 umtxq_getchain(struct umtx_key *key) 389 { 390 391 if (key->type <= TYPE_SEM) 392 return (&umtxq_chains[1][key->hash]); 393 return (&umtxq_chains[0][key->hash]); 394 } 395 396 /* 397 * Set chain to busy state when following operation 398 * may be blocked (kernel mutex can not be used). 399 */ 400 void 401 umtxq_busy(struct umtx_key *key) 402 { 403 struct umtxq_chain *uc; 404 405 uc = umtxq_getchain(key); 406 mtx_assert(&uc->uc_lock, MA_OWNED); 407 if (uc->uc_busy) { 408 #ifdef SMP 409 if (smp_cpus > 1) { 410 int count = BUSY_SPINS; 411 if (count > 0) { 412 umtxq_unlock(key); 413 while (uc->uc_busy && --count > 0) 414 cpu_spinwait(); 415 umtxq_lock(key); 416 } 417 } 418 #endif 419 while (uc->uc_busy) { 420 uc->uc_waiters++; 421 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 422 uc->uc_waiters--; 423 } 424 } 425 uc->uc_busy = 1; 426 } 427 428 /* 429 * Unbusy a chain. 
430 */ 431 void 432 umtxq_unbusy(struct umtx_key *key) 433 { 434 struct umtxq_chain *uc; 435 436 uc = umtxq_getchain(key); 437 mtx_assert(&uc->uc_lock, MA_OWNED); 438 KASSERT(uc->uc_busy != 0, ("not busy")); 439 uc->uc_busy = 0; 440 if (uc->uc_waiters) 441 wakeup_one(uc); 442 } 443 444 void 445 umtxq_unbusy_unlocked(struct umtx_key *key) 446 { 447 448 umtxq_lock(key); 449 umtxq_unbusy(key); 450 umtxq_unlock(key); 451 } 452 453 static struct umtxq_queue * 454 umtxq_queue_lookup(struct umtx_key *key, int q) 455 { 456 struct umtxq_queue *uh; 457 struct umtxq_chain *uc; 458 459 uc = umtxq_getchain(key); 460 UMTXQ_LOCKED_ASSERT(uc); 461 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 462 if (umtx_key_match(&uh->key, key)) 463 return (uh); 464 } 465 466 return (NULL); 467 } 468 469 void 470 umtxq_insert_queue(struct umtx_q *uq, int q) 471 { 472 struct umtxq_queue *uh; 473 struct umtxq_chain *uc; 474 475 uc = umtxq_getchain(&uq->uq_key); 476 UMTXQ_LOCKED_ASSERT(uc); 477 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 478 uh = umtxq_queue_lookup(&uq->uq_key, q); 479 if (uh != NULL) { 480 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 481 } else { 482 uh = uq->uq_spare_queue; 483 uh->key = uq->uq_key; 484 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 485 #ifdef UMTX_PROFILING 486 uc->length++; 487 if (uc->length > uc->max_length) { 488 uc->max_length = uc->length; 489 if (uc->max_length > max_length) 490 max_length = uc->max_length; 491 } 492 #endif 493 } 494 uq->uq_spare_queue = NULL; 495 496 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 497 uh->length++; 498 uq->uq_flags |= UQF_UMTXQ; 499 uq->uq_cur_queue = uh; 500 return; 501 } 502 503 void 504 umtxq_remove_queue(struct umtx_q *uq, int q) 505 { 506 struct umtxq_chain *uc; 507 struct umtxq_queue *uh; 508 509 uc = umtxq_getchain(&uq->uq_key); 510 UMTXQ_LOCKED_ASSERT(uc); 511 if (uq->uq_flags & UQF_UMTXQ) { 512 uh = uq->uq_cur_queue; 513 TAILQ_REMOVE(&uh->head, uq, uq_link); 514 
uh->length--; 515 uq->uq_flags &= ~UQF_UMTXQ; 516 if (TAILQ_EMPTY(&uh->head)) { 517 KASSERT(uh->length == 0, 518 ("inconsistent umtxq_queue length")); 519 #ifdef UMTX_PROFILING 520 uc->length--; 521 #endif 522 LIST_REMOVE(uh, link); 523 } else { 524 uh = LIST_FIRST(&uc->uc_spare_queue); 525 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 526 LIST_REMOVE(uh, link); 527 } 528 uq->uq_spare_queue = uh; 529 uq->uq_cur_queue = NULL; 530 } 531 } 532 533 /* 534 * Check if there are multiple waiters 535 */ 536 int 537 umtxq_count(struct umtx_key *key) 538 { 539 struct umtxq_queue *uh; 540 541 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 542 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 543 if (uh != NULL) 544 return (uh->length); 545 return (0); 546 } 547 548 /* 549 * Check if there are multiple PI waiters and returns first 550 * waiter. 551 */ 552 static int 553 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 554 { 555 struct umtxq_queue *uh; 556 557 *first = NULL; 558 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 559 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 560 if (uh != NULL) { 561 *first = TAILQ_FIRST(&uh->head); 562 return (uh->length); 563 } 564 return (0); 565 } 566 567 /* 568 * Wake up threads waiting on an userland object by a bit mask. 569 */ 570 int 571 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 572 { 573 struct umtxq_queue *uh; 574 struct umtx_q *uq, *uq_temp; 575 int ret; 576 577 ret = 0; 578 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 579 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 580 if (uh == NULL) 581 return (0); 582 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 583 if ((uq->uq_bitset & bitset) == 0) 584 continue; 585 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 586 wakeup_one(uq); 587 if (++ret >= n_wake) 588 break; 589 } 590 return (ret); 591 } 592 593 /* 594 * Wake up threads waiting on an userland object. 
595 */ 596 597 static int 598 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 599 { 600 struct umtxq_queue *uh; 601 struct umtx_q *uq; 602 int ret; 603 604 ret = 0; 605 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 606 uh = umtxq_queue_lookup(key, q); 607 if (uh != NULL) { 608 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 609 umtxq_remove_queue(uq, q); 610 wakeup(uq); 611 if (++ret >= n_wake) 612 return (ret); 613 } 614 } 615 return (ret); 616 } 617 618 /* 619 * Wake up specified thread. 620 */ 621 static inline void 622 umtxq_signal_thread(struct umtx_q *uq) 623 { 624 625 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 626 umtxq_remove(uq); 627 wakeup(uq); 628 } 629 630 /* 631 * Wake up a maximum of n_wake threads that are waiting on an userland 632 * object identified by key. The remaining threads are removed from queue 633 * identified by key and added to the queue identified by key2 (requeued). 634 * The n_requeue specifies an upper limit on the number of threads that 635 * are requeued to the second queue. 
636 */ 637 int 638 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 639 int n_requeue) 640 { 641 struct umtxq_queue *uh; 642 struct umtx_q *uq, *uq_temp; 643 int ret; 644 645 ret = 0; 646 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 647 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 648 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 649 if (uh == NULL) 650 return (0); 651 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 652 if (++ret <= n_wake) { 653 umtxq_remove(uq); 654 wakeup_one(uq); 655 } else { 656 umtxq_remove(uq); 657 uq->uq_key = *key2; 658 umtxq_insert(uq); 659 if (ret - n_wake == n_requeue) 660 break; 661 } 662 } 663 return (ret); 664 } 665 666 static inline int 667 tstohz(const struct timespec *tsp) 668 { 669 struct timeval tv; 670 671 TIMESPEC_TO_TIMEVAL(&tv, tsp); 672 return tvtohz(&tv); 673 } 674 675 void 676 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 677 int absolute, const struct timespec *timeout) 678 { 679 680 timo->clockid = clockid; 681 if (!absolute) { 682 timo->is_abs_real = false; 683 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 684 timespecadd(&timo->cur, timeout, &timo->end); 685 } else { 686 timo->end = *timeout; 687 timo->is_abs_real = clockid == CLOCK_REALTIME || 688 clockid == CLOCK_REALTIME_FAST || 689 clockid == CLOCK_REALTIME_PRECISE || 690 clockid == CLOCK_SECOND; 691 } 692 } 693 694 static void 695 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 696 const struct _umtx_time *umtxtime) 697 { 698 699 umtx_abs_timeout_init(timo, umtxtime->_clockid, 700 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 701 } 702 703 static int 704 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt, 705 int *flags) 706 { 707 struct bintime bt, bbt; 708 struct timespec tts; 709 710 switch (timo->clockid) { 711 712 /* Clocks that can be converted into absolute time. 
*/ 713 case CLOCK_REALTIME: 714 case CLOCK_REALTIME_PRECISE: 715 case CLOCK_REALTIME_FAST: 716 case CLOCK_MONOTONIC: 717 case CLOCK_MONOTONIC_PRECISE: 718 case CLOCK_MONOTONIC_FAST: 719 case CLOCK_UPTIME: 720 case CLOCK_UPTIME_PRECISE: 721 case CLOCK_UPTIME_FAST: 722 case CLOCK_SECOND: 723 timespec2bintime(&timo->end, &bt); 724 switch (timo->clockid) { 725 case CLOCK_REALTIME: 726 case CLOCK_REALTIME_PRECISE: 727 case CLOCK_REALTIME_FAST: 728 case CLOCK_SECOND: 729 getboottimebin(&bbt); 730 bintime_sub(&bt, &bbt); 731 break; 732 } 733 if (bt.sec < 0) 734 return (ETIMEDOUT); 735 if (bt.sec >= (SBT_MAX >> 32)) { 736 *sbt = 0; 737 *flags = 0; 738 return (0); 739 } 740 *sbt = bttosbt(bt); 741 switch (timo->clockid) { 742 case CLOCK_REALTIME_FAST: 743 case CLOCK_MONOTONIC_FAST: 744 case CLOCK_UPTIME_FAST: 745 *sbt += tc_tick_sbt; 746 break; 747 case CLOCK_SECOND: 748 *sbt += SBT_1S; 749 break; 750 } 751 *flags = C_ABSOLUTE; 752 return (0); 753 754 /* Clocks that has to be periodically polled. */ 755 case CLOCK_VIRTUAL: 756 case CLOCK_PROF: 757 case CLOCK_THREAD_CPUTIME_ID: 758 case CLOCK_PROCESS_CPUTIME_ID: 759 default: 760 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 761 if (timespeccmp(&timo->end, &timo->cur, <=)) 762 return (ETIMEDOUT); 763 timespecsub(&timo->end, &timo->cur, &tts); 764 *sbt = tick_sbt * tstohz(&tts); 765 *flags = C_HARDCLOCK; 766 return (0); 767 } 768 } 769 770 static uint32_t 771 umtx_unlock_val(uint32_t flags, bool rb) 772 { 773 774 if (rb) 775 return (UMUTEX_RB_OWNERDEAD); 776 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 777 return (UMUTEX_RB_NOTRECOV); 778 else 779 return (UMUTEX_UNOWNED); 780 781 } 782 783 /* 784 * Put thread into sleep state, before sleeping, check if 785 * thread was removed from umtx queue. 
786 */ 787 int 788 umtxq_sleep(struct umtx_q *uq, const char *wmesg, 789 struct umtx_abs_timeout *timo) 790 { 791 struct umtxq_chain *uc; 792 sbintime_t sbt = 0; 793 int error, flags = 0; 794 795 uc = umtxq_getchain(&uq->uq_key); 796 UMTXQ_LOCKED_ASSERT(uc); 797 for (;;) { 798 if (!(uq->uq_flags & UQF_UMTXQ)) { 799 error = 0; 800 break; 801 } 802 if (timo != NULL) { 803 if (timo->is_abs_real) 804 curthread->td_rtcgen = 805 atomic_load_acq_int(&rtc_generation); 806 error = umtx_abs_timeout_getsbt(timo, &sbt, &flags); 807 if (error != 0) 808 break; 809 } 810 error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, 811 sbt, 0, flags); 812 uc = umtxq_getchain(&uq->uq_key); 813 mtx_lock(&uc->uc_lock); 814 if (error == EINTR || error == ERESTART) 815 break; 816 if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) { 817 error = ETIMEDOUT; 818 break; 819 } 820 } 821 822 curthread->td_rtcgen = 0; 823 return (error); 824 } 825 826 /* 827 * Convert userspace address into unique logical address. 
828 */ 829 int 830 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 831 { 832 struct thread *td = curthread; 833 vm_map_t map; 834 vm_map_entry_t entry; 835 vm_pindex_t pindex; 836 vm_prot_t prot; 837 boolean_t wired; 838 839 key->type = type; 840 if (share == THREAD_SHARE) { 841 key->shared = 0; 842 key->info.private.vs = td->td_proc->p_vmspace; 843 key->info.private.addr = (uintptr_t)addr; 844 } else { 845 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 846 map = &td->td_proc->p_vmspace->vm_map; 847 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 848 &entry, &key->info.shared.object, &pindex, &prot, 849 &wired) != KERN_SUCCESS) { 850 return (EFAULT); 851 } 852 853 if ((share == PROCESS_SHARE) || 854 (share == AUTO_SHARE && 855 VM_INHERIT_SHARE == entry->inheritance)) { 856 key->shared = 1; 857 key->info.shared.offset = (vm_offset_t)addr - 858 entry->start + entry->offset; 859 vm_object_reference(key->info.shared.object); 860 } else { 861 key->shared = 0; 862 key->info.private.vs = td->td_proc->p_vmspace; 863 key->info.private.addr = (uintptr_t)addr; 864 } 865 vm_map_lookup_done(map, entry); 866 } 867 868 umtxq_hash(key); 869 return (0); 870 } 871 872 /* 873 * Release key. 874 */ 875 void 876 umtx_key_release(struct umtx_key *key) 877 { 878 if (key->shared) 879 vm_object_deallocate(key->info.shared.object); 880 } 881 882 #ifdef COMPAT_FREEBSD10 883 /* 884 * Lock a umtx object. 885 */ 886 static int 887 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 888 const struct timespec *timeout) 889 { 890 struct umtx_abs_timeout timo; 891 struct umtx_q *uq; 892 u_long owner; 893 u_long old; 894 int error = 0; 895 896 uq = td->td_umtxq; 897 if (timeout != NULL) 898 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 899 900 /* 901 * Care must be exercised when dealing with umtx structure. It 902 * can fault on any access. 903 */ 904 for (;;) { 905 /* 906 * Try the uncontested case. This should be done in userland. 
907 */ 908 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 909 910 /* The acquire succeeded. */ 911 if (owner == UMTX_UNOWNED) 912 return (0); 913 914 /* The address was invalid. */ 915 if (owner == -1) 916 return (EFAULT); 917 918 /* If no one owns it but it is contested try to acquire it. */ 919 if (owner == UMTX_CONTESTED) { 920 owner = casuword(&umtx->u_owner, 921 UMTX_CONTESTED, id | UMTX_CONTESTED); 922 923 if (owner == UMTX_CONTESTED) 924 return (0); 925 926 /* The address was invalid. */ 927 if (owner == -1) 928 return (EFAULT); 929 930 error = thread_check_susp(td, false); 931 if (error != 0) 932 break; 933 934 /* If this failed the lock has changed, restart. */ 935 continue; 936 } 937 938 /* 939 * If we caught a signal, we have retried and now 940 * exit immediately. 941 */ 942 if (error != 0) 943 break; 944 945 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 946 AUTO_SHARE, &uq->uq_key)) != 0) 947 return (error); 948 949 umtxq_lock(&uq->uq_key); 950 umtxq_busy(&uq->uq_key); 951 umtxq_insert(uq); 952 umtxq_unbusy(&uq->uq_key); 953 umtxq_unlock(&uq->uq_key); 954 955 /* 956 * Set the contested bit so that a release in user space 957 * knows to use the system call for unlock. If this fails 958 * either some one else has acquired the lock or it has been 959 * released. 960 */ 961 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 962 963 /* The address was invalid. */ 964 if (old == -1) { 965 umtxq_lock(&uq->uq_key); 966 umtxq_remove(uq); 967 umtxq_unlock(&uq->uq_key); 968 umtx_key_release(&uq->uq_key); 969 return (EFAULT); 970 } 971 972 /* 973 * We set the contested bit, sleep. Otherwise the lock changed 974 * and we need to retry or we lost a race to the thread 975 * unlocking the umtx. 976 */ 977 umtxq_lock(&uq->uq_key); 978 if (old == owner) 979 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 
NULL : 980 &timo); 981 umtxq_remove(uq); 982 umtxq_unlock(&uq->uq_key); 983 umtx_key_release(&uq->uq_key); 984 985 if (error == 0) 986 error = thread_check_susp(td, false); 987 } 988 989 if (timeout == NULL) { 990 /* Mutex locking is restarted if it is interrupted. */ 991 if (error == EINTR) 992 error = ERESTART; 993 } else { 994 /* Timed-locking is not restarted. */ 995 if (error == ERESTART) 996 error = EINTR; 997 } 998 return (error); 999 } 1000 1001 /* 1002 * Unlock a umtx object. 1003 */ 1004 static int 1005 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 1006 { 1007 struct umtx_key key; 1008 u_long owner; 1009 u_long old; 1010 int error; 1011 int count; 1012 1013 /* 1014 * Make sure we own this mtx. 1015 */ 1016 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 1017 if (owner == -1) 1018 return (EFAULT); 1019 1020 if ((owner & ~UMTX_CONTESTED) != id) 1021 return (EPERM); 1022 1023 /* This should be done in userland */ 1024 if ((owner & UMTX_CONTESTED) == 0) { 1025 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 1026 if (old == -1) 1027 return (EFAULT); 1028 if (old == owner) 1029 return (0); 1030 owner = old; 1031 } 1032 1033 /* We should only ever be in here for contested locks */ 1034 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1035 &key)) != 0) 1036 return (error); 1037 1038 umtxq_lock(&key); 1039 umtxq_busy(&key); 1040 count = umtxq_count(&key); 1041 umtxq_unlock(&key); 1042 1043 /* 1044 * When unlocking the umtx, it must be marked as unowned if 1045 * there is zero or one thread only waiting for it. 1046 * Otherwise, it must be marked as contested. 1047 */ 1048 old = casuword(&umtx->u_owner, owner, 1049 count <= 1 ? 
UMTX_UNOWNED : UMTX_CONTESTED); 1050 umtxq_lock(&key); 1051 umtxq_signal(&key,1); 1052 umtxq_unbusy(&key); 1053 umtxq_unlock(&key); 1054 umtx_key_release(&key); 1055 if (old == -1) 1056 return (EFAULT); 1057 if (old != owner) 1058 return (EINVAL); 1059 return (0); 1060 } 1061 1062 #ifdef COMPAT_FREEBSD32 1063 1064 /* 1065 * Lock a umtx object. 1066 */ 1067 static int 1068 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1069 const struct timespec *timeout) 1070 { 1071 struct umtx_abs_timeout timo; 1072 struct umtx_q *uq; 1073 uint32_t owner; 1074 uint32_t old; 1075 int error = 0; 1076 1077 uq = td->td_umtxq; 1078 1079 if (timeout != NULL) 1080 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1081 1082 /* 1083 * Care must be exercised when dealing with umtx structure. It 1084 * can fault on any access. 1085 */ 1086 for (;;) { 1087 /* 1088 * Try the uncontested case. This should be done in userland. 1089 */ 1090 owner = casuword32(m, UMUTEX_UNOWNED, id); 1091 1092 /* The acquire succeeded. */ 1093 if (owner == UMUTEX_UNOWNED) 1094 return (0); 1095 1096 /* The address was invalid. */ 1097 if (owner == -1) 1098 return (EFAULT); 1099 1100 /* If no one owns it but it is contested try to acquire it. */ 1101 if (owner == UMUTEX_CONTESTED) { 1102 owner = casuword32(m, 1103 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1104 if (owner == UMUTEX_CONTESTED) 1105 return (0); 1106 1107 /* The address was invalid. */ 1108 if (owner == -1) 1109 return (EFAULT); 1110 1111 error = thread_check_susp(td, false); 1112 if (error != 0) 1113 break; 1114 1115 /* If this failed the lock has changed, restart. */ 1116 continue; 1117 } 1118 1119 /* 1120 * If we caught a signal, we have retried and now 1121 * exit immediately. 
*/
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 *
 * m is the user address of the 32-bit lock word and id is the owner id
 * the caller claims to hold it with.
 *
 * Returns 0 on success, EFAULT if the lock word cannot be accessed,
 * EPERM if the word (ignoring the contested bit) does not contain id,
 * EINVAL if the word changed before the final update, or the error
 * reported by umtx_key_get().
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* The word changed under us; continue with the new value. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	/*
	 * Mark the chain busy and sample the number of waiters; the
	 * chain stays busy across the user-memory update below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	/*
	 * One waiter is signalled and the chain is unbusied before the
	 * result of the update is examined.
	 */
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif	/* COMPAT_FREEBSD32 */
#endif	/* COMPAT_FREEBSD10 */

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * The thread is inserted on the wait queue for addr before the user
 * word is read.  If the word still equals id the thread sleeps,
 * bounded by timeout when it is not NULL.  A non-zero compat32 selects
 * a 32-bit fetch (fueword32()) instead of fueword() into a u_long;
 * a non-zero is_private selects THREAD_SHARE instead of AUTO_SHARE
 * for the key.
 *
 * Returns 0 if the value differed or the thread was no longer on the
 * queue after the sleep, EFAULT if the word could not be read, the
 * error from umtx_key_get(), or the error from umtxq_sleep() with
 * ERESTART mapped to EINTR.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/* Queue ourselves first, then read the word with the lock dropped. */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		/*
		 * No longer on the queue: report success whatever the
		 * sleep returned (presumably a waker dequeued us --
		 * confirm against umtxq_signal()).
		 */
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		/* The fetch faulted; just take ourselves off the queue. */
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	/* A wait is never restarted automatically. */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 *
 * n_wake is handed to umtxq_signal() as the wake count; a non-zero
 * is_private selects THREAD_SHARE instead of AUTO_SHARE for the key.
 * Returns the error from umtx_key_get(), otherwise 0.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * mode selects the behaviour.  _UMUTEX_WAIT only waits until the owner
 * word shows no owner and returns 0 without acquiring the mutex.
 * _UMUTEX_TRY attempts the acquire and returns EBUSY instead of
 * sleeping.  Any other value acquires the mutex, sleeping as needed
 * (bounded by timeout when it is not NULL).
 *
 * Returns 0 on success, EOWNERDEAD when ownership was taken over from
 * UMUTEX_RB_OWNERDEAD, ENOTRECOVERABLE for UMUTEX_RB_NOTRECOV, EFAULT
 * on a failed user-memory access, EBUSY for a failed try, or an error
 * from umtx_key_get(), umtxq_sleep() or thread_check_susp().
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			/* Waiting only: done as soon as nobody owns it. */
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/*
		 * Queue ourselves with the chain marked busy; the busy
		 * mark is held across the user-memory update below.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			/* Undo the queueing before failing or retrying. */
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		/*
		 * A sleep error is kept in 'error' and acted upon only
		 * after one more pass over the lock word above.
		 */
		if (error == 0)
			error = thread_check_susp(td, false);
	}

	/* Not reached: the loop above is only left through a return. */
	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
*/
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	/*
	 * flags and rb are forwarded to umtx_unlock_val() to obtain the
	 * value stored into m_owner on release; flags also selects the
	 * key sharing mode through GET_SHARE().
	 *
	 * Returns 0 on success, EFAULT on a failed user-memory access,
	 * EPERM if the calling thread is not the recorded owner, EINVAL
	 * if the owner word changed during the contested release, or an
	 * error from umtx_key_get() or thread_check_susp().
	 */
	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Uncontested: a plain compare-and-set releases the mutex. */
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			/* The update did not take effect; re-read and retry. */
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/*
	 * Mark the chain busy and sample the number of waiters; the
	 * chain stays busy across the user-memory update below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	/*
	 * One waiter is signalled and the key released before the
	 * result of the update is examined.
	 */
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		/* A changed owner word is an error, anything else retries. */
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
*/
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	/*
	 * Returns 0 when the mutex is still owned (nothing to do) or
	 * after a successful pass, EFAULT on a failed user-memory
	 * access, or an error from umtx_key_get() or
	 * thread_check_susp().
	 */
again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	/* Still owned by a live thread: there is nobody to wake. */
	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * With at most one waiter the contested mark can be dropped.
	 * The robust marker values are left untouched.
	 */
	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			/* Release everything and start over from the top. */
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and tries to fix contention bit.
1636 */ 1637 static int 1638 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1639 { 1640 struct umtx_key key; 1641 uint32_t owner, old; 1642 int type; 1643 int error; 1644 int count; 1645 1646 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1647 UMUTEX_ROBUST)) { 1648 case 0: 1649 case UMUTEX_ROBUST: 1650 type = TYPE_NORMAL_UMUTEX; 1651 break; 1652 case UMUTEX_PRIO_INHERIT: 1653 type = TYPE_PI_UMUTEX; 1654 break; 1655 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1656 type = TYPE_PI_ROBUST_UMUTEX; 1657 break; 1658 case UMUTEX_PRIO_PROTECT: 1659 type = TYPE_PP_UMUTEX; 1660 break; 1661 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1662 type = TYPE_PP_ROBUST_UMUTEX; 1663 break; 1664 default: 1665 return (EINVAL); 1666 } 1667 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1668 return (error); 1669 1670 owner = 0; 1671 umtxq_lock(&key); 1672 umtxq_busy(&key); 1673 count = umtxq_count(&key); 1674 umtxq_unlock(&key); 1675 1676 error = fueword32(&m->m_owner, &owner); 1677 if (error == -1) 1678 error = EFAULT; 1679 1680 /* 1681 * Only repair contention bit if there is a waiter, this means 1682 * the mutex is still being referenced by userland code, 1683 * otherwise don't update any memory. 
1684 */ 1685 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1686 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1687 error = casueword32(&m->m_owner, owner, &old, 1688 owner | UMUTEX_CONTESTED); 1689 if (error == -1) { 1690 error = EFAULT; 1691 break; 1692 } 1693 if (error == 0) { 1694 MPASS(old == owner); 1695 break; 1696 } 1697 owner = old; 1698 error = thread_check_susp(td, false); 1699 } 1700 1701 umtxq_lock(&key); 1702 if (error == EFAULT) { 1703 umtxq_signal(&key, INT_MAX); 1704 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1705 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1706 umtxq_signal(&key, 1); 1707 umtxq_unbusy(&key); 1708 umtxq_unlock(&key); 1709 umtx_key_release(&key); 1710 return (error); 1711 } 1712 1713 struct umtx_pi * 1714 umtx_pi_alloc(int flags) 1715 { 1716 struct umtx_pi *pi; 1717 1718 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1719 TAILQ_INIT(&pi->pi_blocked); 1720 atomic_add_int(&umtx_pi_allocated, 1); 1721 return (pi); 1722 } 1723 1724 void 1725 umtx_pi_free(struct umtx_pi *pi) 1726 { 1727 uma_zfree(umtx_pi_zone, pi); 1728 atomic_add_int(&umtx_pi_allocated, -1); 1729 } 1730 1731 /* 1732 * Adjust the thread's position on a pi_state after its priority has been 1733 * changed. 1734 */ 1735 static int 1736 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1737 { 1738 struct umtx_q *uq, *uq1, *uq2; 1739 struct thread *td1; 1740 1741 mtx_assert(&umtx_lock, MA_OWNED); 1742 if (pi == NULL) 1743 return (0); 1744 1745 uq = td->td_umtxq; 1746 1747 /* 1748 * Check if the thread needs to be moved on the blocked chain. 1749 * It needs to be moved if either its priority is lower than 1750 * the previous thread or higher than the next thread. 
1751 */ 1752 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1753 uq2 = TAILQ_NEXT(uq, uq_lockq); 1754 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1755 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1756 /* 1757 * Remove thread from blocked chain and determine where 1758 * it should be moved to. 1759 */ 1760 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1761 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1762 td1 = uq1->uq_thread; 1763 MPASS(td1->td_proc->p_magic == P_MAGIC); 1764 if (UPRI(td1) > UPRI(td)) 1765 break; 1766 } 1767 1768 if (uq1 == NULL) 1769 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1770 else 1771 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1772 } 1773 return (1); 1774 } 1775 1776 static struct umtx_pi * 1777 umtx_pi_next(struct umtx_pi *pi) 1778 { 1779 struct umtx_q *uq_owner; 1780 1781 if (pi->pi_owner == NULL) 1782 return (NULL); 1783 uq_owner = pi->pi_owner->td_umtxq; 1784 if (uq_owner == NULL) 1785 return (NULL); 1786 return (uq_owner->uq_pi_blocked); 1787 } 1788 1789 /* 1790 * Floyd's Cycle-Finding Algorithm. 1791 */ 1792 static bool 1793 umtx_pi_check_loop(struct umtx_pi *pi) 1794 { 1795 struct umtx_pi *pi1; /* fast iterator */ 1796 1797 mtx_assert(&umtx_lock, MA_OWNED); 1798 if (pi == NULL) 1799 return (false); 1800 pi1 = pi; 1801 for (;;) { 1802 pi = umtx_pi_next(pi); 1803 if (pi == NULL) 1804 break; 1805 pi1 = umtx_pi_next(pi1); 1806 if (pi1 == NULL) 1807 break; 1808 pi1 = umtx_pi_next(pi1); 1809 if (pi1 == NULL) 1810 break; 1811 if (pi == pi1) 1812 return (true); 1813 } 1814 return (false); 1815 } 1816 1817 /* 1818 * Propagate priority when a thread is blocked on POSIX 1819 * PI mutex. 
*/
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	/*
	 * Must be called with umtx_lock held.  pri is sampled once from
	 * the blocked thread and lent to each owner found along the
	 * chain of PI mutexes.
	 */
	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	/* Do nothing if the chain of owners loops back on itself. */
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			/*
			 * The lent priority value is not numerically
			 * above pri, so there is nothing to lend and
			 * the walk stops here.
			 */
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
*/
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	/* Must be called with umtx_lock held. */
	mtx_assert(&umtx_lock, MA_OWNED);

	/* Do nothing if the chain of owners loops back on itself. */
	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * Take the numerically smallest UPRI() among the first
		 * waiters of every PI mutex the owner holds.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		/* Cap the result with the owner's own inherited priority. */
		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue with the mutex the owner itself is blocked on. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into owned list.
 *
 * Must be called with umtx_lock held and with pi currently unowned.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 *
 * Must be called with umtx_lock held; pi->pi_owner is dereferenced,
 * so the caller has to make sure pi is owned.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 *
 * Returns 0 if owner already owned pi or has now been recorded as its
 * owner, EPERM if a different thread is recorded as the owner.
 */
int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed up the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/*
	 * Lend the new owner the priority of the first blocked thread
	 * when that value is numerically smaller than the owner's own.
	 */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex,
 * this may result in a new round of priority propagation.
 *
 * oldpri is not used by this function.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 *
 * Must be called by the thread that owns uq, with the chain of
 * uq->uq_key locked and the chain of pi->pi_key marked busy (both are
 * asserted).  The busy mark is cleared before sleeping and the chain
 * lock is released before returning.
 *
 * owner is the thread id looked up with tdfind() when pi has no
 * recorded owner yet; shared selects a lookup that is not restricted
 * to the calling process.  wmesg and timo are passed to umtxq_sleep(),
 * whose result is returned.
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/*
		 * umtx_lock is dropped around tdfind(), so the owner
		 * is re-checked once the lock has been re-taken.
		 */
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* presumably tdfind() returned the process locked */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Find the first blocked thread with a larger UPRI() value. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/* Undo the blocking state and recompute the lent priorities. */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 *
 * Must be called with the chain of pi->pi_key locked.
 */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
2070 */ 2071 void 2072 umtx_pi_unref(struct umtx_pi *pi) 2073 { 2074 struct umtxq_chain *uc; 2075 2076 uc = umtxq_getchain(&pi->pi_key); 2077 UMTXQ_LOCKED_ASSERT(uc); 2078 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2079 if (--pi->pi_refcount == 0) { 2080 mtx_lock(&umtx_lock); 2081 if (pi->pi_owner != NULL) 2082 umtx_pi_disown(pi); 2083 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2084 ("blocked queue not empty")); 2085 mtx_unlock(&umtx_lock); 2086 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2087 umtx_pi_free(pi); 2088 } 2089 } 2090 2091 /* 2092 * Find a PI mutex in hash table. 2093 */ 2094 struct umtx_pi * 2095 umtx_pi_lookup(struct umtx_key *key) 2096 { 2097 struct umtxq_chain *uc; 2098 struct umtx_pi *pi; 2099 2100 uc = umtxq_getchain(key); 2101 UMTXQ_LOCKED_ASSERT(uc); 2102 2103 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2104 if (umtx_key_match(&pi->pi_key, key)) { 2105 return (pi); 2106 } 2107 } 2108 return (NULL); 2109 } 2110 2111 /* 2112 * Insert a PI mutex into hash table. 2113 */ 2114 void 2115 umtx_pi_insert(struct umtx_pi *pi) 2116 { 2117 struct umtxq_chain *uc; 2118 2119 uc = umtxq_getchain(&pi->pi_key); 2120 UMTXQ_LOCKED_ASSERT(uc); 2121 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2122 } 2123 2124 /* 2125 * Drop a PI mutex and wakeup a top waiter. 
*/
int
umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count)
{
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	int pri;

	/*
	 * Must be called with the chain of key locked and busy.
	 * *count receives the result of umtxq_count_pi().  Returns 0 on
	 * success, or EPERM when there is a waiter but the PI mutex is
	 * owned by neither td nor (for rb) nobody.
	 */
	UMTXQ_ASSERT_LOCKED_BUSY(key);
	*count = umtxq_count_pi(key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute the priority lent to td from the first
		 * waiters of the PI mutexes it still owns.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	return (0);
}

/*
 * Lock a PI mutex.
*/
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	/*
	 * A non-zero 'try' makes the function return EBUSY instead of
	 * sleeping; timeout, when not NULL, bounds the sleep.
	 *
	 * Returns 0 on success, EOWNERDEAD when ownership was taken
	 * over from UMUTEX_RB_OWNERDEAD, ENOTRECOVERABLE, EDEADLK when
	 * the caller already owns the mutex, EBUSY, EFAULT, or an error
	 * from umtx_key_get(), umtx_pi_claim(), umtxq_sleep_pi() or
	 * thread_check_susp().
	 */
	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Find or create the umtx_pi for this key.  A non-sleeping
	 * allocation is tried first with the chain locked; if that
	 * fails the lock is dropped for a sleeping allocation and the
	 * lookup is repeated in case another thread inserted one.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* The reference is dropped again on the way out of the function. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Nobody owns it, but the acquire failed.  This can happen
		 * with ll/sc atomics.
		 */
		if (owner == UMUTEX_UNOWNED) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed the lock could have
				 * changed, restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/* Keep the chain busy across the user-memory update. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		/*
		 * On a sleep error make one more pass over the lock
		 * word; 'error' is kept and checked further up.
		 */
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 *
 * flags and rb are forwarded to umtx_unlock_val() to obtain the value
 * stored into m_owner on release.
 *
 * Returns 0 on success, EFAULT on a failed user-memory access, EPERM
 * if the calling thread is not the recorded owner, EINVAL if the owner
 * word changed before the final update, or an error from
 * umtx_key_get(), umtx_pi_drop() or thread_check_susp().
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t id, new_owner, old, owner;
	int count, error;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	error = umtx_pi_drop(td, &key, rb, &count);
	if (error != 0) {
		umtxq_unbusy(&key);
		umtxq_unlock(&key);
		umtx_key_release(&key);
		/* userland messed up the mutex */
		return (error);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		/* Retry the update unless the suspension check fails. */
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}

/*
 * Lock a PP mutex.
*/
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	/*
	 * A non-zero 'try' makes the function return EBUSY instead of
	 * sleeping; timeout, when not NULL, bounds the sleep.
	 *
	 * Returns 0 on success, EOWNERDEAD when ownership was taken
	 * over from UMUTEX_RB_OWNERDEAD, ENOTRECOVERABLE, EINVAL for an
	 * unusable ceiling, EBUSY, EFAULT, or an error from
	 * umtx_key_get() or umtxq_sleep().
	 */
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/* su: the thread passes the PRIV_SCHED_RTPRIO privilege check. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		/*
		 * ceiling is unsigned, so a user value above
		 * RTP_PRIO_MAX wraps around and is rejected here.
		 */
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		/* Only a privileged thread has the ceiling lent to it. */
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * have not already caught a signal.  If we
			 * get an error from the check, the same
			 * condition is checked by the umtxq_sleep()
			 * call below, so we should obliterate the
			 * error to not skip the last loop iteration.
			 *
			 * NOTE(review): the 'continue' below reaches the
			 * umtxq_busy() call at the top of the loop with
			 * no umtxq_unbusy() in between -- confirm that
			 * re-marking an already busy chain is safe.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Undo the ceiling for the next pass: restore the saved
		 * inherited priority and recompute the lent priority
		 * from the PI mutexes this thread owns.
		 */
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	/*
	 * The mutex was not acquired (EOWNERDEAD counts as acquired):
	 * undo the ceiling in the same way as after a sleep above.
	 */
	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
2672 */ 2673 error = fueword32(&m->m_owner, &owner); 2674 if (error == -1) 2675 return (EFAULT); 2676 2677 if ((owner & ~UMUTEX_CONTESTED) != id) 2678 return (EPERM); 2679 2680 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2681 if (error != 0) 2682 return (error); 2683 2684 if (rceiling == -1) 2685 new_inherited_pri = PRI_MAX; 2686 else { 2687 rceiling = RTP_PRIO_MAX - rceiling; 2688 if (rceiling > RTP_PRIO_MAX) 2689 return (EINVAL); 2690 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2691 } 2692 2693 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2694 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2695 &key)) != 0) 2696 return (error); 2697 umtxq_lock(&key); 2698 umtxq_busy(&key); 2699 umtxq_unlock(&key); 2700 /* 2701 * For priority protected mutex, always set unlocked state 2702 * to UMUTEX_CONTESTED, so that userland always enters kernel 2703 * to lock the mutex, it is necessary because thread priority 2704 * has to be adjusted for such mutex. 
2705 */ 2706 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2707 UMUTEX_CONTESTED); 2708 2709 umtxq_lock(&key); 2710 if (error == 0) 2711 umtxq_signal(&key, 1); 2712 umtxq_unbusy(&key); 2713 umtxq_unlock(&key); 2714 2715 if (error == -1) 2716 error = EFAULT; 2717 else { 2718 mtx_lock(&umtx_lock); 2719 if (su != 0) 2720 uq->uq_inherited_pri = new_inherited_pri; 2721 pri = PRI_MAX; 2722 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2723 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2724 if (uq2 != NULL) { 2725 if (pri > UPRI(uq2->uq_thread)) 2726 pri = UPRI(uq2->uq_thread); 2727 } 2728 } 2729 if (pri > uq->uq_inherited_pri) 2730 pri = uq->uq_inherited_pri; 2731 thread_lock(td); 2732 sched_lend_user_prio(td, pri); 2733 thread_unlock(td); 2734 mtx_unlock(&umtx_lock); 2735 } 2736 umtx_key_release(&key); 2737 return (error); 2738 } 2739 2740 static int 2741 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2742 uint32_t *old_ceiling) 2743 { 2744 struct umtx_q *uq; 2745 uint32_t flags, id, owner, save_ceiling; 2746 int error, rv, rv1; 2747 2748 error = fueword32(&m->m_flags, &flags); 2749 if (error == -1) 2750 return (EFAULT); 2751 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2752 return (EINVAL); 2753 if (ceiling > RTP_PRIO_MAX) 2754 return (EINVAL); 2755 id = td->td_tid; 2756 uq = td->td_umtxq; 2757 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 
2758 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2759 &uq->uq_key)) != 0) 2760 return (error); 2761 for (;;) { 2762 umtxq_lock(&uq->uq_key); 2763 umtxq_busy(&uq->uq_key); 2764 umtxq_unlock(&uq->uq_key); 2765 2766 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2767 if (rv == -1) { 2768 error = EFAULT; 2769 break; 2770 } 2771 2772 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2773 id | UMUTEX_CONTESTED); 2774 if (rv == -1) { 2775 error = EFAULT; 2776 break; 2777 } 2778 2779 if (rv == 0) { 2780 MPASS(owner == UMUTEX_CONTESTED); 2781 rv = suword32(&m->m_ceilings[0], ceiling); 2782 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2783 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2784 break; 2785 } 2786 2787 if ((owner & ~UMUTEX_CONTESTED) == id) { 2788 rv = suword32(&m->m_ceilings[0], ceiling); 2789 error = rv == 0 ? 0 : EFAULT; 2790 break; 2791 } 2792 2793 if (owner == UMUTEX_RB_OWNERDEAD) { 2794 error = EOWNERDEAD; 2795 break; 2796 } else if (owner == UMUTEX_RB_NOTRECOV) { 2797 error = ENOTRECOVERABLE; 2798 break; 2799 } 2800 2801 /* 2802 * If we caught a signal, we have retried and now 2803 * exit immediately. 2804 */ 2805 if (error != 0) 2806 break; 2807 2808 /* 2809 * We set the contested bit, sleep. Otherwise the lock changed 2810 * and we need to retry or we lost a race to the thread 2811 * unlocking the umtx. 2812 */ 2813 umtxq_lock(&uq->uq_key); 2814 umtxq_insert(uq); 2815 umtxq_unbusy(&uq->uq_key); 2816 error = umtxq_sleep(uq, "umtxpp", NULL); 2817 umtxq_remove(uq); 2818 umtxq_unlock(&uq->uq_key); 2819 } 2820 umtxq_lock(&uq->uq_key); 2821 if (error == 0) 2822 umtxq_signal(&uq->uq_key, INT_MAX); 2823 umtxq_unbusy(&uq->uq_key); 2824 umtxq_unlock(&uq->uq_key); 2825 umtx_key_release(&uq->uq_key); 2826 if (error == 0 && old_ceiling != NULL) { 2827 rv = suword32(old_ceiling, save_ceiling); 2828 error = rv == 0 ? 0 : EFAULT; 2829 } 2830 return (error); 2831 } 2832 2833 /* 2834 * Lock a userland POSIX mutex. 
2835 */ 2836 static int 2837 do_lock_umutex(struct thread *td, struct umutex *m, 2838 struct _umtx_time *timeout, int mode) 2839 { 2840 uint32_t flags; 2841 int error; 2842 2843 error = fueword32(&m->m_flags, &flags); 2844 if (error == -1) 2845 return (EFAULT); 2846 2847 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2848 case 0: 2849 error = do_lock_normal(td, m, flags, timeout, mode); 2850 break; 2851 case UMUTEX_PRIO_INHERIT: 2852 error = do_lock_pi(td, m, flags, timeout, mode); 2853 break; 2854 case UMUTEX_PRIO_PROTECT: 2855 error = do_lock_pp(td, m, flags, timeout, mode); 2856 break; 2857 default: 2858 return (EINVAL); 2859 } 2860 if (timeout == NULL) { 2861 if (error == EINTR && mode != _UMUTEX_WAIT) 2862 error = ERESTART; 2863 } else { 2864 /* Timed-locking is not restarted. */ 2865 if (error == ERESTART) 2866 error = EINTR; 2867 } 2868 return (error); 2869 } 2870 2871 /* 2872 * Unlock a userland POSIX mutex. 2873 */ 2874 static int 2875 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2876 { 2877 uint32_t flags; 2878 int error; 2879 2880 error = fueword32(&m->m_flags, &flags); 2881 if (error == -1) 2882 return (EFAULT); 2883 2884 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2885 case 0: 2886 return (do_unlock_normal(td, m, flags, rb)); 2887 case UMUTEX_PRIO_INHERIT: 2888 return (do_unlock_pi(td, m, flags, rb)); 2889 case UMUTEX_PRIO_PROTECT: 2890 return (do_unlock_pp(td, m, flags, rb)); 2891 } 2892 2893 return (EINVAL); 2894 } 2895 2896 static int 2897 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2898 struct timespec *timeout, u_long wflags) 2899 { 2900 struct umtx_abs_timeout timo; 2901 struct umtx_q *uq; 2902 uint32_t flags, clockid, hasw; 2903 int error; 2904 2905 uq = td->td_umtxq; 2906 error = fueword32(&cv->c_flags, &flags); 2907 if (error == -1) 2908 return (EFAULT); 2909 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2910 if (error != 0) 2911 return (error); 2912 
2913 if ((wflags & CVWAIT_CLOCKID) != 0) { 2914 error = fueword32(&cv->c_clockid, &clockid); 2915 if (error == -1) { 2916 umtx_key_release(&uq->uq_key); 2917 return (EFAULT); 2918 } 2919 if (clockid < CLOCK_REALTIME || 2920 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2921 /* hmm, only HW clock id will work. */ 2922 umtx_key_release(&uq->uq_key); 2923 return (EINVAL); 2924 } 2925 } else { 2926 clockid = CLOCK_REALTIME; 2927 } 2928 2929 umtxq_lock(&uq->uq_key); 2930 umtxq_busy(&uq->uq_key); 2931 umtxq_insert(uq); 2932 umtxq_unlock(&uq->uq_key); 2933 2934 /* 2935 * Set c_has_waiters to 1 before releasing user mutex, also 2936 * don't modify cache line when unnecessary. 2937 */ 2938 error = fueword32(&cv->c_has_waiters, &hasw); 2939 if (error == 0 && hasw == 0) 2940 suword32(&cv->c_has_waiters, 1); 2941 2942 umtxq_unbusy_unlocked(&uq->uq_key); 2943 2944 error = do_unlock_umutex(td, m, false); 2945 2946 if (timeout != NULL) 2947 umtx_abs_timeout_init(&timo, clockid, 2948 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2949 2950 umtxq_lock(&uq->uq_key); 2951 if (error == 0) { 2952 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2953 NULL : &timo); 2954 } 2955 2956 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2957 error = 0; 2958 else { 2959 /* 2960 * This must be timeout,interrupted by signal or 2961 * surprious wakeup, clear c_has_waiter flag when 2962 * necessary. 2963 */ 2964 umtxq_busy(&uq->uq_key); 2965 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2966 int oldlen = uq->uq_cur_queue->length; 2967 umtxq_remove(uq); 2968 if (oldlen == 1) { 2969 umtxq_unlock(&uq->uq_key); 2970 suword32(&cv->c_has_waiters, 0); 2971 umtxq_lock(&uq->uq_key); 2972 } 2973 } 2974 umtxq_unbusy(&uq->uq_key); 2975 if (error == ERESTART) 2976 error = EINTR; 2977 } 2978 2979 umtxq_unlock(&uq->uq_key); 2980 umtx_key_release(&uq->uq_key); 2981 return (error); 2982 } 2983 2984 /* 2985 * Signal a userland condition variable. 
2986 */ 2987 static int 2988 do_cv_signal(struct thread *td, struct ucond *cv) 2989 { 2990 struct umtx_key key; 2991 int error, cnt, nwake; 2992 uint32_t flags; 2993 2994 error = fueword32(&cv->c_flags, &flags); 2995 if (error == -1) 2996 return (EFAULT); 2997 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2998 return (error); 2999 umtxq_lock(&key); 3000 umtxq_busy(&key); 3001 cnt = umtxq_count(&key); 3002 nwake = umtxq_signal(&key, 1); 3003 if (cnt <= nwake) { 3004 umtxq_unlock(&key); 3005 error = suword32(&cv->c_has_waiters, 0); 3006 if (error == -1) 3007 error = EFAULT; 3008 umtxq_lock(&key); 3009 } 3010 umtxq_unbusy(&key); 3011 umtxq_unlock(&key); 3012 umtx_key_release(&key); 3013 return (error); 3014 } 3015 3016 static int 3017 do_cv_broadcast(struct thread *td, struct ucond *cv) 3018 { 3019 struct umtx_key key; 3020 int error; 3021 uint32_t flags; 3022 3023 error = fueword32(&cv->c_flags, &flags); 3024 if (error == -1) 3025 return (EFAULT); 3026 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3027 return (error); 3028 3029 umtxq_lock(&key); 3030 umtxq_busy(&key); 3031 umtxq_signal(&key, INT_MAX); 3032 umtxq_unlock(&key); 3033 3034 error = suword32(&cv->c_has_waiters, 0); 3035 if (error == -1) 3036 error = EFAULT; 3037 3038 umtxq_unbusy_unlocked(&key); 3039 3040 umtx_key_release(&key); 3041 return (error); 3042 } 3043 3044 static int 3045 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3046 struct _umtx_time *timeout) 3047 { 3048 struct umtx_abs_timeout timo; 3049 struct umtx_q *uq; 3050 uint32_t flags, wrflags; 3051 int32_t state, oldstate; 3052 int32_t blocked_readers; 3053 int error, error1, rv; 3054 3055 uq = td->td_umtxq; 3056 error = fueword32(&rwlock->rw_flags, &flags); 3057 if (error == -1) 3058 return (EFAULT); 3059 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3060 if (error != 0) 3061 return (error); 3062 3063 if (timeout != NULL) 3064 
umtx_abs_timeout_init2(&timo, timeout); 3065 3066 wrflags = URWLOCK_WRITE_OWNER; 3067 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3068 wrflags |= URWLOCK_WRITE_WAITERS; 3069 3070 for (;;) { 3071 rv = fueword32(&rwlock->rw_state, &state); 3072 if (rv == -1) { 3073 umtx_key_release(&uq->uq_key); 3074 return (EFAULT); 3075 } 3076 3077 /* try to lock it */ 3078 while (!(state & wrflags)) { 3079 if (__predict_false(URWLOCK_READER_COUNT(state) == 3080 URWLOCK_MAX_READERS)) { 3081 umtx_key_release(&uq->uq_key); 3082 return (EAGAIN); 3083 } 3084 rv = casueword32(&rwlock->rw_state, state, 3085 &oldstate, state + 1); 3086 if (rv == -1) { 3087 umtx_key_release(&uq->uq_key); 3088 return (EFAULT); 3089 } 3090 if (rv == 0) { 3091 MPASS(oldstate == state); 3092 umtx_key_release(&uq->uq_key); 3093 return (0); 3094 } 3095 error = thread_check_susp(td, true); 3096 if (error != 0) 3097 break; 3098 state = oldstate; 3099 } 3100 3101 if (error) 3102 break; 3103 3104 /* grab monitor lock */ 3105 umtxq_lock(&uq->uq_key); 3106 umtxq_busy(&uq->uq_key); 3107 umtxq_unlock(&uq->uq_key); 3108 3109 /* 3110 * re-read the state, in case it changed between the try-lock above 3111 * and the check below 3112 */ 3113 rv = fueword32(&rwlock->rw_state, &state); 3114 if (rv == -1) 3115 error = EFAULT; 3116 3117 /* set read contention bit */ 3118 while (error == 0 && (state & wrflags) && 3119 !(state & URWLOCK_READ_WAITERS)) { 3120 rv = casueword32(&rwlock->rw_state, state, 3121 &oldstate, state | URWLOCK_READ_WAITERS); 3122 if (rv == -1) { 3123 error = EFAULT; 3124 break; 3125 } 3126 if (rv == 0) { 3127 MPASS(oldstate == state); 3128 goto sleep; 3129 } 3130 state = oldstate; 3131 error = thread_check_susp(td, false); 3132 if (error != 0) 3133 break; 3134 } 3135 if (error != 0) { 3136 umtxq_unbusy_unlocked(&uq->uq_key); 3137 break; 3138 } 3139 3140 /* state is changed while setting flags, restart */ 3141 if (!(state & wrflags)) { 3142 umtxq_unbusy_unlocked(&uq->uq_key); 3143 
error = thread_check_susp(td, true); 3144 if (error != 0) 3145 break; 3146 continue; 3147 } 3148 3149 sleep: 3150 /* 3151 * Contention bit is set, before sleeping, increase 3152 * read waiter count. 3153 */ 3154 rv = fueword32(&rwlock->rw_blocked_readers, 3155 &blocked_readers); 3156 if (rv == -1) { 3157 umtxq_unbusy_unlocked(&uq->uq_key); 3158 error = EFAULT; 3159 break; 3160 } 3161 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 3162 3163 while (state & wrflags) { 3164 umtxq_lock(&uq->uq_key); 3165 umtxq_insert(uq); 3166 umtxq_unbusy(&uq->uq_key); 3167 3168 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3169 NULL : &timo); 3170 3171 umtxq_busy(&uq->uq_key); 3172 umtxq_remove(uq); 3173 umtxq_unlock(&uq->uq_key); 3174 if (error) 3175 break; 3176 rv = fueword32(&rwlock->rw_state, &state); 3177 if (rv == -1) { 3178 error = EFAULT; 3179 break; 3180 } 3181 } 3182 3183 /* decrease read waiter count, and may clear read contention bit */ 3184 rv = fueword32(&rwlock->rw_blocked_readers, 3185 &blocked_readers); 3186 if (rv == -1) { 3187 umtxq_unbusy_unlocked(&uq->uq_key); 3188 error = EFAULT; 3189 break; 3190 } 3191 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 3192 if (blocked_readers == 1) { 3193 rv = fueword32(&rwlock->rw_state, &state); 3194 if (rv == -1) { 3195 umtxq_unbusy_unlocked(&uq->uq_key); 3196 error = EFAULT; 3197 break; 3198 } 3199 for (;;) { 3200 rv = casueword32(&rwlock->rw_state, state, 3201 &oldstate, state & ~URWLOCK_READ_WAITERS); 3202 if (rv == -1) { 3203 error = EFAULT; 3204 break; 3205 } 3206 if (rv == 0) { 3207 MPASS(oldstate == state); 3208 break; 3209 } 3210 state = oldstate; 3211 error1 = thread_check_susp(td, false); 3212 if (error1 != 0) { 3213 if (error == 0) 3214 error = error1; 3215 break; 3216 } 3217 } 3218 } 3219 3220 umtxq_unbusy_unlocked(&uq->uq_key); 3221 if (error != 0) 3222 break; 3223 } 3224 umtx_key_release(&uq->uq_key); 3225 if (error == ERESTART) 3226 error = EINTR; 3227 return (error); 3228 } 3229 3230 
static int 3231 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3232 { 3233 struct umtx_abs_timeout timo; 3234 struct umtx_q *uq; 3235 uint32_t flags; 3236 int32_t state, oldstate; 3237 int32_t blocked_writers; 3238 int32_t blocked_readers; 3239 int error, error1, rv; 3240 3241 uq = td->td_umtxq; 3242 error = fueword32(&rwlock->rw_flags, &flags); 3243 if (error == -1) 3244 return (EFAULT); 3245 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3246 if (error != 0) 3247 return (error); 3248 3249 if (timeout != NULL) 3250 umtx_abs_timeout_init2(&timo, timeout); 3251 3252 blocked_readers = 0; 3253 for (;;) { 3254 rv = fueword32(&rwlock->rw_state, &state); 3255 if (rv == -1) { 3256 umtx_key_release(&uq->uq_key); 3257 return (EFAULT); 3258 } 3259 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3260 URWLOCK_READER_COUNT(state) == 0) { 3261 rv = casueword32(&rwlock->rw_state, state, 3262 &oldstate, state | URWLOCK_WRITE_OWNER); 3263 if (rv == -1) { 3264 umtx_key_release(&uq->uq_key); 3265 return (EFAULT); 3266 } 3267 if (rv == 0) { 3268 MPASS(oldstate == state); 3269 umtx_key_release(&uq->uq_key); 3270 return (0); 3271 } 3272 state = oldstate; 3273 error = thread_check_susp(td, true); 3274 if (error != 0) 3275 break; 3276 } 3277 3278 if (error) { 3279 if ((state & (URWLOCK_WRITE_OWNER | 3280 URWLOCK_WRITE_WAITERS)) == 0 && 3281 blocked_readers != 0) { 3282 umtxq_lock(&uq->uq_key); 3283 umtxq_busy(&uq->uq_key); 3284 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3285 UMTX_SHARED_QUEUE); 3286 umtxq_unbusy(&uq->uq_key); 3287 umtxq_unlock(&uq->uq_key); 3288 } 3289 3290 break; 3291 } 3292 3293 /* grab monitor lock */ 3294 umtxq_lock(&uq->uq_key); 3295 umtxq_busy(&uq->uq_key); 3296 umtxq_unlock(&uq->uq_key); 3297 3298 /* 3299 * Re-read the state, in case it changed between the 3300 * try-lock above and the check below. 
3301 */ 3302 rv = fueword32(&rwlock->rw_state, &state); 3303 if (rv == -1) 3304 error = EFAULT; 3305 3306 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3307 URWLOCK_READER_COUNT(state) != 0) && 3308 (state & URWLOCK_WRITE_WAITERS) == 0) { 3309 rv = casueword32(&rwlock->rw_state, state, 3310 &oldstate, state | URWLOCK_WRITE_WAITERS); 3311 if (rv == -1) { 3312 error = EFAULT; 3313 break; 3314 } 3315 if (rv == 0) { 3316 MPASS(oldstate == state); 3317 goto sleep; 3318 } 3319 state = oldstate; 3320 error = thread_check_susp(td, false); 3321 if (error != 0) 3322 break; 3323 } 3324 if (error != 0) { 3325 umtxq_unbusy_unlocked(&uq->uq_key); 3326 break; 3327 } 3328 3329 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3330 URWLOCK_READER_COUNT(state) == 0) { 3331 umtxq_unbusy_unlocked(&uq->uq_key); 3332 error = thread_check_susp(td, false); 3333 if (error != 0) 3334 break; 3335 continue; 3336 } 3337 sleep: 3338 rv = fueword32(&rwlock->rw_blocked_writers, 3339 &blocked_writers); 3340 if (rv == -1) { 3341 umtxq_unbusy_unlocked(&uq->uq_key); 3342 error = EFAULT; 3343 break; 3344 } 3345 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3346 3347 while ((state & URWLOCK_WRITE_OWNER) || 3348 URWLOCK_READER_COUNT(state) != 0) { 3349 umtxq_lock(&uq->uq_key); 3350 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3351 umtxq_unbusy(&uq->uq_key); 3352 3353 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3354 NULL : &timo); 3355 3356 umtxq_busy(&uq->uq_key); 3357 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3358 umtxq_unlock(&uq->uq_key); 3359 if (error) 3360 break; 3361 rv = fueword32(&rwlock->rw_state, &state); 3362 if (rv == -1) { 3363 error = EFAULT; 3364 break; 3365 } 3366 } 3367 3368 rv = fueword32(&rwlock->rw_blocked_writers, 3369 &blocked_writers); 3370 if (rv == -1) { 3371 umtxq_unbusy_unlocked(&uq->uq_key); 3372 error = EFAULT; 3373 break; 3374 } 3375 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3376 if (blocked_writers == 1) { 3377 rv = fueword32(&rwlock->rw_state, &state); 3378 if (rv == -1) { 3379 umtxq_unbusy_unlocked(&uq->uq_key); 3380 error = EFAULT; 3381 break; 3382 } 3383 for (;;) { 3384 rv = casueword32(&rwlock->rw_state, state, 3385 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3386 if (rv == -1) { 3387 error = EFAULT; 3388 break; 3389 } 3390 if (rv == 0) { 3391 MPASS(oldstate == state); 3392 break; 3393 } 3394 state = oldstate; 3395 error1 = thread_check_susp(td, false); 3396 /* 3397 * We are leaving the URWLOCK_WRITE_WAITERS 3398 * behind, but this should not harm the 3399 * correctness. 
3400 */ 3401 if (error1 != 0) { 3402 if (error == 0) 3403 error = error1; 3404 break; 3405 } 3406 } 3407 rv = fueword32(&rwlock->rw_blocked_readers, 3408 &blocked_readers); 3409 if (rv == -1) { 3410 umtxq_unbusy_unlocked(&uq->uq_key); 3411 error = EFAULT; 3412 break; 3413 } 3414 } else 3415 blocked_readers = 0; 3416 3417 umtxq_unbusy_unlocked(&uq->uq_key); 3418 } 3419 3420 umtx_key_release(&uq->uq_key); 3421 if (error == ERESTART) 3422 error = EINTR; 3423 return (error); 3424 } 3425 3426 static int 3427 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3428 { 3429 struct umtx_q *uq; 3430 uint32_t flags; 3431 int32_t state, oldstate; 3432 int error, rv, q, count; 3433 3434 uq = td->td_umtxq; 3435 error = fueword32(&rwlock->rw_flags, &flags); 3436 if (error == -1) 3437 return (EFAULT); 3438 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3439 if (error != 0) 3440 return (error); 3441 3442 error = fueword32(&rwlock->rw_state, &state); 3443 if (error == -1) { 3444 error = EFAULT; 3445 goto out; 3446 } 3447 if (state & URWLOCK_WRITE_OWNER) { 3448 for (;;) { 3449 rv = casueword32(&rwlock->rw_state, state, 3450 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3451 if (rv == -1) { 3452 error = EFAULT; 3453 goto out; 3454 } 3455 if (rv == 1) { 3456 state = oldstate; 3457 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3458 error = EPERM; 3459 goto out; 3460 } 3461 error = thread_check_susp(td, true); 3462 if (error != 0) 3463 goto out; 3464 } else 3465 break; 3466 } 3467 } else if (URWLOCK_READER_COUNT(state) != 0) { 3468 for (;;) { 3469 rv = casueword32(&rwlock->rw_state, state, 3470 &oldstate, state - 1); 3471 if (rv == -1) { 3472 error = EFAULT; 3473 goto out; 3474 } 3475 if (rv == 1) { 3476 state = oldstate; 3477 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3478 error = EPERM; 3479 goto out; 3480 } 3481 error = thread_check_susp(td, true); 3482 if (error != 0) 3483 goto out; 3484 } else 3485 break; 3486 } 3487 } else { 3488 error = EPERM; 3489 goto out; 
3490 } 3491 3492 count = 0; 3493 3494 if (!(flags & URWLOCK_PREFER_READER)) { 3495 if (state & URWLOCK_WRITE_WAITERS) { 3496 count = 1; 3497 q = UMTX_EXCLUSIVE_QUEUE; 3498 } else if (state & URWLOCK_READ_WAITERS) { 3499 count = INT_MAX; 3500 q = UMTX_SHARED_QUEUE; 3501 } 3502 } else { 3503 if (state & URWLOCK_READ_WAITERS) { 3504 count = INT_MAX; 3505 q = UMTX_SHARED_QUEUE; 3506 } else if (state & URWLOCK_WRITE_WAITERS) { 3507 count = 1; 3508 q = UMTX_EXCLUSIVE_QUEUE; 3509 } 3510 } 3511 3512 if (count) { 3513 umtxq_lock(&uq->uq_key); 3514 umtxq_busy(&uq->uq_key); 3515 umtxq_signal_queue(&uq->uq_key, count, q); 3516 umtxq_unbusy(&uq->uq_key); 3517 umtxq_unlock(&uq->uq_key); 3518 } 3519 out: 3520 umtx_key_release(&uq->uq_key); 3521 return (error); 3522 } 3523 3524 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3525 static int 3526 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3527 { 3528 struct umtx_abs_timeout timo; 3529 struct umtx_q *uq; 3530 uint32_t flags, count, count1; 3531 int error, rv, rv1; 3532 3533 uq = td->td_umtxq; 3534 error = fueword32(&sem->_flags, &flags); 3535 if (error == -1) 3536 return (EFAULT); 3537 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3538 if (error != 0) 3539 return (error); 3540 3541 if (timeout != NULL) 3542 umtx_abs_timeout_init2(&timo, timeout); 3543 3544 again: 3545 umtxq_lock(&uq->uq_key); 3546 umtxq_busy(&uq->uq_key); 3547 umtxq_insert(uq); 3548 umtxq_unlock(&uq->uq_key); 3549 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3550 if (rv != -1) 3551 rv1 = fueword32(&sem->_count, &count); 3552 if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) { 3553 if (rv == 0) 3554 suword32(&sem->_has_waiters, 0); 3555 umtxq_lock(&uq->uq_key); 3556 umtxq_unbusy(&uq->uq_key); 3557 umtxq_remove(uq); 3558 umtxq_unlock(&uq->uq_key); 3559 if (rv == -1 || rv1 == -1) { 3560 error = EFAULT; 3561 goto out; 3562 } 3563 if (count != 0) { 3564 error = 0; 3565 goto out; 
3566 } 3567 MPASS(rv == 1 && count1 == 0); 3568 rv = thread_check_susp(td, true); 3569 if (rv == 0) 3570 goto again; 3571 error = rv; 3572 goto out; 3573 } 3574 umtxq_lock(&uq->uq_key); 3575 umtxq_unbusy(&uq->uq_key); 3576 3577 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3578 3579 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3580 error = 0; 3581 else { 3582 umtxq_remove(uq); 3583 /* A relative timeout cannot be restarted. */ 3584 if (error == ERESTART && timeout != NULL && 3585 (timeout->_flags & UMTX_ABSTIME) == 0) 3586 error = EINTR; 3587 } 3588 umtxq_unlock(&uq->uq_key); 3589 out: 3590 umtx_key_release(&uq->uq_key); 3591 return (error); 3592 } 3593 3594 /* 3595 * Signal a userland semaphore. 3596 */ 3597 static int 3598 do_sem_wake(struct thread *td, struct _usem *sem) 3599 { 3600 struct umtx_key key; 3601 int error, cnt; 3602 uint32_t flags; 3603 3604 error = fueword32(&sem->_flags, &flags); 3605 if (error == -1) 3606 return (EFAULT); 3607 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3608 return (error); 3609 umtxq_lock(&key); 3610 umtxq_busy(&key); 3611 cnt = umtxq_count(&key); 3612 if (cnt > 0) { 3613 /* 3614 * Check if count is greater than 0, this means the memory is 3615 * still being referenced by user code, so we can safely 3616 * update _has_waiters flag. 
3617 */ 3618 if (cnt == 1) { 3619 umtxq_unlock(&key); 3620 error = suword32(&sem->_has_waiters, 0); 3621 umtxq_lock(&key); 3622 if (error == -1) 3623 error = EFAULT; 3624 } 3625 umtxq_signal(&key, 1); 3626 } 3627 umtxq_unbusy(&key); 3628 umtxq_unlock(&key); 3629 umtx_key_release(&key); 3630 return (error); 3631 } 3632 #endif 3633 3634 static int 3635 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3636 { 3637 struct umtx_abs_timeout timo; 3638 struct umtx_q *uq; 3639 uint32_t count, flags; 3640 int error, rv; 3641 3642 uq = td->td_umtxq; 3643 flags = fuword32(&sem->_flags); 3644 if (timeout != NULL) 3645 umtx_abs_timeout_init2(&timo, timeout); 3646 3647 again: 3648 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3649 if (error != 0) 3650 return (error); 3651 umtxq_lock(&uq->uq_key); 3652 umtxq_busy(&uq->uq_key); 3653 umtxq_insert(uq); 3654 umtxq_unlock(&uq->uq_key); 3655 rv = fueword32(&sem->_count, &count); 3656 if (rv == -1) { 3657 umtxq_lock(&uq->uq_key); 3658 umtxq_unbusy(&uq->uq_key); 3659 umtxq_remove(uq); 3660 umtxq_unlock(&uq->uq_key); 3661 umtx_key_release(&uq->uq_key); 3662 return (EFAULT); 3663 } 3664 for (;;) { 3665 if (USEM_COUNT(count) != 0) { 3666 umtxq_lock(&uq->uq_key); 3667 umtxq_unbusy(&uq->uq_key); 3668 umtxq_remove(uq); 3669 umtxq_unlock(&uq->uq_key); 3670 umtx_key_release(&uq->uq_key); 3671 return (0); 3672 } 3673 if (count == USEM_HAS_WAITERS) 3674 break; 3675 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3676 if (rv == 0) 3677 break; 3678 umtxq_lock(&uq->uq_key); 3679 umtxq_unbusy(&uq->uq_key); 3680 umtxq_remove(uq); 3681 umtxq_unlock(&uq->uq_key); 3682 umtx_key_release(&uq->uq_key); 3683 if (rv == -1) 3684 return (EFAULT); 3685 rv = thread_check_susp(td, true); 3686 if (rv != 0) 3687 return (rv); 3688 goto again; 3689 } 3690 umtxq_lock(&uq->uq_key); 3691 umtxq_unbusy(&uq->uq_key); 3692 3693 error = umtxq_sleep(uq, "usem", timeout == NULL ? 
NULL : &timo); 3694 3695 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3696 error = 0; 3697 else { 3698 umtxq_remove(uq); 3699 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3700 /* A relative timeout cannot be restarted. */ 3701 if (error == ERESTART) 3702 error = EINTR; 3703 if (error == EINTR) { 3704 kern_clock_gettime(curthread, timo.clockid, 3705 &timo.cur); 3706 timespecsub(&timo.end, &timo.cur, 3707 &timeout->_timeout); 3708 } 3709 } 3710 } 3711 umtxq_unlock(&uq->uq_key); 3712 umtx_key_release(&uq->uq_key); 3713 return (error); 3714 } 3715 3716 /* 3717 * Signal a userland semaphore. 3718 */ 3719 static int 3720 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3721 { 3722 struct umtx_key key; 3723 int error, cnt, rv; 3724 uint32_t count, flags; 3725 3726 rv = fueword32(&sem->_flags, &flags); 3727 if (rv == -1) 3728 return (EFAULT); 3729 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3730 return (error); 3731 umtxq_lock(&key); 3732 umtxq_busy(&key); 3733 cnt = umtxq_count(&key); 3734 if (cnt > 0) { 3735 /* 3736 * If this was the last sleeping thread, clear the waiters 3737 * flag in _count. 
3738 */ 3739 if (cnt == 1) { 3740 umtxq_unlock(&key); 3741 rv = fueword32(&sem->_count, &count); 3742 while (rv != -1 && count & USEM_HAS_WAITERS) { 3743 rv = casueword32(&sem->_count, count, &count, 3744 count & ~USEM_HAS_WAITERS); 3745 if (rv == 1) { 3746 rv = thread_check_susp(td, true); 3747 if (rv != 0) 3748 break; 3749 } 3750 } 3751 if (rv == -1) 3752 error = EFAULT; 3753 else if (rv > 0) { 3754 error = rv; 3755 } 3756 umtxq_lock(&key); 3757 } 3758 3759 umtxq_signal(&key, 1); 3760 } 3761 umtxq_unbusy(&key); 3762 umtxq_unlock(&key); 3763 umtx_key_release(&key); 3764 return (error); 3765 } 3766 3767 #ifdef COMPAT_FREEBSD10 3768 int 3769 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3770 { 3771 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3772 } 3773 3774 int 3775 freebsd10__umtx_unlock(struct thread *td, 3776 struct freebsd10__umtx_unlock_args *uap) 3777 { 3778 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3779 } 3780 #endif 3781 3782 inline int 3783 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3784 { 3785 int error; 3786 3787 error = copyin(uaddr, tsp, sizeof(*tsp)); 3788 if (error == 0) { 3789 if (!timespecvalid_interval(tsp)) 3790 error = EINVAL; 3791 } 3792 return (error); 3793 } 3794 3795 static inline int 3796 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3797 { 3798 int error; 3799 3800 if (size <= sizeof(tp->_timeout)) { 3801 tp->_clockid = CLOCK_REALTIME; 3802 tp->_flags = 0; 3803 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3804 } else 3805 error = copyin(uaddr, tp, sizeof(*tp)); 3806 if (error != 0) 3807 return (error); 3808 if (!timespecvalid_interval(&tp->_timeout)) 3809 return (EINVAL); 3810 return (0); 3811 } 3812 3813 static int 3814 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3815 struct umtx_robust_lists_params *rb) 3816 { 3817 3818 if (size > sizeof(*rb)) 3819 return (EINVAL); 3820 return (copyin(uaddr, rb, size)); 3821 } 
3822 3823 static int 3824 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3825 { 3826 3827 /* 3828 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3829 * and we're only called if sz >= sizeof(timespec) as supplied in the 3830 * copyops. 3831 */ 3832 KASSERT(sz >= sizeof(*tsp), 3833 ("umtx_copyops specifies incorrect sizes")); 3834 3835 return (copyout(tsp, uaddr, sizeof(*tsp))); 3836 } 3837 3838 #ifdef COMPAT_FREEBSD10 3839 static int 3840 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3841 const struct umtx_copyops *ops) 3842 { 3843 struct timespec *ts, timeout; 3844 int error; 3845 3846 /* Allow a null timespec (wait forever). */ 3847 if (uap->uaddr2 == NULL) 3848 ts = NULL; 3849 else { 3850 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3851 if (error != 0) 3852 return (error); 3853 ts = &timeout; 3854 } 3855 #ifdef COMPAT_FREEBSD32 3856 if (ops->compat32) 3857 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3858 #endif 3859 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3860 } 3861 3862 static int 3863 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3864 const struct umtx_copyops *ops) 3865 { 3866 #ifdef COMPAT_FREEBSD32 3867 if (ops->compat32) 3868 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3869 #endif 3870 return (do_unlock_umtx(td, uap->obj, uap->val)); 3871 } 3872 #endif /* COMPAT_FREEBSD10 */ 3873 3874 #if !defined(COMPAT_FREEBSD10) 3875 static int 3876 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3877 const struct umtx_copyops *ops __unused) 3878 { 3879 return (EOPNOTSUPP); 3880 } 3881 #endif /* COMPAT_FREEBSD10 */ 3882 3883 static int 3884 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3885 const struct umtx_copyops *ops) 3886 { 3887 struct _umtx_time timeout, *tm_p; 3888 int error; 3889 3890 if (uap->uaddr2 == NULL) 3891 tm_p = NULL; 3892 else { 3893 error = ops->copyin_umtx_time( 3894 uap->uaddr2, 
(size_t)uap->uaddr1, &timeout); 3895 if (error != 0) 3896 return (error); 3897 tm_p = &timeout; 3898 } 3899 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3900 } 3901 3902 static int 3903 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3904 const struct umtx_copyops *ops) 3905 { 3906 struct _umtx_time timeout, *tm_p; 3907 int error; 3908 3909 if (uap->uaddr2 == NULL) 3910 tm_p = NULL; 3911 else { 3912 error = ops->copyin_umtx_time( 3913 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3914 if (error != 0) 3915 return (error); 3916 tm_p = &timeout; 3917 } 3918 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3919 } 3920 3921 static int 3922 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3923 const struct umtx_copyops *ops) 3924 { 3925 struct _umtx_time *tm_p, timeout; 3926 int error; 3927 3928 if (uap->uaddr2 == NULL) 3929 tm_p = NULL; 3930 else { 3931 error = ops->copyin_umtx_time( 3932 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3933 if (error != 0) 3934 return (error); 3935 tm_p = &timeout; 3936 } 3937 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3938 } 3939 3940 static int 3941 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3942 const struct umtx_copyops *ops __unused) 3943 { 3944 3945 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3946 } 3947 3948 #define BATCH_SIZE 128 3949 static int 3950 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3951 { 3952 char *uaddrs[BATCH_SIZE], **upp; 3953 int count, error, i, pos, tocopy; 3954 3955 upp = (char **)uap->obj; 3956 error = 0; 3957 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3958 pos += tocopy) { 3959 tocopy = MIN(count, BATCH_SIZE); 3960 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3961 if (error != 0) 3962 break; 3963 for (i = 0; i < tocopy; ++i) { 3964 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3965 } 3966 maybe_yield(); 3967 } 3968 return (error); 3969 } 3970 
3971 static int 3972 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3973 { 3974 uint32_t uaddrs[BATCH_SIZE], *upp; 3975 int count, error, i, pos, tocopy; 3976 3977 upp = (uint32_t *)uap->obj; 3978 error = 0; 3979 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3980 pos += tocopy) { 3981 tocopy = MIN(count, BATCH_SIZE); 3982 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3983 if (error != 0) 3984 break; 3985 for (i = 0; i < tocopy; ++i) { 3986 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3987 INT_MAX, 1); 3988 } 3989 maybe_yield(); 3990 } 3991 return (error); 3992 } 3993 3994 static int 3995 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3996 const struct umtx_copyops *ops) 3997 { 3998 3999 if (ops->compat32) 4000 return (__umtx_op_nwake_private_compat32(td, uap)); 4001 return (__umtx_op_nwake_private_native(td, uap)); 4002 } 4003 4004 static int 4005 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4006 const struct umtx_copyops *ops __unused) 4007 { 4008 4009 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4010 } 4011 4012 static int 4013 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4014 const struct umtx_copyops *ops) 4015 { 4016 struct _umtx_time *tm_p, timeout; 4017 int error; 4018 4019 /* Allow a null timespec (wait forever). 
*/ 4020 if (uap->uaddr2 == NULL) 4021 tm_p = NULL; 4022 else { 4023 error = ops->copyin_umtx_time( 4024 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4025 if (error != 0) 4026 return (error); 4027 tm_p = &timeout; 4028 } 4029 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4030 } 4031 4032 static int 4033 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4034 const struct umtx_copyops *ops __unused) 4035 { 4036 4037 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4038 } 4039 4040 static int 4041 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4042 const struct umtx_copyops *ops) 4043 { 4044 struct _umtx_time *tm_p, timeout; 4045 int error; 4046 4047 /* Allow a null timespec (wait forever). */ 4048 if (uap->uaddr2 == NULL) 4049 tm_p = NULL; 4050 else { 4051 error = ops->copyin_umtx_time( 4052 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4053 if (error != 0) 4054 return (error); 4055 tm_p = &timeout; 4056 } 4057 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4058 } 4059 4060 static int 4061 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4062 const struct umtx_copyops *ops __unused) 4063 { 4064 4065 return (do_wake_umutex(td, uap->obj)); 4066 } 4067 4068 static int 4069 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4070 const struct umtx_copyops *ops __unused) 4071 { 4072 4073 return (do_unlock_umutex(td, uap->obj, false)); 4074 } 4075 4076 static int 4077 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4078 const struct umtx_copyops *ops __unused) 4079 { 4080 4081 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4082 } 4083 4084 static int 4085 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4086 const struct umtx_copyops *ops) 4087 { 4088 struct timespec *ts, timeout; 4089 int error; 4090 4091 /* Allow a null timespec (wait forever). 
*/ 4092 if (uap->uaddr2 == NULL) 4093 ts = NULL; 4094 else { 4095 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4096 if (error != 0) 4097 return (error); 4098 ts = &timeout; 4099 } 4100 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4101 } 4102 4103 static int 4104 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4105 const struct umtx_copyops *ops __unused) 4106 { 4107 4108 return (do_cv_signal(td, uap->obj)); 4109 } 4110 4111 static int 4112 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4113 const struct umtx_copyops *ops __unused) 4114 { 4115 4116 return (do_cv_broadcast(td, uap->obj)); 4117 } 4118 4119 static int 4120 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4121 const struct umtx_copyops *ops) 4122 { 4123 struct _umtx_time timeout; 4124 int error; 4125 4126 /* Allow a null timespec (wait forever). */ 4127 if (uap->uaddr2 == NULL) { 4128 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4129 } else { 4130 error = ops->copyin_umtx_time(uap->uaddr2, 4131 (size_t)uap->uaddr1, &timeout); 4132 if (error != 0) 4133 return (error); 4134 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4135 } 4136 return (error); 4137 } 4138 4139 static int 4140 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4141 const struct umtx_copyops *ops) 4142 { 4143 struct _umtx_time timeout; 4144 int error; 4145 4146 /* Allow a null timespec (wait forever). 
*/ 4147 if (uap->uaddr2 == NULL) { 4148 error = do_rw_wrlock(td, uap->obj, 0); 4149 } else { 4150 error = ops->copyin_umtx_time(uap->uaddr2, 4151 (size_t)uap->uaddr1, &timeout); 4152 if (error != 0) 4153 return (error); 4154 4155 error = do_rw_wrlock(td, uap->obj, &timeout); 4156 } 4157 return (error); 4158 } 4159 4160 static int 4161 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4162 const struct umtx_copyops *ops __unused) 4163 { 4164 4165 return (do_rw_unlock(td, uap->obj)); 4166 } 4167 4168 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4169 static int 4170 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4171 const struct umtx_copyops *ops) 4172 { 4173 struct _umtx_time *tm_p, timeout; 4174 int error; 4175 4176 /* Allow a null timespec (wait forever). */ 4177 if (uap->uaddr2 == NULL) 4178 tm_p = NULL; 4179 else { 4180 error = ops->copyin_umtx_time( 4181 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4182 if (error != 0) 4183 return (error); 4184 tm_p = &timeout; 4185 } 4186 return (do_sem_wait(td, uap->obj, tm_p)); 4187 } 4188 4189 static int 4190 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4191 const struct umtx_copyops *ops __unused) 4192 { 4193 4194 return (do_sem_wake(td, uap->obj)); 4195 } 4196 #endif 4197 4198 static int 4199 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4200 const struct umtx_copyops *ops __unused) 4201 { 4202 4203 return (do_wake2_umutex(td, uap->obj, uap->val)); 4204 } 4205 4206 static int 4207 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4208 const struct umtx_copyops *ops) 4209 { 4210 struct _umtx_time *tm_p, timeout; 4211 size_t uasize; 4212 int error; 4213 4214 /* Allow a null timespec (wait forever). 
*/ 4215 if (uap->uaddr2 == NULL) { 4216 uasize = 0; 4217 tm_p = NULL; 4218 } else { 4219 uasize = (size_t)uap->uaddr1; 4220 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4221 if (error != 0) 4222 return (error); 4223 tm_p = &timeout; 4224 } 4225 error = do_sem2_wait(td, uap->obj, tm_p); 4226 if (error == EINTR && uap->uaddr2 != NULL && 4227 (timeout._flags & UMTX_ABSTIME) == 0 && 4228 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4229 error = ops->copyout_timeout( 4230 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4231 uasize - ops->umtx_time_sz, &timeout._timeout); 4232 if (error == 0) { 4233 error = EINTR; 4234 } 4235 } 4236 4237 return (error); 4238 } 4239 4240 static int 4241 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4242 const struct umtx_copyops *ops __unused) 4243 { 4244 4245 return (do_sem2_wake(td, uap->obj)); 4246 } 4247 4248 #define USHM_OBJ_UMTX(o) \ 4249 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4250 4251 #define USHMF_REG_LINKED 0x0001 4252 #define USHMF_OBJ_LINKED 0x0002 4253 struct umtx_shm_reg { 4254 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4255 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4256 struct umtx_key ushm_key; 4257 struct ucred *ushm_cred; 4258 struct shmfd *ushm_obj; 4259 u_int ushm_refcnt; 4260 u_int ushm_flags; 4261 }; 4262 4263 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4264 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4265 4266 static uma_zone_t umtx_shm_reg_zone; 4267 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4268 static struct mtx umtx_shm_lock; 4269 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4270 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4271 4272 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4273 4274 static void 4275 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4276 { 4277 struct umtx_shm_reg_head d; 4278 struct umtx_shm_reg *reg, *reg1; 4279 4280 TAILQ_INIT(&d); 4281 mtx_lock(&umtx_shm_lock); 4282 
TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4283 mtx_unlock(&umtx_shm_lock); 4284 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4285 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4286 umtx_shm_free_reg(reg); 4287 } 4288 } 4289 4290 static struct task umtx_shm_reg_delfree_task = 4291 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4292 4293 static struct umtx_shm_reg * 4294 umtx_shm_find_reg_locked(const struct umtx_key *key) 4295 { 4296 struct umtx_shm_reg *reg; 4297 struct umtx_shm_reg_head *reg_head; 4298 4299 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4300 mtx_assert(&umtx_shm_lock, MA_OWNED); 4301 reg_head = &umtx_shm_registry[key->hash]; 4302 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4303 KASSERT(reg->ushm_key.shared, 4304 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4305 if (reg->ushm_key.info.shared.object == 4306 key->info.shared.object && 4307 reg->ushm_key.info.shared.offset == 4308 key->info.shared.offset) { 4309 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4310 KASSERT(reg->ushm_refcnt > 0, 4311 ("reg %p refcnt 0 onlist", reg)); 4312 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4313 ("reg %p not linked", reg)); 4314 reg->ushm_refcnt++; 4315 return (reg); 4316 } 4317 } 4318 return (NULL); 4319 } 4320 4321 static struct umtx_shm_reg * 4322 umtx_shm_find_reg(const struct umtx_key *key) 4323 { 4324 struct umtx_shm_reg *reg; 4325 4326 mtx_lock(&umtx_shm_lock); 4327 reg = umtx_shm_find_reg_locked(key); 4328 mtx_unlock(&umtx_shm_lock); 4329 return (reg); 4330 } 4331 4332 static void 4333 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4334 { 4335 4336 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4337 crfree(reg->ushm_cred); 4338 shm_drop(reg->ushm_obj); 4339 uma_zfree(umtx_shm_reg_zone, reg); 4340 } 4341 4342 static bool 4343 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 4344 { 4345 bool res; 4346 4347 mtx_assert(&umtx_shm_lock, MA_OWNED); 4348 KASSERT(reg->ushm_refcnt > 0, 
("ushm_reg %p refcnt 0", reg)); 4349 reg->ushm_refcnt--; 4350 res = reg->ushm_refcnt == 0; 4351 if (res || force) { 4352 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 4353 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 4354 reg, ushm_reg_link); 4355 reg->ushm_flags &= ~USHMF_REG_LINKED; 4356 } 4357 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 4358 LIST_REMOVE(reg, ushm_obj_link); 4359 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 4360 } 4361 } 4362 return (res); 4363 } 4364 4365 static void 4366 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 4367 { 4368 vm_object_t object; 4369 bool dofree; 4370 4371 if (force) { 4372 object = reg->ushm_obj->shm_object; 4373 VM_OBJECT_WLOCK(object); 4374 vm_object_set_flag(object, OBJ_UMTXDEAD); 4375 VM_OBJECT_WUNLOCK(object); 4376 } 4377 mtx_lock(&umtx_shm_lock); 4378 dofree = umtx_shm_unref_reg_locked(reg, force); 4379 mtx_unlock(&umtx_shm_lock); 4380 if (dofree) 4381 umtx_shm_free_reg(reg); 4382 } 4383 4384 void 4385 umtx_shm_object_init(vm_object_t object) 4386 { 4387 4388 LIST_INIT(USHM_OBJ_UMTX(object)); 4389 } 4390 4391 void 4392 umtx_shm_object_terminated(vm_object_t object) 4393 { 4394 struct umtx_shm_reg *reg, *reg1; 4395 bool dofree; 4396 4397 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4398 return; 4399 4400 dofree = false; 4401 mtx_lock(&umtx_shm_lock); 4402 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4403 if (umtx_shm_unref_reg_locked(reg, true)) { 4404 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4405 ushm_reg_link); 4406 dofree = true; 4407 } 4408 } 4409 mtx_unlock(&umtx_shm_lock); 4410 if (dofree) 4411 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4412 } 4413 4414 static int 4415 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4416 struct umtx_shm_reg **res) 4417 { 4418 struct umtx_shm_reg *reg, *reg1; 4419 struct ucred *cred; 4420 int error; 4421 4422 reg = umtx_shm_find_reg(key); 4423 if (reg != NULL) { 4424 *res = reg; 4425 return (0); 
4426 } 4427 cred = td->td_ucred; 4428 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4429 return (ENOMEM); 4430 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4431 reg->ushm_refcnt = 1; 4432 bcopy(key, ®->ushm_key, sizeof(*key)); 4433 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4434 reg->ushm_cred = crhold(cred); 4435 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4436 if (error != 0) { 4437 umtx_shm_free_reg(reg); 4438 return (error); 4439 } 4440 mtx_lock(&umtx_shm_lock); 4441 reg1 = umtx_shm_find_reg_locked(key); 4442 if (reg1 != NULL) { 4443 mtx_unlock(&umtx_shm_lock); 4444 umtx_shm_free_reg(reg); 4445 *res = reg1; 4446 return (0); 4447 } 4448 reg->ushm_refcnt++; 4449 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4450 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4451 ushm_obj_link); 4452 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4453 mtx_unlock(&umtx_shm_lock); 4454 *res = reg; 4455 return (0); 4456 } 4457 4458 static int 4459 umtx_shm_alive(struct thread *td, void *addr) 4460 { 4461 vm_map_t map; 4462 vm_map_entry_t entry; 4463 vm_object_t object; 4464 vm_pindex_t pindex; 4465 vm_prot_t prot; 4466 int res, ret; 4467 boolean_t wired; 4468 4469 map = &td->td_proc->p_vmspace->vm_map; 4470 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4471 &object, &pindex, &prot, &wired); 4472 if (res != KERN_SUCCESS) 4473 return (EFAULT); 4474 if (object == NULL) 4475 ret = EINVAL; 4476 else 4477 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? 
ENOTTY : 0; 4478 vm_map_lookup_done(map, entry); 4479 return (ret); 4480 } 4481 4482 static void 4483 umtx_shm_init(void) 4484 { 4485 int i; 4486 4487 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4488 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4489 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4490 for (i = 0; i < nitems(umtx_shm_registry); i++) 4491 TAILQ_INIT(&umtx_shm_registry[i]); 4492 } 4493 4494 static int 4495 umtx_shm(struct thread *td, void *addr, u_int flags) 4496 { 4497 struct umtx_key key; 4498 struct umtx_shm_reg *reg; 4499 struct file *fp; 4500 int error, fd; 4501 4502 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4503 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4504 return (EINVAL); 4505 if ((flags & UMTX_SHM_ALIVE) != 0) 4506 return (umtx_shm_alive(td, addr)); 4507 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4508 if (error != 0) 4509 return (error); 4510 KASSERT(key.shared == 1, ("non-shared key")); 4511 if ((flags & UMTX_SHM_CREAT) != 0) { 4512 error = umtx_shm_create_reg(td, &key, ®); 4513 } else { 4514 reg = umtx_shm_find_reg(&key); 4515 if (reg == NULL) 4516 error = ESRCH; 4517 } 4518 umtx_key_release(&key); 4519 if (error != 0) 4520 return (error); 4521 KASSERT(reg != NULL, ("no reg")); 4522 if ((flags & UMTX_SHM_DESTROY) != 0) { 4523 umtx_shm_unref_reg(reg, true); 4524 } else { 4525 #if 0 4526 #ifdef MAC 4527 error = mac_posixshm_check_open(td->td_ucred, 4528 reg->ushm_obj, FFLAGS(O_RDWR)); 4529 if (error == 0) 4530 #endif 4531 error = shm_access(reg->ushm_obj, td->td_ucred, 4532 FFLAGS(O_RDWR)); 4533 if (error == 0) 4534 #endif 4535 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4536 if (error == 0) { 4537 shm_hold(reg->ushm_obj); 4538 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4539 &shm_ops); 4540 td->td_retval[0] = fd; 4541 fdrop(fp, td); 4542 } 4543 } 4544 umtx_shm_unref_reg(reg, false); 4545 return (error); 4546 } 4547 4548 static int 4549 __umtx_op_shm(struct thread 
*td, struct _umtx_op_args *uap, 4550 const struct umtx_copyops *ops __unused) 4551 { 4552 4553 return (umtx_shm(td, uap->uaddr1, uap->val)); 4554 } 4555 4556 static int 4557 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4558 const struct umtx_copyops *ops) 4559 { 4560 struct umtx_robust_lists_params rb; 4561 int error; 4562 4563 if (ops->compat32) { 4564 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4565 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4566 td->td_rb_inact != 0)) 4567 return (EBUSY); 4568 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4569 return (EBUSY); 4570 } 4571 4572 bzero(&rb, sizeof(rb)); 4573 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4574 if (error != 0) 4575 return (error); 4576 4577 if (ops->compat32) 4578 td->td_pflags2 |= TDP2_COMPAT32RB; 4579 4580 td->td_rb_list = rb.robust_list_offset; 4581 td->td_rbp_list = rb.robust_priv_list_offset; 4582 td->td_rb_inact = rb.robust_inact_offset; 4583 return (0); 4584 } 4585 4586 #if defined(__i386__) || defined(__amd64__) 4587 /* 4588 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4589 * 32-bit time_t there. Other architectures just need the i386 definitions 4590 * along with their standard compat32. 4591 */ 4592 struct timespecx32 { 4593 int64_t tv_sec; 4594 int32_t tv_nsec; 4595 }; 4596 4597 struct umtx_timex32 { 4598 struct timespecx32 _timeout; 4599 uint32_t _flags; 4600 uint32_t _clockid; 4601 }; 4602 4603 #ifndef __i386__ 4604 #define timespeci386 timespec32 4605 #define umtx_timei386 umtx_time32 4606 #endif 4607 #else /* !__i386__ && !__amd64__ */ 4608 /* 32-bit architectures can emulate i386, so define these almost everywhere. 
*/ 4609 struct timespeci386 { 4610 int32_t tv_sec; 4611 int32_t tv_nsec; 4612 }; 4613 4614 struct umtx_timei386 { 4615 struct timespeci386 _timeout; 4616 uint32_t _flags; 4617 uint32_t _clockid; 4618 }; 4619 4620 #if defined(__LP64__) 4621 #define timespecx32 timespec32 4622 #define umtx_timex32 umtx_time32 4623 #endif 4624 #endif 4625 4626 static int 4627 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4628 struct umtx_robust_lists_params *rbp) 4629 { 4630 struct umtx_robust_lists_params_compat32 rb32; 4631 int error; 4632 4633 if (size > sizeof(rb32)) 4634 return (EINVAL); 4635 bzero(&rb32, sizeof(rb32)); 4636 error = copyin(uaddr, &rb32, size); 4637 if (error != 0) 4638 return (error); 4639 CP(rb32, *rbp, robust_list_offset); 4640 CP(rb32, *rbp, robust_priv_list_offset); 4641 CP(rb32, *rbp, robust_inact_offset); 4642 return (0); 4643 } 4644 4645 #ifndef __i386__ 4646 static inline int 4647 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4648 { 4649 struct timespeci386 ts32; 4650 int error; 4651 4652 error = copyin(uaddr, &ts32, sizeof(ts32)); 4653 if (error == 0) { 4654 if (!timespecvalid_interval(&ts32)) 4655 error = EINVAL; 4656 else { 4657 CP(ts32, *tsp, tv_sec); 4658 CP(ts32, *tsp, tv_nsec); 4659 } 4660 } 4661 return (error); 4662 } 4663 4664 static inline int 4665 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4666 { 4667 struct umtx_timei386 t32; 4668 int error; 4669 4670 t32._clockid = CLOCK_REALTIME; 4671 t32._flags = 0; 4672 if (size <= sizeof(t32._timeout)) 4673 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4674 else 4675 error = copyin(uaddr, &t32, sizeof(t32)); 4676 if (error != 0) 4677 return (error); 4678 if (!timespecvalid_interval(&t32._timeout)) 4679 return (EINVAL); 4680 TS_CP(t32, *tp, _timeout); 4681 CP(t32, *tp, _flags); 4682 CP(t32, *tp, _clockid); 4683 return (0); 4684 } 4685 4686 static int 4687 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec 
*tsp) 4688 { 4689 struct timespeci386 remain32 = { 4690 .tv_sec = tsp->tv_sec, 4691 .tv_nsec = tsp->tv_nsec, 4692 }; 4693 4694 /* 4695 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4696 * and we're only called if sz >= sizeof(timespec) as supplied in the 4697 * copyops. 4698 */ 4699 KASSERT(sz >= sizeof(remain32), 4700 ("umtx_copyops specifies incorrect sizes")); 4701 4702 return (copyout(&remain32, uaddr, sizeof(remain32))); 4703 } 4704 #endif /* !__i386__ */ 4705 4706 #if defined(__i386__) || defined(__LP64__) 4707 static inline int 4708 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4709 { 4710 struct timespecx32 ts32; 4711 int error; 4712 4713 error = copyin(uaddr, &ts32, sizeof(ts32)); 4714 if (error == 0) { 4715 if (!timespecvalid_interval(&ts32)) 4716 error = EINVAL; 4717 else { 4718 CP(ts32, *tsp, tv_sec); 4719 CP(ts32, *tsp, tv_nsec); 4720 } 4721 } 4722 return (error); 4723 } 4724 4725 static inline int 4726 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4727 { 4728 struct umtx_timex32 t32; 4729 int error; 4730 4731 t32._clockid = CLOCK_REALTIME; 4732 t32._flags = 0; 4733 if (size <= sizeof(t32._timeout)) 4734 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4735 else 4736 error = copyin(uaddr, &t32, sizeof(t32)); 4737 if (error != 0) 4738 return (error); 4739 if (!timespecvalid_interval(&t32._timeout)) 4740 return (EINVAL); 4741 TS_CP(t32, *tp, _timeout); 4742 CP(t32, *tp, _flags); 4743 CP(t32, *tp, _clockid); 4744 return (0); 4745 } 4746 4747 static int 4748 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4749 { 4750 struct timespecx32 remain32 = { 4751 .tv_sec = tsp->tv_sec, 4752 .tv_nsec = tsp->tv_nsec, 4753 }; 4754 4755 /* 4756 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4757 * and we're only called if sz >= sizeof(timespec) as supplied in the 4758 * copyops. 
4759 */ 4760 KASSERT(sz >= sizeof(remain32), 4761 ("umtx_copyops specifies incorrect sizes")); 4762 4763 return (copyout(&remain32, uaddr, sizeof(remain32))); 4764 } 4765 #endif /* __i386__ || __LP64__ */ 4766 4767 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4768 const struct umtx_copyops *umtx_ops); 4769 4770 static const _umtx_op_func op_table[] = { 4771 #ifdef COMPAT_FREEBSD10 4772 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4773 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4774 #else 4775 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4776 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4777 #endif 4778 [UMTX_OP_WAIT] = __umtx_op_wait, 4779 [UMTX_OP_WAKE] = __umtx_op_wake, 4780 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4781 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4782 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4783 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4784 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4785 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4786 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4787 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4788 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4789 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4790 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4791 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4792 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4793 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4794 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4795 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4796 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4797 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4798 #else 4799 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4800 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4801 #endif 4802 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4803 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4804 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4805 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4806 [UMTX_OP_SHM] = __umtx_op_shm, 4807 [UMTX_OP_ROBUST_LISTS] = 
__umtx_op_robust_lists, 4808 }; 4809 4810 static const struct umtx_copyops umtx_native_ops = { 4811 .copyin_timeout = umtx_copyin_timeout, 4812 .copyin_umtx_time = umtx_copyin_umtx_time, 4813 .copyin_robust_lists = umtx_copyin_robust_lists, 4814 .copyout_timeout = umtx_copyout_timeout, 4815 .timespec_sz = sizeof(struct timespec), 4816 .umtx_time_sz = sizeof(struct _umtx_time), 4817 }; 4818 4819 #ifndef __i386__ 4820 static const struct umtx_copyops umtx_native_opsi386 = { 4821 .copyin_timeout = umtx_copyin_timeouti386, 4822 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4823 .copyin_robust_lists = umtx_copyin_robust_lists32, 4824 .copyout_timeout = umtx_copyout_timeouti386, 4825 .timespec_sz = sizeof(struct timespeci386), 4826 .umtx_time_sz = sizeof(struct umtx_timei386), 4827 .compat32 = true, 4828 }; 4829 #endif 4830 4831 #if defined(__i386__) || defined(__LP64__) 4832 /* i386 can emulate other 32-bit archs, too! */ 4833 static const struct umtx_copyops umtx_native_opsx32 = { 4834 .copyin_timeout = umtx_copyin_timeoutx32, 4835 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4836 .copyin_robust_lists = umtx_copyin_robust_lists32, 4837 .copyout_timeout = umtx_copyout_timeoutx32, 4838 .timespec_sz = sizeof(struct timespecx32), 4839 .umtx_time_sz = sizeof(struct umtx_timex32), 4840 .compat32 = true, 4841 }; 4842 4843 #ifdef COMPAT_FREEBSD32 4844 #ifdef __amd64__ 4845 #define umtx_native_ops32 umtx_native_opsi386 4846 #else 4847 #define umtx_native_ops32 umtx_native_opsx32 4848 #endif 4849 #endif /* COMPAT_FREEBSD32 */ 4850 #endif /* __i386__ || __LP64__ */ 4851 4852 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4853 4854 static int 4855 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4856 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4857 { 4858 struct _umtx_op_args uap = { 4859 .obj = obj, 4860 .op = op & ~UMTX_OP__FLAGS, 4861 .val = val, 4862 .uaddr1 = uaddr1, 4863 .uaddr2 = uaddr2 4864 }; 4865 4866 if ((uap.op >= 
nitems(op_table))) 4867 return (EINVAL); 4868 return ((*op_table[uap.op])(td, &uap, ops)); 4869 } 4870 4871 int 4872 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4873 { 4874 static const struct umtx_copyops *umtx_ops; 4875 4876 umtx_ops = &umtx_native_ops; 4877 #ifdef __LP64__ 4878 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 4879 if ((uap->op & UMTX_OP__I386) != 0) 4880 umtx_ops = &umtx_native_opsi386; 4881 else 4882 umtx_ops = &umtx_native_opsx32; 4883 } 4884 #elif !defined(__i386__) 4885 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 4886 if ((uap->op & UMTX_OP__I386) != 0) 4887 umtx_ops = &umtx_native_opsi386; 4888 #else 4889 /* Likewise, UMTX_OP__I386 is a nop on i386. */ 4890 if ((uap->op & UMTX_OP__32BIT) != 0) 4891 umtx_ops = &umtx_native_opsx32; 4892 #endif 4893 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4894 uap->uaddr2, umtx_ops)); 4895 } 4896 4897 #ifdef COMPAT_FREEBSD32 4898 #ifdef COMPAT_FREEBSD10 4899 int 4900 freebsd10_freebsd32__umtx_lock(struct thread *td, 4901 struct freebsd10_freebsd32__umtx_lock_args *uap) 4902 { 4903 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 4904 } 4905 4906 int 4907 freebsd10_freebsd32__umtx_unlock(struct thread *td, 4908 struct freebsd10_freebsd32__umtx_unlock_args *uap) 4909 { 4910 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 4911 } 4912 #endif /* COMPAT_FREEBSD10 */ 4913 4914 int 4915 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4916 { 4917 4918 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4919 uap->uaddr2, &umtx_native_ops32)); 4920 } 4921 #endif /* COMPAT_FREEBSD32 */ 4922 4923 void 4924 umtx_thread_init(struct thread *td) 4925 { 4926 4927 td->td_umtxq = umtxq_alloc(); 4928 td->td_umtxq->uq_thread = td; 4929 } 4930 4931 void 4932 umtx_thread_fini(struct thread *td) 4933 { 4934 4935 umtxq_free(td->td_umtxq); 4936 } 4937 4938 /* 4939 * It will be called when new 
thread is created, e.g fork(). 4940 */ 4941 void 4942 umtx_thread_alloc(struct thread *td) 4943 { 4944 struct umtx_q *uq; 4945 4946 uq = td->td_umtxq; 4947 uq->uq_inherited_pri = PRI_MAX; 4948 4949 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4950 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4951 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4952 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4953 } 4954 4955 /* 4956 * exec() hook. 4957 * 4958 * Clear robust lists for all process' threads, not delaying the 4959 * cleanup to thread exit, since the relevant address space is 4960 * destroyed right now. 4961 */ 4962 void 4963 umtx_exec(struct proc *p) 4964 { 4965 struct thread *td; 4966 4967 KASSERT(p == curproc, ("need curproc")); 4968 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4969 (p->p_flag & P_STOPPED_SINGLE) != 0, 4970 ("curproc must be single-threaded")); 4971 /* 4972 * There is no need to lock the list as only this thread can be 4973 * running. 4974 */ 4975 FOREACH_THREAD_IN_PROC(p, td) { 4976 KASSERT(td == curthread || 4977 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4978 ("running thread %p %p", p, td)); 4979 umtx_thread_cleanup(td); 4980 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4981 } 4982 } 4983 4984 /* 4985 * thread exit hook. 
4986 */ 4987 void 4988 umtx_thread_exit(struct thread *td) 4989 { 4990 4991 umtx_thread_cleanup(td); 4992 } 4993 4994 static int 4995 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 4996 { 4997 u_long res1; 4998 uint32_t res32; 4999 int error; 5000 5001 if (compat32) { 5002 error = fueword32((void *)ptr, &res32); 5003 if (error == 0) 5004 res1 = res32; 5005 } else { 5006 error = fueword((void *)ptr, &res1); 5007 } 5008 if (error == 0) 5009 *res = res1; 5010 else 5011 error = EFAULT; 5012 return (error); 5013 } 5014 5015 static void 5016 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 5017 bool compat32) 5018 { 5019 struct umutex32 m32; 5020 5021 if (compat32) { 5022 memcpy(&m32, m, sizeof(m32)); 5023 *rb_list = m32.m_rb_lnk; 5024 } else { 5025 *rb_list = m->m_rb_lnk; 5026 } 5027 } 5028 5029 static int 5030 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 5031 bool compat32) 5032 { 5033 struct umutex m; 5034 int error; 5035 5036 KASSERT(td->td_proc == curproc, ("need current vmspace")); 5037 error = copyin((void *)rbp, &m, sizeof(m)); 5038 if (error != 0) 5039 return (error); 5040 if (rb_list != NULL) 5041 umtx_read_rb_list(td, &m, rb_list, compat32); 5042 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5043 return (EINVAL); 5044 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5045 /* inact is cleared after unlock, allow the inconsistency */ 5046 return (inact ? 
0 : EINVAL); 5047 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5048 } 5049 5050 static void 5051 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5052 const char *name, bool compat32) 5053 { 5054 int error, i; 5055 uintptr_t rbp; 5056 bool inact; 5057 5058 if (rb_list == 0) 5059 return; 5060 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5061 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5062 if (rbp == *rb_inact) { 5063 inact = true; 5064 *rb_inact = 0; 5065 } else 5066 inact = false; 5067 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5068 } 5069 if (i == umtx_max_rb && umtx_verbose_rb) { 5070 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5071 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5072 } 5073 if (error != 0 && umtx_verbose_rb) { 5074 uprintf("comm %s pid %d: handling %srb error %d\n", 5075 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5076 } 5077 } 5078 5079 /* 5080 * Clean up umtx data. 5081 */ 5082 static void 5083 umtx_thread_cleanup(struct thread *td) 5084 { 5085 struct umtx_q *uq; 5086 struct umtx_pi *pi; 5087 uintptr_t rb_inact; 5088 bool compat32; 5089 5090 /* 5091 * Disown pi mutexes. 5092 */ 5093 uq = td->td_umtxq; 5094 if (uq != NULL) { 5095 if (uq->uq_inherited_pri != PRI_MAX || 5096 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5097 mtx_lock(&umtx_lock); 5098 uq->uq_inherited_pri = PRI_MAX; 5099 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5100 pi->pi_owner = NULL; 5101 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5102 } 5103 mtx_unlock(&umtx_lock); 5104 } 5105 sched_lend_user_prio_cond(td, PRI_MAX); 5106 } 5107 5108 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5109 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5110 5111 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5112 return; 5113 5114 /* 5115 * Handle terminated robust mutexes. 
Must be done after 5116 * robust pi disown, otherwise unlock could see unowned 5117 * entries. 5118 */ 5119 rb_inact = td->td_rb_inact; 5120 if (rb_inact != 0) 5121 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5122 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5123 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5124 if (rb_inact != 0) 5125 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5126 } 5127