/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>
#include <sys/umtxvar.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#include <compat/freebsd32/freebsd32.h>
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#ifdef INVARIANTS
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {				\
	struct umtxq_chain *uc;						\
									\
	uc = umtxq_getchain(key);					\
	mtx_assert(&uc->uc_lock, MA_OWNED);				\
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));		\
} while (0)
#else
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0)
#endif

/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user can simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * is boosted, and A's priority is boosted too via priority
 * propagation.  A's priority would then never be lowered, even if A
 * is using 100% CPU, which is unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t	umtx_pi_zone;
static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int		umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, "umtx chain stats");
#endif

static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;
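
/*
 * Illustrative note on the UPRI() clamp above (values only for
 * illustration): for any thread in the time-sharing range, UPRI()
 * evaluates to PRI_MAX_TIMESHARE regardless of how favorable
 * td_user_pri currently is:
 *
 *	td_user_pri == PRI_MIN_TIMESHARE  -> UPRI(td) == PRI_MAX_TIMESHARE
 *	td_user_pri == PRI_MAX_TIMESHARE  -> UPRI(td) == PRI_MAX_TIMESHARE
 *	td_user_pri in the rtprio range   -> UPRI(td) == td_user_pri
 *
 * A time-sharing waiter therefore never boosts a PI-mutex owner,
 * which implements the policy described in the comment above.
 */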

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I",
    "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A",
    "Highest peaks in chains max length");
#endif
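
/*
 * Usage sketch (assumes a kernel built with "options UMTX_PROFILING"):
 * the handlers above can be driven from userland with sysctl(8), e.g.
 *
 *	sysctl debug.umtx.chains.peaks		(dump the top-5 chain peaks)
 *	sysctl debug.umtx.chains.clear=1	(reset the counters)
 *
 * The per-chain peak counters also appear as
 * debug.umtx.chains.<N>.max_length0 and .max_length1.
 */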

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Set the chain to the busy state when the following operation
 * may block (a kernel mutex cannot be used).
 */
void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
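
/*
 * Typical busy/unbusy pattern, as used by the lock routines below
 * (a condensed sketch, not a complete example):
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);	(serialize with other contenders)
 *	umtxq_unlock(key);
 *	... access the userspace word; this may fault and sleep ...
 *	umtxq_lock(key);
 *	umtxq_unbusy(key);	(wake a thread waiting in umtxq_busy())
 *	umtxq_unlock(key);
 */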
430 */ 431 void 432 umtxq_unbusy(struct umtx_key *key) 433 { 434 struct umtxq_chain *uc; 435 436 uc = umtxq_getchain(key); 437 mtx_assert(&uc->uc_lock, MA_OWNED); 438 KASSERT(uc->uc_busy != 0, ("not busy")); 439 uc->uc_busy = 0; 440 if (uc->uc_waiters) 441 wakeup_one(uc); 442 } 443 444 void 445 umtxq_unbusy_unlocked(struct umtx_key *key) 446 { 447 448 umtxq_lock(key); 449 umtxq_unbusy(key); 450 umtxq_unlock(key); 451 } 452 453 static struct umtxq_queue * 454 umtxq_queue_lookup(struct umtx_key *key, int q) 455 { 456 struct umtxq_queue *uh; 457 struct umtxq_chain *uc; 458 459 uc = umtxq_getchain(key); 460 UMTXQ_LOCKED_ASSERT(uc); 461 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 462 if (umtx_key_match(&uh->key, key)) 463 return (uh); 464 } 465 466 return (NULL); 467 } 468 469 void 470 umtxq_insert_queue(struct umtx_q *uq, int q) 471 { 472 struct umtxq_queue *uh; 473 struct umtxq_chain *uc; 474 475 uc = umtxq_getchain(&uq->uq_key); 476 UMTXQ_LOCKED_ASSERT(uc); 477 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 478 uh = umtxq_queue_lookup(&uq->uq_key, q); 479 if (uh != NULL) { 480 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 481 } else { 482 uh = uq->uq_spare_queue; 483 uh->key = uq->uq_key; 484 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 485 #ifdef UMTX_PROFILING 486 uc->length++; 487 if (uc->length > uc->max_length) { 488 uc->max_length = uc->length; 489 if (uc->max_length > max_length) 490 max_length = uc->max_length; 491 } 492 #endif 493 } 494 uq->uq_spare_queue = NULL; 495 496 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 497 uh->length++; 498 uq->uq_flags |= UQF_UMTXQ; 499 uq->uq_cur_queue = uh; 500 return; 501 } 502 503 void 504 umtxq_remove_queue(struct umtx_q *uq, int q) 505 { 506 struct umtxq_chain *uc; 507 struct umtxq_queue *uh; 508 509 uc = umtxq_getchain(&uq->uq_key); 510 UMTXQ_LOCKED_ASSERT(uc); 511 if (uq->uq_flags & UQF_UMTXQ) { 512 uh = uq->uq_cur_queue; 513 TAILQ_REMOVE(&uh->head, uq, uq_link); 514 uh->length--; 515 uq->uq_flags &= ~UQF_UMTXQ; 516 if (TAILQ_EMPTY(&uh->head)) { 517 KASSERT(uh->length == 0, 518 ("inconsistent umtxq_queue length")); 519 #ifdef UMTX_PROFILING 520 uc->length--; 521 #endif 522 LIST_REMOVE(uh, link); 523 } else { 524 uh = LIST_FIRST(&uc->uc_spare_queue); 525 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 526 LIST_REMOVE(uh, link); 527 } 528 uq->uq_spare_queue = uh; 529 uq->uq_cur_queue = NULL; 530 } 531 } 532 533 /* 534 * Check if there are multiple waiters 535 */ 536 int 537 umtxq_count(struct umtx_key *key) 538 { 539 struct umtxq_queue *uh; 540 541 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 542 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 543 if (uh != NULL) 544 return (uh->length); 545 return (0); 546 } 547 548 /* 549 * Check if there are multiple PI waiters and returns first 550 * waiter. 551 */ 552 static int 553 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 554 { 555 struct umtxq_queue *uh; 556 557 *first = NULL; 558 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 559 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 560 if (uh != NULL) { 561 *first = TAILQ_FIRST(&uh->head); 562 return (uh->length); 563 } 564 return (0); 565 } 566 567 /* 568 * Wake up threads waiting on an userland object by a bit mask. 
569 */ 570 int 571 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 572 { 573 struct umtxq_queue *uh; 574 struct umtx_q *uq, *uq_temp; 575 int ret; 576 577 ret = 0; 578 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 579 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 580 if (uh == NULL) 581 return (0); 582 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 583 if ((uq->uq_bitset & bitset) == 0) 584 continue; 585 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 586 wakeup_one(uq); 587 if (++ret >= n_wake) 588 break; 589 } 590 return (ret); 591 } 592 593 /* 594 * Wake up threads waiting on an userland object. 595 */ 596 597 static int 598 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 599 { 600 struct umtxq_queue *uh; 601 struct umtx_q *uq; 602 int ret; 603 604 ret = 0; 605 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 606 uh = umtxq_queue_lookup(key, q); 607 if (uh != NULL) { 608 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 609 umtxq_remove_queue(uq, q); 610 wakeup(uq); 611 if (++ret >= n_wake) 612 return (ret); 613 } 614 } 615 return (ret); 616 } 617 618 /* 619 * Wake up specified thread. 620 */ 621 static inline void 622 umtxq_signal_thread(struct umtx_q *uq) 623 { 624 625 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 626 umtxq_remove(uq); 627 wakeup(uq); 628 } 629 630 /* 631 * Wake up a maximum of n_wake threads that are waiting on an userland 632 * object identified by key. The remaining threads are removed from queue 633 * identified by key and added to the queue identified by key2 (requeued). 634 * The n_requeue specifies an upper limit on the number of threads that 635 * are requeued to the second queue. 636 */ 637 int 638 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 639 int n_requeue) 640 { 641 struct umtxq_queue *uh; 642 struct umtx_q *uq, *uq_temp; 643 int ret; 644 645 ret = 0; 646 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 647 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 648 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 649 if (uh == NULL) 650 return (0); 651 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 652 if (++ret <= n_wake) { 653 umtxq_remove(uq); 654 wakeup_one(uq); 655 } else { 656 umtxq_remove(uq); 657 uq->uq_key = *key2; 658 umtxq_insert(uq); 659 if (ret - n_wake == n_requeue) 660 break; 661 } 662 } 663 return (ret); 664 } 665 666 static inline int 667 tstohz(const struct timespec *tsp) 668 { 669 struct timeval tv; 670 671 TIMESPEC_TO_TIMEVAL(&tv, tsp); 672 return tvtohz(&tv); 673 } 674 675 void 676 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 677 int absolute, const struct timespec *timeout) 678 { 679 680 timo->clockid = clockid; 681 if (!absolute) { 682 timo->is_abs_real = false; 683 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 684 timespecadd(&timo->cur, timeout, &timo->end); 685 } else { 686 timo->end = *timeout; 687 timo->is_abs_real = clockid == CLOCK_REALTIME || 688 clockid == CLOCK_REALTIME_FAST || 689 clockid == CLOCK_REALTIME_PRECISE || 690 clockid == CLOCK_SECOND; 691 } 692 } 693 694 static void 695 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 696 const struct _umtx_time *umtxtime) 697 { 698 699 umtx_abs_timeout_init(timo, umtxtime->_clockid, 700 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 701 } 702 703 static int 704 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt, 705 int *flags) 706 { 707 struct bintime bt, bbt; 708 struct timespec tts; 709 sbintime_t rem; 710 711 switch (timo->clockid) { 712 713 /* Clocks that can 

static int
umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt,
    int *flags)
{
	struct bintime bt, bbt;
	struct timespec tts;
	sbintime_t rem;

	switch (timo->clockid) {

	/* Clocks that can be converted into absolute time. */
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_PRECISE:
	case CLOCK_REALTIME_FAST:
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_PRECISE:
	case CLOCK_MONOTONIC_FAST:
	case CLOCK_UPTIME:
	case CLOCK_UPTIME_PRECISE:
	case CLOCK_UPTIME_FAST:
	case CLOCK_SECOND:
		timespec2bintime(&timo->end, &bt);
		switch (timo->clockid) {
		case CLOCK_REALTIME:
		case CLOCK_REALTIME_PRECISE:
		case CLOCK_REALTIME_FAST:
		case CLOCK_SECOND:
			getboottimebin(&bbt);
			bintime_sub(&bt, &bbt);
			break;
		}
		if (bt.sec < 0)
			return (ETIMEDOUT);
		if (bt.sec >= (SBT_MAX >> 32)) {
			*sbt = 0;
			*flags = 0;
			return (0);
		}
		*sbt = bttosbt(bt);

		/*
		 * Check if the absolute time should be aligned to
		 * avoid firing multiple timer events in non-periodic
		 * timer mode.
		 */
		switch (timo->clockid) {
		case CLOCK_REALTIME_FAST:
		case CLOCK_MONOTONIC_FAST:
		case CLOCK_UPTIME_FAST:
			rem = *sbt % tc_tick_sbt;
			if (__predict_true(rem != 0))
				*sbt += tc_tick_sbt - rem;
			break;
		case CLOCK_SECOND:
			rem = *sbt % SBT_1S;
			if (__predict_true(rem != 0))
				*sbt += SBT_1S - rem;
			break;
		}
		*flags = C_ABSOLUTE;
		return (0);

	/* Clocks that have to be periodically polled. */
	case CLOCK_VIRTUAL:
	case CLOCK_PROF:
	case CLOCK_THREAD_CPUTIME_ID:
	case CLOCK_PROCESS_CPUTIME_ID:
	default:
		kern_clock_gettime(curthread, timo->clockid, &timo->cur);
		if (timespeccmp(&timo->end, &timo->cur, <=))
			return (ETIMEDOUT);
		timespecsub(&timo->end, &timo->cur, &tts);
		*sbt = tick_sbt * tstohz(&tts);
		*flags = C_HARDCLOCK;
		return (0);
	}
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
int
umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct umtx_abs_timeout *timo)
{
	struct umtxq_chain *uc;
	sbintime_t sbt = 0;
	int error, flags = 0;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (timo != NULL) {
			if (timo->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			error = umtx_abs_timeout_getsbt(timo, &sbt, &flags);
			if (error != 0)
				break;
		}
		error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg,
		    sbt, 0, flags);
		uc = umtxq_getchain(&uq->uq_key);
		mtx_lock(&uc->uc_lock);
		if (error == EINTR || error == ERESTART)
			break;
		if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	curthread->td_rtcgen = 0;
	return (error);
}
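
/*
 * Locking contract sketch for umtxq_sleep() above: the caller holds
 * the chain lock and has already queued uq.  msleep_sbt() is called
 * with PDROP, so the chain lock is dropped while sleeping and
 * re-taken before the queue state is examined again.  A normal wakeup
 * removes uq from the queue first, so the UQF_UMTXQ test at the top
 * of the loop turns it into success; callers such as do_wait()
 * re-check UQF_UMTXQ themselves to resolve the race where a timeout
 * fires just as a waker removes the thread.
 */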
839 */ 840 int 841 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 842 { 843 struct thread *td = curthread; 844 vm_map_t map; 845 vm_map_entry_t entry; 846 vm_pindex_t pindex; 847 vm_prot_t prot; 848 boolean_t wired; 849 850 key->type = type; 851 if (share == THREAD_SHARE) { 852 key->shared = 0; 853 key->info.private.vs = td->td_proc->p_vmspace; 854 key->info.private.addr = (uintptr_t)addr; 855 } else { 856 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 857 map = &td->td_proc->p_vmspace->vm_map; 858 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 859 &entry, &key->info.shared.object, &pindex, &prot, 860 &wired) != KERN_SUCCESS) { 861 return (EFAULT); 862 } 863 864 if ((share == PROCESS_SHARE) || 865 (share == AUTO_SHARE && 866 VM_INHERIT_SHARE == entry->inheritance)) { 867 key->shared = 1; 868 key->info.shared.offset = (vm_offset_t)addr - 869 entry->start + entry->offset; 870 vm_object_reference(key->info.shared.object); 871 } else { 872 key->shared = 0; 873 key->info.private.vs = td->td_proc->p_vmspace; 874 key->info.private.addr = (uintptr_t)addr; 875 } 876 vm_map_lookup_done(map, entry); 877 } 878 879 umtxq_hash(key); 880 return (0); 881 } 882 883 /* 884 * Release key. 885 */ 886 void 887 umtx_key_release(struct umtx_key *key) 888 { 889 if (key->shared) 890 vm_object_deallocate(key->info.shared.object); 891 } 892 893 #ifdef COMPAT_FREEBSD10 894 /* 895 * Lock a umtx object. 896 */ 897 static int 898 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 899 const struct timespec *timeout) 900 { 901 struct umtx_abs_timeout timo; 902 struct umtx_q *uq; 903 u_long owner; 904 u_long old; 905 int error = 0; 906 907 uq = td->td_umtxq; 908 if (timeout != NULL) 909 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 910 911 /* 912 * Care must be exercised when dealing with umtx structure. It 913 * can fault on any access. 914 */ 915 for (;;) { 916 /* 917 * Try the uncontested case. This should be done in userland. 918 */ 919 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 920 921 /* The acquire succeeded. */ 922 if (owner == UMTX_UNOWNED) 923 return (0); 924 925 /* The address was invalid. */ 926 if (owner == -1) 927 return (EFAULT); 928 929 /* If no one owns it but it is contested try to acquire it. */ 930 if (owner == UMTX_CONTESTED) { 931 owner = casuword(&umtx->u_owner, 932 UMTX_CONTESTED, id | UMTX_CONTESTED); 933 934 if (owner == UMTX_CONTESTED) 935 return (0); 936 937 /* The address was invalid. */ 938 if (owner == -1) 939 return (EFAULT); 940 941 error = thread_check_susp(td, false); 942 if (error != 0) 943 break; 944 945 /* If this failed the lock has changed, restart. */ 946 continue; 947 } 948 949 /* 950 * If we caught a signal, we have retried and now 951 * exit immediately. 952 */ 953 if (error != 0) 954 break; 955 956 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 957 AUTO_SHARE, &uq->uq_key)) != 0) 958 return (error); 959 960 umtxq_lock(&uq->uq_key); 961 umtxq_busy(&uq->uq_key); 962 umtxq_insert(uq); 963 umtxq_unbusy(&uq->uq_key); 964 umtxq_unlock(&uq->uq_key); 965 966 /* 967 * Set the contested bit so that a release in user space 968 * knows to use the system call for unlock. If this fails 969 * either some one else has acquired the lock or it has been 970 * released. 971 */ 972 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 973 974 /* The address was invalid. 

#ifdef COMPAT_FREEBSD10
/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * only zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
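
/*
 * A compressed view of the contested-bit protocol implemented by
 * do_lock_umtx()/do_unlock_umtx() above (sketch only):
 *
 *	lock:   casuword(&u_owner, UMTX_UNOWNED, id) succeeds
 *		-> owned, done (normally handled entirely in userland);
 *		otherwise set UMTX_CONTESTED in u_owner and sleep.
 *	unlock: if contested, wake one waiter, and leave the word
 *		marked UMTX_CONTESTED when more than one waiter
 *		remains, so that the new owner's unlock also enters
 *		the kernel.
 */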

#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * only zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif	/* COMPAT_FREEBSD32 */
#endif	/* COMPAT_FREEBSD10 */

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
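
/*
 * Note on lost-wakeup avoidance in do_wait() above: the thread is
 * inserted on the wait queue before the userspace word is re-read
 * with fueword().  If the word changes and kern_umtx_wake() runs
 * before the waiter sleeps, the waker (holding the chain lock)
 * removes the waiter from the queue, and umtxq_sleep() observes the
 * cleared UQF_UMTXQ flag under the same lock and returns immediately
 * instead of sleeping.
 */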

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}
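
/*
 * Robust-mutex death sketch for the EOWNERDEAD path above: when the
 * owner of a robust mutex exits without unlocking it, the exit-time
 * robust list walk (umtx_thread_cleanup(), declared above) stores
 * UMUTEX_RB_OWNERDEAD into m_owner via umtx_unlock_val().  The next
 * locker CASes that value to (id | UMUTEX_CONTESTED) and gets
 * EOWNERDEAD, taking ownership of a possibly inconsistent mutex; if
 * the state is instead abandoned, the word is set to
 * UMUTEX_RB_NOTRECOV and every subsequent locker gets
 * ENOTRECOVERABLE.
 */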

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * only zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this works only for a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.
	 * Otherwise don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
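
/*
 * Example of the cycle umtx_pi_check_loop() above guards against
 * (a sketch; only buggy or malicious userland can create it):
 * thread A owns mutex M1 and blocks on M2, while thread B owns M2
 * and blocks on M1.  Following pi_owner->uq_pi_blocked from either
 * pi never terminates, so the propagation loops below would spin
 * forever.  The two iterators advance at different speeds (one step
 * vs. two per round) and must meet inside such a cycle, which lets
 * the walk bail out safely.
 */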

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
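
/*
 * Worked example of the propagation above (illustrative priorities,
 * lower is better): a thread with UPRI == 120 blocks on a PI mutex
 * whose owner runs at UPRI == 160.  umtx_propagate_priority() lends
 * 120 to the owner; if that owner is itself blocked on another PI
 * mutex, the walk repeats with the next pi_owner.  When a waiter
 * leaves, umtx_repropagate_priority() recomputes the lent value from
 * the best remaining waiter across all contested mutexes (or from
 * uq_inherited_pri), undoing the boost.
 */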

/*
 * Claim ownership of a PI mutex.
 */
int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex;
 * this may result in a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference count to a PI mutex.
 */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
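
/*
 * Note on the pi_blocked ordering maintained in umtxq_sleep_pi()
 * above: waiters are kept sorted by UPRI(), best priority first, by
 * inserting each new waiter before the first entry with a worse
 * (numerically larger) priority.  The owner's lent priority can then
 * be read from TAILQ_FIRST(&pi->pi_blocked), which is what
 * umtx_pi_claim() and umtx_repropagate_priority() rely on.
 */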

/*
 * Decrease the reference count for a PI mutex; if the counter
 * drops to zero, its memory is freed.
 */
void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Drop a PI mutex and wake up a top waiter.
 */
int
umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count)
{
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	int pri;

	UMTXQ_ASSERT_LOCKED_BUSY(key);
	*count = umtxq_count_pi(key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	return (0);
}
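
/*
 * Lifecycle sketch for struct umtx_pi: do_lock_pi() below looks up or
 * allocates the pi for the key, takes a reference with umtx_pi_ref(),
 * and drops it with umtx_pi_unref() on the way out.  The pi is
 * unhashed and freed only when the last reference goes away, which is
 * why umtx_pi_drop() above may still find an (unowned) pi after a
 * timeout or a signal has emptied the wait queue.
 */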
2204 */ 2205 static int 2206 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 2207 struct _umtx_time *timeout, int try) 2208 { 2209 struct umtx_abs_timeout timo; 2210 struct umtx_q *uq; 2211 struct umtx_pi *pi, *new_pi; 2212 uint32_t id, old_owner, owner, old; 2213 int error, rv; 2214 2215 id = td->td_tid; 2216 uq = td->td_umtxq; 2217 2218 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2219 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2220 &uq->uq_key)) != 0) 2221 return (error); 2222 2223 if (timeout != NULL) 2224 umtx_abs_timeout_init2(&timo, timeout); 2225 2226 umtxq_lock(&uq->uq_key); 2227 pi = umtx_pi_lookup(&uq->uq_key); 2228 if (pi == NULL) { 2229 new_pi = umtx_pi_alloc(M_NOWAIT); 2230 if (new_pi == NULL) { 2231 umtxq_unlock(&uq->uq_key); 2232 new_pi = umtx_pi_alloc(M_WAITOK); 2233 umtxq_lock(&uq->uq_key); 2234 pi = umtx_pi_lookup(&uq->uq_key); 2235 if (pi != NULL) { 2236 umtx_pi_free(new_pi); 2237 new_pi = NULL; 2238 } 2239 } 2240 if (new_pi != NULL) { 2241 new_pi->pi_key = uq->uq_key; 2242 umtx_pi_insert(new_pi); 2243 pi = new_pi; 2244 } 2245 } 2246 umtx_pi_ref(pi); 2247 umtxq_unlock(&uq->uq_key); 2248 2249 /* 2250 * Care must be exercised when dealing with umtx structure. It 2251 * can fault on any access. 2252 */ 2253 for (;;) { 2254 /* 2255 * Try the uncontested case. This should be done in userland. 2256 */ 2257 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 2258 /* The address was invalid. */ 2259 if (rv == -1) { 2260 error = EFAULT; 2261 break; 2262 } 2263 /* The acquire succeeded. */ 2264 if (rv == 0) { 2265 MPASS(owner == UMUTEX_UNOWNED); 2266 error = 0; 2267 break; 2268 } 2269 2270 if (owner == UMUTEX_RB_NOTRECOV) { 2271 error = ENOTRECOVERABLE; 2272 break; 2273 } 2274 2275 /* 2276 * Nobody owns it, but the acquire failed. This can happen 2277 * with ll/sc atomics. 2278 */ 2279 if (owner == UMUTEX_UNOWNED) { 2280 error = thread_check_susp(td, true); 2281 if (error != 0) 2282 break; 2283 continue; 2284 } 2285 2286 /* 2287 * Avoid overwriting a possible error from sleep due 2288 * to the pending signal with suspension check result. 2289 */ 2290 if (error == 0) { 2291 error = thread_check_susp(td, true); 2292 if (error != 0) 2293 break; 2294 } 2295 2296 /* If no one owns it but it is contested try to acquire it. */ 2297 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2298 old_owner = owner; 2299 rv = casueword32(&m->m_owner, owner, &owner, 2300 id | UMUTEX_CONTESTED); 2301 /* The address was invalid. */ 2302 if (rv == -1) { 2303 error = EFAULT; 2304 break; 2305 } 2306 if (rv == 1) { 2307 if (error == 0) { 2308 error = thread_check_susp(td, true); 2309 if (error != 0) 2310 break; 2311 } 2312 2313 /* 2314 * If this failed the lock could 2315 * changed, restart. 2316 */ 2317 continue; 2318 } 2319 2320 MPASS(rv == 0); 2321 MPASS(owner == old_owner); 2322 umtxq_lock(&uq->uq_key); 2323 umtxq_busy(&uq->uq_key); 2324 error = umtx_pi_claim(pi, td); 2325 umtxq_unbusy(&uq->uq_key); 2326 umtxq_unlock(&uq->uq_key); 2327 if (error != 0) { 2328 /* 2329 * Since we're going to return an 2330 * error, restore the m_owner to its 2331 * previous, unowned state to avoid 2332 * compounding the problem. 
2333 */ 2334 (void)casuword32(&m->m_owner, 2335 id | UMUTEX_CONTESTED, old_owner); 2336 } 2337 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2338 error = EOWNERDEAD; 2339 break; 2340 } 2341 2342 if ((owner & ~UMUTEX_CONTESTED) == id) { 2343 error = EDEADLK; 2344 break; 2345 } 2346 2347 if (try != 0) { 2348 error = EBUSY; 2349 break; 2350 } 2351 2352 /* 2353 * If we caught a signal, we have retried and now 2354 * exit immediately. 2355 */ 2356 if (error != 0) 2357 break; 2358 2359 umtxq_lock(&uq->uq_key); 2360 umtxq_busy(&uq->uq_key); 2361 umtxq_unlock(&uq->uq_key); 2362 2363 /* 2364 * Set the contested bit so that a release in user space 2365 * knows to use the system call for unlock. If this fails 2366 * either some one else has acquired the lock or it has been 2367 * released. 2368 */ 2369 rv = casueword32(&m->m_owner, owner, &old, owner | 2370 UMUTEX_CONTESTED); 2371 2372 /* The address was invalid. */ 2373 if (rv == -1) { 2374 umtxq_unbusy_unlocked(&uq->uq_key); 2375 error = EFAULT; 2376 break; 2377 } 2378 if (rv == 1) { 2379 umtxq_unbusy_unlocked(&uq->uq_key); 2380 error = thread_check_susp(td, true); 2381 if (error != 0) 2382 break; 2383 2384 /* 2385 * The lock changed and we need to retry or we 2386 * lost a race to the thread unlocking the 2387 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2388 * value for owner is impossible there. 2389 */ 2390 continue; 2391 } 2392 2393 umtxq_lock(&uq->uq_key); 2394 2395 /* We set the contested bit, sleep. */ 2396 MPASS(old == owner); 2397 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2398 "umtxpi", timeout == NULL ? NULL : &timo, 2399 (flags & USYNC_PROCESS_SHARED) != 0); 2400 if (error != 0) 2401 continue; 2402 2403 error = thread_check_susp(td, false); 2404 if (error != 0) 2405 break; 2406 } 2407 2408 umtxq_lock(&uq->uq_key); 2409 umtx_pi_unref(pi); 2410 umtxq_unlock(&uq->uq_key); 2411 2412 umtx_key_release(&uq->uq_key); 2413 return (error); 2414 } 2415 2416 /* 2417 * Unlock a PI mutex. 2418 */ 2419 static int 2420 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2421 { 2422 struct umtx_key key; 2423 uint32_t id, new_owner, old, owner; 2424 int count, error; 2425 2426 id = td->td_tid; 2427 2428 usrloop: 2429 /* 2430 * Make sure we own this mtx. 2431 */ 2432 error = fueword32(&m->m_owner, &owner); 2433 if (error == -1) 2434 return (EFAULT); 2435 2436 if ((owner & ~UMUTEX_CONTESTED) != id) 2437 return (EPERM); 2438 2439 new_owner = umtx_unlock_val(flags, rb); 2440 2441 /* This should be done in userland */ 2442 if ((owner & UMUTEX_CONTESTED) == 0) { 2443 error = casueword32(&m->m_owner, owner, &old, new_owner); 2444 if (error == -1) 2445 return (EFAULT); 2446 if (error == 1) { 2447 error = thread_check_susp(td, true); 2448 if (error != 0) 2449 return (error); 2450 goto usrloop; 2451 } 2452 if (old == owner) 2453 return (0); 2454 owner = old; 2455 } 2456 2457 /* We should only ever be in here for contested locks */ 2458 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2459 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2460 &key)) != 0) 2461 return (error); 2462 2463 umtxq_lock(&key); 2464 umtxq_busy(&key); 2465 error = umtx_pi_drop(td, &key, rb, &count); 2466 if (error != 0) { 2467 umtxq_unbusy(&key); 2468 umtxq_unlock(&key); 2469 umtx_key_release(&key); 2470 /* userland messed the mutex */ 2471 return (error); 2472 } 2473 umtxq_unlock(&key); 2474 2475 /* 2476 * When unlocking the umtx, it must be marked as unowned if 2477 * there is zero or one thread only waiting for it. 
2478 * Otherwise, it must be marked as contested. 2479 */ 2480 2481 if (count > 1) 2482 new_owner |= UMUTEX_CONTESTED; 2483 again: 2484 error = casueword32(&m->m_owner, owner, &old, new_owner); 2485 if (error == 1) { 2486 error = thread_check_susp(td, false); 2487 if (error == 0) 2488 goto again; 2489 } 2490 umtxq_unbusy_unlocked(&key); 2491 umtx_key_release(&key); 2492 if (error == -1) 2493 return (EFAULT); 2494 if (error == 0 && old != owner) 2495 return (EINVAL); 2496 return (error); 2497 } 2498 2499 /* 2500 * Lock a PP mutex. 2501 */ 2502 static int 2503 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2504 struct _umtx_time *timeout, int try) 2505 { 2506 struct umtx_abs_timeout timo; 2507 struct umtx_q *uq, *uq2; 2508 struct umtx_pi *pi; 2509 uint32_t ceiling; 2510 uint32_t owner, id; 2511 int error, pri, old_inherited_pri, su, rv; 2512 2513 id = td->td_tid; 2514 uq = td->td_umtxq; 2515 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2516 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2517 &uq->uq_key)) != 0) 2518 return (error); 2519 2520 if (timeout != NULL) 2521 umtx_abs_timeout_init2(&timo, timeout); 2522 2523 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2524 for (;;) { 2525 old_inherited_pri = uq->uq_inherited_pri; 2526 umtxq_lock(&uq->uq_key); 2527 umtxq_busy(&uq->uq_key); 2528 umtxq_unlock(&uq->uq_key); 2529 2530 rv = fueword32(&m->m_ceilings[0], &ceiling); 2531 if (rv == -1) { 2532 error = EFAULT; 2533 goto out; 2534 } 2535 ceiling = RTP_PRIO_MAX - ceiling; 2536 if (ceiling > RTP_PRIO_MAX) { 2537 error = EINVAL; 2538 goto out; 2539 } 2540 2541 mtx_lock(&umtx_lock); 2542 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 2543 mtx_unlock(&umtx_lock); 2544 error = EINVAL; 2545 goto out; 2546 } 2547 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 2548 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 2549 thread_lock(td); 2550 if (uq->uq_inherited_pri < UPRI(td)) 2551 sched_lend_user_prio(td, uq->uq_inherited_pri); 2552 thread_unlock(td); 2553 } 2554 mtx_unlock(&umtx_lock); 2555 2556 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2557 id | UMUTEX_CONTESTED); 2558 /* The address was invalid. */ 2559 if (rv == -1) { 2560 error = EFAULT; 2561 break; 2562 } 2563 if (rv == 0) { 2564 MPASS(owner == UMUTEX_CONTESTED); 2565 error = 0; 2566 break; 2567 } 2568 /* rv == 1 */ 2569 if (owner == UMUTEX_RB_OWNERDEAD) { 2570 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2571 &owner, id | UMUTEX_CONTESTED); 2572 if (rv == -1) { 2573 error = EFAULT; 2574 break; 2575 } 2576 if (rv == 0) { 2577 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2578 error = EOWNERDEAD; /* success */ 2579 break; 2580 } 2581 2582 /* 2583 * rv == 1, only check for suspension if we 2584 * did not already catched a signal. If we 2585 * get an error from the check, the same 2586 * condition is checked by the umtxq_sleep() 2587 * call below, so we should obliterate the 2588 * error to not skip the last loop iteration. 2589 */ 2590 if (error == 0) { 2591 error = thread_check_susp(td, false); 2592 if (error == 0) { 2593 if (try != 0) 2594 error = EBUSY; 2595 else 2596 continue; 2597 } 2598 error = 0; 2599 } 2600 } else if (owner == UMUTEX_RB_NOTRECOV) { 2601 error = ENOTRECOVERABLE; 2602 } 2603 2604 if (try != 0) 2605 error = EBUSY; 2606 2607 /* 2608 * If we caught a signal, we have retried and now 2609 * exit immediately. 
2610 */ 2611 if (error != 0) 2612 break; 2613 2614 umtxq_lock(&uq->uq_key); 2615 umtxq_insert(uq); 2616 umtxq_unbusy(&uq->uq_key); 2617 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2618 NULL : &timo); 2619 umtxq_remove(uq); 2620 umtxq_unlock(&uq->uq_key); 2621 2622 mtx_lock(&umtx_lock); 2623 uq->uq_inherited_pri = old_inherited_pri; 2624 pri = PRI_MAX; 2625 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2626 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2627 if (uq2 != NULL) { 2628 if (pri > UPRI(uq2->uq_thread)) 2629 pri = UPRI(uq2->uq_thread); 2630 } 2631 } 2632 if (pri > uq->uq_inherited_pri) 2633 pri = uq->uq_inherited_pri; 2634 thread_lock(td); 2635 sched_lend_user_prio(td, pri); 2636 thread_unlock(td); 2637 mtx_unlock(&umtx_lock); 2638 } 2639 2640 if (error != 0 && error != EOWNERDEAD) { 2641 mtx_lock(&umtx_lock); 2642 uq->uq_inherited_pri = old_inherited_pri; 2643 pri = PRI_MAX; 2644 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2645 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2646 if (uq2 != NULL) { 2647 if (pri > UPRI(uq2->uq_thread)) 2648 pri = UPRI(uq2->uq_thread); 2649 } 2650 } 2651 if (pri > uq->uq_inherited_pri) 2652 pri = uq->uq_inherited_pri; 2653 thread_lock(td); 2654 sched_lend_user_prio(td, pri); 2655 thread_unlock(td); 2656 mtx_unlock(&umtx_lock); 2657 } 2658 2659 out: 2660 umtxq_unbusy_unlocked(&uq->uq_key); 2661 umtx_key_release(&uq->uq_key); 2662 return (error); 2663 } 2664 2665 /* 2666 * Unlock a PP mutex. 2667 */ 2668 static int 2669 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2670 { 2671 struct umtx_key key; 2672 struct umtx_q *uq, *uq2; 2673 struct umtx_pi *pi; 2674 uint32_t id, owner, rceiling; 2675 int error, pri, new_inherited_pri, su; 2676 2677 id = td->td_tid; 2678 uq = td->td_umtxq; 2679 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2680 2681 /* 2682 * Make sure we own this mtx. 2683 */ 2684 error = fueword32(&m->m_owner, &owner); 2685 if (error == -1) 2686 return (EFAULT); 2687 2688 if ((owner & ~UMUTEX_CONTESTED) != id) 2689 return (EPERM); 2690 2691 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2692 if (error != 0) 2693 return (error); 2694 2695 if (rceiling == -1) 2696 new_inherited_pri = PRI_MAX; 2697 else { 2698 rceiling = RTP_PRIO_MAX - rceiling; 2699 if (rceiling > RTP_PRIO_MAX) 2700 return (EINVAL); 2701 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2702 } 2703 2704 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2705 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2706 &key)) != 0) 2707 return (error); 2708 umtxq_lock(&key); 2709 umtxq_busy(&key); 2710 umtxq_unlock(&key); 2711 /* 2712 * For priority protected mutex, always set unlocked state 2713 * to UMUTEX_CONTESTED, so that userland always enters kernel 2714 * to lock the mutex, it is necessary because thread priority 2715 * has to be adjusted for such mutex. 
2716 */ 2717 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2718 UMUTEX_CONTESTED); 2719 2720 umtxq_lock(&key); 2721 if (error == 0) 2722 umtxq_signal(&key, 1); 2723 umtxq_unbusy(&key); 2724 umtxq_unlock(&key); 2725 2726 if (error == -1) 2727 error = EFAULT; 2728 else { 2729 mtx_lock(&umtx_lock); 2730 if (su != 0) 2731 uq->uq_inherited_pri = new_inherited_pri; 2732 pri = PRI_MAX; 2733 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2734 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2735 if (uq2 != NULL) { 2736 if (pri > UPRI(uq2->uq_thread)) 2737 pri = UPRI(uq2->uq_thread); 2738 } 2739 } 2740 if (pri > uq->uq_inherited_pri) 2741 pri = uq->uq_inherited_pri; 2742 thread_lock(td); 2743 sched_lend_user_prio(td, pri); 2744 thread_unlock(td); 2745 mtx_unlock(&umtx_lock); 2746 } 2747 umtx_key_release(&key); 2748 return (error); 2749 } 2750 2751 static int 2752 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2753 uint32_t *old_ceiling) 2754 { 2755 struct umtx_q *uq; 2756 uint32_t flags, id, owner, save_ceiling; 2757 int error, rv, rv1; 2758 2759 error = fueword32(&m->m_flags, &flags); 2760 if (error == -1) 2761 return (EFAULT); 2762 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2763 return (EINVAL); 2764 if (ceiling > RTP_PRIO_MAX) 2765 return (EINVAL); 2766 id = td->td_tid; 2767 uq = td->td_umtxq; 2768 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2769 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2770 &uq->uq_key)) != 0) 2771 return (error); 2772 for (;;) { 2773 umtxq_lock(&uq->uq_key); 2774 umtxq_busy(&uq->uq_key); 2775 umtxq_unlock(&uq->uq_key); 2776 2777 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2778 if (rv == -1) { 2779 error = EFAULT; 2780 break; 2781 } 2782 2783 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2784 id | UMUTEX_CONTESTED); 2785 if (rv == -1) { 2786 error = EFAULT; 2787 break; 2788 } 2789 2790 if (rv == 0) { 2791 MPASS(owner == UMUTEX_CONTESTED); 2792 rv = suword32(&m->m_ceilings[0], ceiling); 2793 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2794 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2795 break; 2796 } 2797 2798 if ((owner & ~UMUTEX_CONTESTED) == id) { 2799 rv = suword32(&m->m_ceilings[0], ceiling); 2800 error = rv == 0 ? 0 : EFAULT; 2801 break; 2802 } 2803 2804 if (owner == UMUTEX_RB_OWNERDEAD) { 2805 error = EOWNERDEAD; 2806 break; 2807 } else if (owner == UMUTEX_RB_NOTRECOV) { 2808 error = ENOTRECOVERABLE; 2809 break; 2810 } 2811 2812 /* 2813 * If we caught a signal, we have retried and now 2814 * exit immediately. 2815 */ 2816 if (error != 0) 2817 break; 2818 2819 /* 2820 * We set the contested bit, sleep. Otherwise the lock changed 2821 * and we need to retry or we lost a race to the thread 2822 * unlocking the umtx. 2823 */ 2824 umtxq_lock(&uq->uq_key); 2825 umtxq_insert(uq); 2826 umtxq_unbusy(&uq->uq_key); 2827 error = umtxq_sleep(uq, "umtxpp", NULL); 2828 umtxq_remove(uq); 2829 umtxq_unlock(&uq->uq_key); 2830 } 2831 umtxq_lock(&uq->uq_key); 2832 if (error == 0) 2833 umtxq_signal(&uq->uq_key, INT_MAX); 2834 umtxq_unbusy(&uq->uq_key); 2835 umtxq_unlock(&uq->uq_key); 2836 umtx_key_release(&uq->uq_key); 2837 if (error == 0 && old_ceiling != NULL) { 2838 rv = suword32(old_ceiling, save_ceiling); 2839 error = rv == 0 ? 0 : EFAULT; 2840 } 2841 return (error); 2842 } 2843 2844 /* 2845 * Lock a userland POSIX mutex. 
2846 */ 2847 static int 2848 do_lock_umutex(struct thread *td, struct umutex *m, 2849 struct _umtx_time *timeout, int mode) 2850 { 2851 uint32_t flags; 2852 int error; 2853 2854 error = fueword32(&m->m_flags, &flags); 2855 if (error == -1) 2856 return (EFAULT); 2857 2858 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2859 case 0: 2860 error = do_lock_normal(td, m, flags, timeout, mode); 2861 break; 2862 case UMUTEX_PRIO_INHERIT: 2863 error = do_lock_pi(td, m, flags, timeout, mode); 2864 break; 2865 case UMUTEX_PRIO_PROTECT: 2866 error = do_lock_pp(td, m, flags, timeout, mode); 2867 break; 2868 default: 2869 return (EINVAL); 2870 } 2871 if (timeout == NULL) { 2872 if (error == EINTR && mode != _UMUTEX_WAIT) 2873 error = ERESTART; 2874 } else { 2875 /* Timed-locking is not restarted. */ 2876 if (error == ERESTART) 2877 error = EINTR; 2878 } 2879 return (error); 2880 } 2881 2882 /* 2883 * Unlock a userland POSIX mutex. 2884 */ 2885 static int 2886 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2887 { 2888 uint32_t flags; 2889 int error; 2890 2891 error = fueword32(&m->m_flags, &flags); 2892 if (error == -1) 2893 return (EFAULT); 2894 2895 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2896 case 0: 2897 return (do_unlock_normal(td, m, flags, rb)); 2898 case UMUTEX_PRIO_INHERIT: 2899 return (do_unlock_pi(td, m, flags, rb)); 2900 case UMUTEX_PRIO_PROTECT: 2901 return (do_unlock_pp(td, m, flags, rb)); 2902 } 2903 2904 return (EINVAL); 2905 } 2906 2907 static int 2908 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2909 struct timespec *timeout, u_long wflags) 2910 { 2911 struct umtx_abs_timeout timo; 2912 struct umtx_q *uq; 2913 uint32_t flags, clockid, hasw; 2914 int error; 2915 2916 uq = td->td_umtxq; 2917 error = fueword32(&cv->c_flags, &flags); 2918 if (error == -1) 2919 return (EFAULT); 2920 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2921 if (error != 0) 2922 return (error); 2923 2924 if ((wflags & CVWAIT_CLOCKID) != 0) { 2925 error = fueword32(&cv->c_clockid, &clockid); 2926 if (error == -1) { 2927 umtx_key_release(&uq->uq_key); 2928 return (EFAULT); 2929 } 2930 if (clockid < CLOCK_REALTIME || 2931 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2932 /* hmm, only HW clock id will work. */ 2933 umtx_key_release(&uq->uq_key); 2934 return (EINVAL); 2935 } 2936 } else { 2937 clockid = CLOCK_REALTIME; 2938 } 2939 2940 umtxq_lock(&uq->uq_key); 2941 umtxq_busy(&uq->uq_key); 2942 umtxq_insert(uq); 2943 umtxq_unlock(&uq->uq_key); 2944 2945 /* 2946 * Set c_has_waiters to 1 before releasing user mutex, also 2947 * don't modify cache line when unnecessary. 2948 */ 2949 error = fueword32(&cv->c_has_waiters, &hasw); 2950 if (error == 0 && hasw == 0) 2951 suword32(&cv->c_has_waiters, 1); 2952 2953 umtxq_unbusy_unlocked(&uq->uq_key); 2954 2955 error = do_unlock_umutex(td, m, false); 2956 2957 if (timeout != NULL) 2958 umtx_abs_timeout_init(&timo, clockid, 2959 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2960 2961 umtxq_lock(&uq->uq_key); 2962 if (error == 0) { 2963 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2964 NULL : &timo); 2965 } 2966 2967 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2968 error = 0; 2969 else { 2970 /* 2971 * This must be timeout,interrupted by signal or 2972 * surprious wakeup, clear c_has_waiter flag when 2973 * necessary. 
2974 */ 2975 umtxq_busy(&uq->uq_key); 2976 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2977 int oldlen = uq->uq_cur_queue->length; 2978 umtxq_remove(uq); 2979 if (oldlen == 1) { 2980 umtxq_unlock(&uq->uq_key); 2981 suword32(&cv->c_has_waiters, 0); 2982 umtxq_lock(&uq->uq_key); 2983 } 2984 } 2985 umtxq_unbusy(&uq->uq_key); 2986 if (error == ERESTART) 2987 error = EINTR; 2988 } 2989 2990 umtxq_unlock(&uq->uq_key); 2991 umtx_key_release(&uq->uq_key); 2992 return (error); 2993 } 2994 2995 /* 2996 * Signal a userland condition variable. 2997 */ 2998 static int 2999 do_cv_signal(struct thread *td, struct ucond *cv) 3000 { 3001 struct umtx_key key; 3002 int error, cnt, nwake; 3003 uint32_t flags; 3004 3005 error = fueword32(&cv->c_flags, &flags); 3006 if (error == -1) 3007 return (EFAULT); 3008 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3009 return (error); 3010 umtxq_lock(&key); 3011 umtxq_busy(&key); 3012 cnt = umtxq_count(&key); 3013 nwake = umtxq_signal(&key, 1); 3014 if (cnt <= nwake) { 3015 umtxq_unlock(&key); 3016 error = suword32(&cv->c_has_waiters, 0); 3017 if (error == -1) 3018 error = EFAULT; 3019 umtxq_lock(&key); 3020 } 3021 umtxq_unbusy(&key); 3022 umtxq_unlock(&key); 3023 umtx_key_release(&key); 3024 return (error); 3025 } 3026 3027 static int 3028 do_cv_broadcast(struct thread *td, struct ucond *cv) 3029 { 3030 struct umtx_key key; 3031 int error; 3032 uint32_t flags; 3033 3034 error = fueword32(&cv->c_flags, &flags); 3035 if (error == -1) 3036 return (EFAULT); 3037 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3038 return (error); 3039 3040 umtxq_lock(&key); 3041 umtxq_busy(&key); 3042 umtxq_signal(&key, INT_MAX); 3043 umtxq_unlock(&key); 3044 3045 error = suword32(&cv->c_has_waiters, 0); 3046 if (error == -1) 3047 error = EFAULT; 3048 3049 umtxq_unbusy_unlocked(&key); 3050 3051 umtx_key_release(&key); 3052 return (error); 3053 } 3054 3055 static int 3056 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3057 struct _umtx_time *timeout) 3058 { 3059 struct umtx_abs_timeout timo; 3060 struct umtx_q *uq; 3061 uint32_t flags, wrflags; 3062 int32_t state, oldstate; 3063 int32_t blocked_readers; 3064 int error, error1, rv; 3065 3066 uq = td->td_umtxq; 3067 error = fueword32(&rwlock->rw_flags, &flags); 3068 if (error == -1) 3069 return (EFAULT); 3070 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3071 if (error != 0) 3072 return (error); 3073 3074 if (timeout != NULL) 3075 umtx_abs_timeout_init2(&timo, timeout); 3076 3077 wrflags = URWLOCK_WRITE_OWNER; 3078 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3079 wrflags |= URWLOCK_WRITE_WAITERS; 3080 3081 for (;;) { 3082 rv = fueword32(&rwlock->rw_state, &state); 3083 if (rv == -1) { 3084 umtx_key_release(&uq->uq_key); 3085 return (EFAULT); 3086 } 3087 3088 /* try to lock it */ 3089 while (!(state & wrflags)) { 3090 if (__predict_false(URWLOCK_READER_COUNT(state) == 3091 URWLOCK_MAX_READERS)) { 3092 umtx_key_release(&uq->uq_key); 3093 return (EAGAIN); 3094 } 3095 rv = casueword32(&rwlock->rw_state, state, 3096 &oldstate, state + 1); 3097 if (rv == -1) { 3098 umtx_key_release(&uq->uq_key); 3099 return (EFAULT); 3100 } 3101 if (rv == 0) { 3102 MPASS(oldstate == state); 3103 umtx_key_release(&uq->uq_key); 3104 return (0); 3105 } 3106 error = thread_check_susp(td, true); 3107 if (error != 0) 3108 break; 3109 state = oldstate; 3110 } 3111 3112 if (error) 3113 break; 3114 3115 /* grab monitor lock */ 3116 umtxq_lock(&uq->uq_key); 
3117 umtxq_busy(&uq->uq_key); 3118 umtxq_unlock(&uq->uq_key); 3119 3120 /* 3121 * re-read the state, in case it changed between the try-lock above 3122 * and the check below 3123 */ 3124 rv = fueword32(&rwlock->rw_state, &state); 3125 if (rv == -1) 3126 error = EFAULT; 3127 3128 /* set read contention bit */ 3129 while (error == 0 && (state & wrflags) && 3130 !(state & URWLOCK_READ_WAITERS)) { 3131 rv = casueword32(&rwlock->rw_state, state, 3132 &oldstate, state | URWLOCK_READ_WAITERS); 3133 if (rv == -1) { 3134 error = EFAULT; 3135 break; 3136 } 3137 if (rv == 0) { 3138 MPASS(oldstate == state); 3139 goto sleep; 3140 } 3141 state = oldstate; 3142 error = thread_check_susp(td, false); 3143 if (error != 0) 3144 break; 3145 } 3146 if (error != 0) { 3147 umtxq_unbusy_unlocked(&uq->uq_key); 3148 break; 3149 } 3150 3151 /* state is changed while setting flags, restart */ 3152 if (!(state & wrflags)) { 3153 umtxq_unbusy_unlocked(&uq->uq_key); 3154 error = thread_check_susp(td, true); 3155 if (error != 0) 3156 break; 3157 continue; 3158 } 3159 3160 sleep: 3161 /* 3162 * Contention bit is set, before sleeping, increase 3163 * read waiter count. 3164 */ 3165 rv = fueword32(&rwlock->rw_blocked_readers, 3166 &blocked_readers); 3167 if (rv == -1) { 3168 umtxq_unbusy_unlocked(&uq->uq_key); 3169 error = EFAULT; 3170 break; 3171 } 3172 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 3173 3174 while (state & wrflags) { 3175 umtxq_lock(&uq->uq_key); 3176 umtxq_insert(uq); 3177 umtxq_unbusy(&uq->uq_key); 3178 3179 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3180 NULL : &timo); 3181 3182 umtxq_busy(&uq->uq_key); 3183 umtxq_remove(uq); 3184 umtxq_unlock(&uq->uq_key); 3185 if (error) 3186 break; 3187 rv = fueword32(&rwlock->rw_state, &state); 3188 if (rv == -1) { 3189 error = EFAULT; 3190 break; 3191 } 3192 } 3193 3194 /* decrease read waiter count, and may clear read contention bit */ 3195 rv = fueword32(&rwlock->rw_blocked_readers, 3196 &blocked_readers); 3197 if (rv == -1) { 3198 umtxq_unbusy_unlocked(&uq->uq_key); 3199 error = EFAULT; 3200 break; 3201 } 3202 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 3203 if (blocked_readers == 1) { 3204 rv = fueword32(&rwlock->rw_state, &state); 3205 if (rv == -1) { 3206 umtxq_unbusy_unlocked(&uq->uq_key); 3207 error = EFAULT; 3208 break; 3209 } 3210 for (;;) { 3211 rv = casueword32(&rwlock->rw_state, state, 3212 &oldstate, state & ~URWLOCK_READ_WAITERS); 3213 if (rv == -1) { 3214 error = EFAULT; 3215 break; 3216 } 3217 if (rv == 0) { 3218 MPASS(oldstate == state); 3219 break; 3220 } 3221 state = oldstate; 3222 error1 = thread_check_susp(td, false); 3223 if (error1 != 0) { 3224 if (error == 0) 3225 error = error1; 3226 break; 3227 } 3228 } 3229 } 3230 3231 umtxq_unbusy_unlocked(&uq->uq_key); 3232 if (error != 0) 3233 break; 3234 } 3235 umtx_key_release(&uq->uq_key); 3236 if (error == ERESTART) 3237 error = EINTR; 3238 return (error); 3239 } 3240 3241 static int 3242 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3243 { 3244 struct umtx_abs_timeout timo; 3245 struct umtx_q *uq; 3246 uint32_t flags; 3247 int32_t state, oldstate; 3248 int32_t blocked_writers; 3249 int32_t blocked_readers; 3250 int error, error1, rv; 3251 3252 uq = td->td_umtxq; 3253 error = fueword32(&rwlock->rw_flags, &flags); 3254 if (error == -1) 3255 return (EFAULT); 3256 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3257 if (error != 0) 3258 return (error); 3259 3260 if (timeout != NULL) 3261 
umtx_abs_timeout_init2(&timo, timeout); 3262 3263 blocked_readers = 0; 3264 for (;;) { 3265 rv = fueword32(&rwlock->rw_state, &state); 3266 if (rv == -1) { 3267 umtx_key_release(&uq->uq_key); 3268 return (EFAULT); 3269 } 3270 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3271 URWLOCK_READER_COUNT(state) == 0) { 3272 rv = casueword32(&rwlock->rw_state, state, 3273 &oldstate, state | URWLOCK_WRITE_OWNER); 3274 if (rv == -1) { 3275 umtx_key_release(&uq->uq_key); 3276 return (EFAULT); 3277 } 3278 if (rv == 0) { 3279 MPASS(oldstate == state); 3280 umtx_key_release(&uq->uq_key); 3281 return (0); 3282 } 3283 state = oldstate; 3284 error = thread_check_susp(td, true); 3285 if (error != 0) 3286 break; 3287 } 3288 3289 if (error) { 3290 if ((state & (URWLOCK_WRITE_OWNER | 3291 URWLOCK_WRITE_WAITERS)) == 0 && 3292 blocked_readers != 0) { 3293 umtxq_lock(&uq->uq_key); 3294 umtxq_busy(&uq->uq_key); 3295 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3296 UMTX_SHARED_QUEUE); 3297 umtxq_unbusy(&uq->uq_key); 3298 umtxq_unlock(&uq->uq_key); 3299 } 3300 3301 break; 3302 } 3303 3304 /* grab monitor lock */ 3305 umtxq_lock(&uq->uq_key); 3306 umtxq_busy(&uq->uq_key); 3307 umtxq_unlock(&uq->uq_key); 3308 3309 /* 3310 * Re-read the state, in case it changed between the 3311 * try-lock above and the check below. 3312 */ 3313 rv = fueword32(&rwlock->rw_state, &state); 3314 if (rv == -1) 3315 error = EFAULT; 3316 3317 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3318 URWLOCK_READER_COUNT(state) != 0) && 3319 (state & URWLOCK_WRITE_WAITERS) == 0) { 3320 rv = casueword32(&rwlock->rw_state, state, 3321 &oldstate, state | URWLOCK_WRITE_WAITERS); 3322 if (rv == -1) { 3323 error = EFAULT; 3324 break; 3325 } 3326 if (rv == 0) { 3327 MPASS(oldstate == state); 3328 goto sleep; 3329 } 3330 state = oldstate; 3331 error = thread_check_susp(td, false); 3332 if (error != 0) 3333 break; 3334 } 3335 if (error != 0) { 3336 umtxq_unbusy_unlocked(&uq->uq_key); 3337 break; 3338 } 3339 3340 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3341 URWLOCK_READER_COUNT(state) == 0) { 3342 umtxq_unbusy_unlocked(&uq->uq_key); 3343 error = thread_check_susp(td, false); 3344 if (error != 0) 3345 break; 3346 continue; 3347 } 3348 sleep: 3349 rv = fueword32(&rwlock->rw_blocked_writers, 3350 &blocked_writers); 3351 if (rv == -1) { 3352 umtxq_unbusy_unlocked(&uq->uq_key); 3353 error = EFAULT; 3354 break; 3355 } 3356 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3357 3358 while ((state & URWLOCK_WRITE_OWNER) || 3359 URWLOCK_READER_COUNT(state) != 0) { 3360 umtxq_lock(&uq->uq_key); 3361 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3362 umtxq_unbusy(&uq->uq_key); 3363 3364 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3365 NULL : &timo); 3366 3367 umtxq_busy(&uq->uq_key); 3368 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3369 umtxq_unlock(&uq->uq_key); 3370 if (error) 3371 break; 3372 rv = fueword32(&rwlock->rw_state, &state); 3373 if (rv == -1) { 3374 error = EFAULT; 3375 break; 3376 } 3377 } 3378 3379 rv = fueword32(&rwlock->rw_blocked_writers, 3380 &blocked_writers); 3381 if (rv == -1) { 3382 umtxq_unbusy_unlocked(&uq->uq_key); 3383 error = EFAULT; 3384 break; 3385 } 3386 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3387 if (blocked_writers == 1) { 3388 rv = fueword32(&rwlock->rw_state, &state); 3389 if (rv == -1) { 3390 umtxq_unbusy_unlocked(&uq->uq_key); 3391 error = EFAULT; 3392 break; 3393 } 3394 for (;;) { 3395 rv = casueword32(&rwlock->rw_state, state, 3396 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3397 if (rv == -1) { 3398 error = EFAULT; 3399 break; 3400 } 3401 if (rv == 0) { 3402 MPASS(oldstate == state); 3403 break; 3404 } 3405 state = oldstate; 3406 error1 = thread_check_susp(td, false); 3407 /* 3408 * We are leaving the URWLOCK_WRITE_WAITERS 3409 * behind, but this should not harm the 3410 * correctness. 3411 */ 3412 if (error1 != 0) { 3413 if (error == 0) 3414 error = error1; 3415 break; 3416 } 3417 } 3418 rv = fueword32(&rwlock->rw_blocked_readers, 3419 &blocked_readers); 3420 if (rv == -1) { 3421 umtxq_unbusy_unlocked(&uq->uq_key); 3422 error = EFAULT; 3423 break; 3424 } 3425 } else 3426 blocked_readers = 0; 3427 3428 umtxq_unbusy_unlocked(&uq->uq_key); 3429 } 3430 3431 umtx_key_release(&uq->uq_key); 3432 if (error == ERESTART) 3433 error = EINTR; 3434 return (error); 3435 } 3436 3437 static int 3438 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3439 { 3440 struct umtx_q *uq; 3441 uint32_t flags; 3442 int32_t state, oldstate; 3443 int error, rv, q, count; 3444 3445 uq = td->td_umtxq; 3446 error = fueword32(&rwlock->rw_flags, &flags); 3447 if (error == -1) 3448 return (EFAULT); 3449 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3450 if (error != 0) 3451 return (error); 3452 3453 error = fueword32(&rwlock->rw_state, &state); 3454 if (error == -1) { 3455 error = EFAULT; 3456 goto out; 3457 } 3458 if (state & URWLOCK_WRITE_OWNER) { 3459 for (;;) { 3460 rv = casueword32(&rwlock->rw_state, state, 3461 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3462 if (rv == -1) { 3463 error = EFAULT; 3464 goto out; 3465 } 3466 if (rv == 1) { 3467 state = oldstate; 3468 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3469 error = EPERM; 3470 goto out; 3471 } 3472 error = thread_check_susp(td, true); 3473 if (error != 0) 3474 goto out; 3475 } else 3476 break; 3477 } 3478 } else if (URWLOCK_READER_COUNT(state) != 0) { 3479 for (;;) { 3480 rv = casueword32(&rwlock->rw_state, state, 3481 &oldstate, state - 1); 3482 if (rv == -1) { 3483 error = EFAULT; 3484 goto out; 3485 } 3486 if (rv == 1) { 3487 state = oldstate; 3488 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3489 error = EPERM; 3490 goto out; 3491 } 3492 error = thread_check_susp(td, true); 3493 if (error != 0) 3494 goto out; 3495 } else 3496 break; 3497 } 3498 } else { 3499 error = EPERM; 3500 goto out; 3501 } 3502 3503 count = 0; 3504 3505 if (!(flags & URWLOCK_PREFER_READER)) { 3506 if (state & URWLOCK_WRITE_WAITERS) { 3507 count = 1; 3508 q = UMTX_EXCLUSIVE_QUEUE; 3509 } else if (state & URWLOCK_READ_WAITERS) { 3510 count = INT_MAX; 3511 q = UMTX_SHARED_QUEUE; 3512 } 3513 } else { 3514 if (state & URWLOCK_READ_WAITERS) { 3515 count = INT_MAX; 3516 q = UMTX_SHARED_QUEUE; 3517 } else if (state & 
URWLOCK_WRITE_WAITERS) { 3518 count = 1; 3519 q = UMTX_EXCLUSIVE_QUEUE; 3520 } 3521 } 3522 3523 if (count) { 3524 umtxq_lock(&uq->uq_key); 3525 umtxq_busy(&uq->uq_key); 3526 umtxq_signal_queue(&uq->uq_key, count, q); 3527 umtxq_unbusy(&uq->uq_key); 3528 umtxq_unlock(&uq->uq_key); 3529 } 3530 out: 3531 umtx_key_release(&uq->uq_key); 3532 return (error); 3533 } 3534 3535 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3536 static int 3537 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3538 { 3539 struct umtx_abs_timeout timo; 3540 struct umtx_q *uq; 3541 uint32_t flags, count, count1; 3542 int error, rv, rv1; 3543 3544 uq = td->td_umtxq; 3545 error = fueword32(&sem->_flags, &flags); 3546 if (error == -1) 3547 return (EFAULT); 3548 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3549 if (error != 0) 3550 return (error); 3551 3552 if (timeout != NULL) 3553 umtx_abs_timeout_init2(&timo, timeout); 3554 3555 again: 3556 umtxq_lock(&uq->uq_key); 3557 umtxq_busy(&uq->uq_key); 3558 umtxq_insert(uq); 3559 umtxq_unlock(&uq->uq_key); 3560 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3561 if (rv != -1) 3562 rv1 = fueword32(&sem->_count, &count); 3563 if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) { 3564 if (rv == 0) 3565 suword32(&sem->_has_waiters, 0); 3566 umtxq_lock(&uq->uq_key); 3567 umtxq_unbusy(&uq->uq_key); 3568 umtxq_remove(uq); 3569 umtxq_unlock(&uq->uq_key); 3570 if (rv == -1 || rv1 == -1) { 3571 error = EFAULT; 3572 goto out; 3573 } 3574 if (count != 0) { 3575 error = 0; 3576 goto out; 3577 } 3578 MPASS(rv == 1 && count1 == 0); 3579 rv = thread_check_susp(td, true); 3580 if (rv == 0) 3581 goto again; 3582 error = rv; 3583 goto out; 3584 } 3585 umtxq_lock(&uq->uq_key); 3586 umtxq_unbusy(&uq->uq_key); 3587 3588 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3589 3590 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3591 error = 0; 3592 else { 3593 umtxq_remove(uq); 3594 /* A relative timeout cannot be restarted. */ 3595 if (error == ERESTART && timeout != NULL && 3596 (timeout->_flags & UMTX_ABSTIME) == 0) 3597 error = EINTR; 3598 } 3599 umtxq_unlock(&uq->uq_key); 3600 out: 3601 umtx_key_release(&uq->uq_key); 3602 return (error); 3603 } 3604 3605 /* 3606 * Signal a userland semaphore. 3607 */ 3608 static int 3609 do_sem_wake(struct thread *td, struct _usem *sem) 3610 { 3611 struct umtx_key key; 3612 int error, cnt; 3613 uint32_t flags; 3614 3615 error = fueword32(&sem->_flags, &flags); 3616 if (error == -1) 3617 return (EFAULT); 3618 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3619 return (error); 3620 umtxq_lock(&key); 3621 umtxq_busy(&key); 3622 cnt = umtxq_count(&key); 3623 if (cnt > 0) { 3624 /* 3625 * Check if count is greater than 0, this means the memory is 3626 * still being referenced by user code, so we can safely 3627 * update _has_waiters flag. 
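 * The flag is cleared before the wakeup when the last waiter is
 * being released (cnt == 1 below), so a racing post in userland
 * does not observe a stale _has_waiters value.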
3628 */ 3629 if (cnt == 1) { 3630 umtxq_unlock(&key); 3631 error = suword32(&sem->_has_waiters, 0); 3632 umtxq_lock(&key); 3633 if (error == -1) 3634 error = EFAULT; 3635 } 3636 umtxq_signal(&key, 1); 3637 } 3638 umtxq_unbusy(&key); 3639 umtxq_unlock(&key); 3640 umtx_key_release(&key); 3641 return (error); 3642 } 3643 #endif 3644 3645 static int 3646 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3647 { 3648 struct umtx_abs_timeout timo; 3649 struct umtx_q *uq; 3650 uint32_t count, flags; 3651 int error, rv; 3652 3653 uq = td->td_umtxq; 3654 flags = fuword32(&sem->_flags); 3655 if (timeout != NULL) 3656 umtx_abs_timeout_init2(&timo, timeout); 3657 3658 again: 3659 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3660 if (error != 0) 3661 return (error); 3662 umtxq_lock(&uq->uq_key); 3663 umtxq_busy(&uq->uq_key); 3664 umtxq_insert(uq); 3665 umtxq_unlock(&uq->uq_key); 3666 rv = fueword32(&sem->_count, &count); 3667 if (rv == -1) { 3668 umtxq_lock(&uq->uq_key); 3669 umtxq_unbusy(&uq->uq_key); 3670 umtxq_remove(uq); 3671 umtxq_unlock(&uq->uq_key); 3672 umtx_key_release(&uq->uq_key); 3673 return (EFAULT); 3674 } 3675 for (;;) { 3676 if (USEM_COUNT(count) != 0) { 3677 umtxq_lock(&uq->uq_key); 3678 umtxq_unbusy(&uq->uq_key); 3679 umtxq_remove(uq); 3680 umtxq_unlock(&uq->uq_key); 3681 umtx_key_release(&uq->uq_key); 3682 return (0); 3683 } 3684 if (count == USEM_HAS_WAITERS) 3685 break; 3686 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3687 if (rv == 0) 3688 break; 3689 umtxq_lock(&uq->uq_key); 3690 umtxq_unbusy(&uq->uq_key); 3691 umtxq_remove(uq); 3692 umtxq_unlock(&uq->uq_key); 3693 umtx_key_release(&uq->uq_key); 3694 if (rv == -1) 3695 return (EFAULT); 3696 rv = thread_check_susp(td, true); 3697 if (rv != 0) 3698 return (rv); 3699 goto again; 3700 } 3701 umtxq_lock(&uq->uq_key); 3702 umtxq_unbusy(&uq->uq_key); 3703 3704 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3705 3706 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3707 error = 0; 3708 else { 3709 umtxq_remove(uq); 3710 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3711 /* A relative timeout cannot be restarted. */ 3712 if (error == ERESTART) 3713 error = EINTR; 3714 if (error == EINTR) { 3715 kern_clock_gettime(curthread, timo.clockid, 3716 &timo.cur); 3717 timespecsub(&timo.end, &timo.cur, 3718 &timeout->_timeout); 3719 } 3720 } 3721 } 3722 umtxq_unlock(&uq->uq_key); 3723 umtx_key_release(&uq->uq_key); 3724 return (error); 3725 } 3726 3727 /* 3728 * Signal a userland semaphore. 3729 */ 3730 static int 3731 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3732 { 3733 struct umtx_key key; 3734 int error, cnt, rv; 3735 uint32_t count, flags; 3736 3737 rv = fueword32(&sem->_flags, &flags); 3738 if (rv == -1) 3739 return (EFAULT); 3740 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3741 return (error); 3742 umtxq_lock(&key); 3743 umtxq_busy(&key); 3744 cnt = umtxq_count(&key); 3745 if (cnt > 0) { 3746 /* 3747 * If this was the last sleeping thread, clear the waiters 3748 * flag in _count. 
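 * Unlike the older _usem, _usem2 packs the count and the
 * USEM_HAS_WAITERS bit into the single _count word, so the flag is
 * stripped with a CAS loop that preserves a concurrently changing
 * count.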
3749 */ 3750 if (cnt == 1) { 3751 umtxq_unlock(&key); 3752 rv = fueword32(&sem->_count, &count); 3753 while (rv != -1 && count & USEM_HAS_WAITERS) { 3754 rv = casueword32(&sem->_count, count, &count, 3755 count & ~USEM_HAS_WAITERS); 3756 if (rv == 1) { 3757 rv = thread_check_susp(td, true); 3758 if (rv != 0) 3759 break; 3760 } 3761 } 3762 if (rv == -1) 3763 error = EFAULT; 3764 else if (rv > 0) { 3765 error = rv; 3766 } 3767 umtxq_lock(&key); 3768 } 3769 3770 umtxq_signal(&key, 1); 3771 } 3772 umtxq_unbusy(&key); 3773 umtxq_unlock(&key); 3774 umtx_key_release(&key); 3775 return (error); 3776 } 3777 3778 #ifdef COMPAT_FREEBSD10 3779 int 3780 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3781 { 3782 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3783 } 3784 3785 int 3786 freebsd10__umtx_unlock(struct thread *td, 3787 struct freebsd10__umtx_unlock_args *uap) 3788 { 3789 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3790 } 3791 #endif 3792 3793 inline int 3794 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3795 { 3796 int error; 3797 3798 error = copyin(uaddr, tsp, sizeof(*tsp)); 3799 if (error == 0) { 3800 if (!timespecvalid_interval(tsp)) 3801 error = EINVAL; 3802 } 3803 return (error); 3804 } 3805 3806 static inline int 3807 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3808 { 3809 int error; 3810 3811 if (size <= sizeof(tp->_timeout)) { 3812 tp->_clockid = CLOCK_REALTIME; 3813 tp->_flags = 0; 3814 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3815 } else 3816 error = copyin(uaddr, tp, sizeof(*tp)); 3817 if (error != 0) 3818 return (error); 3819 if (!timespecvalid_interval(&tp->_timeout)) 3820 return (EINVAL); 3821 return (0); 3822 } 3823 3824 static int 3825 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3826 struct umtx_robust_lists_params *rb) 3827 { 3828 3829 if (size > sizeof(*rb)) 3830 return (EINVAL); 3831 return (copyin(uaddr, rb, size)); 3832 } 3833 3834 static int 3835 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3836 { 3837 3838 /* 3839 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3840 * and we're only called if sz >= sizeof(timespec) as supplied in the 3841 * copyops. 3842 */ 3843 KASSERT(sz >= sizeof(*tsp), 3844 ("umtx_copyops specifies incorrect sizes")); 3845 3846 return (copyout(tsp, uaddr, sizeof(*tsp))); 3847 } 3848 3849 #ifdef COMPAT_FREEBSD10 3850 static int 3851 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3852 const struct umtx_copyops *ops) 3853 { 3854 struct timespec *ts, timeout; 3855 int error; 3856 3857 /* Allow a null timespec (wait forever). 
*/ 3858 if (uap->uaddr2 == NULL) 3859 ts = NULL; 3860 else { 3861 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3862 if (error != 0) 3863 return (error); 3864 ts = &timeout; 3865 } 3866 #ifdef COMPAT_FREEBSD32 3867 if (ops->compat32) 3868 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3869 #endif 3870 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3871 } 3872 3873 static int 3874 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3875 const struct umtx_copyops *ops) 3876 { 3877 #ifdef COMPAT_FREEBSD32 3878 if (ops->compat32) 3879 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3880 #endif 3881 return (do_unlock_umtx(td, uap->obj, uap->val)); 3882 } 3883 #endif /* COMPAT_FREEBSD10 */ 3884 3885 #if !defined(COMPAT_FREEBSD10) 3886 static int 3887 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3888 const struct umtx_copyops *ops __unused) 3889 { 3890 return (EOPNOTSUPP); 3891 } 3892 #endif /* COMPAT_FREEBSD10 */ 3893 3894 static int 3895 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3896 const struct umtx_copyops *ops) 3897 { 3898 struct _umtx_time timeout, *tm_p; 3899 int error; 3900 3901 if (uap->uaddr2 == NULL) 3902 tm_p = NULL; 3903 else { 3904 error = ops->copyin_umtx_time( 3905 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3906 if (error != 0) 3907 return (error); 3908 tm_p = &timeout; 3909 } 3910 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3911 } 3912 3913 static int 3914 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3915 const struct umtx_copyops *ops) 3916 { 3917 struct _umtx_time timeout, *tm_p; 3918 int error; 3919 3920 if (uap->uaddr2 == NULL) 3921 tm_p = NULL; 3922 else { 3923 error = ops->copyin_umtx_time( 3924 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3925 if (error != 0) 3926 return (error); 3927 tm_p = &timeout; 3928 } 3929 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3930 } 3931 3932 static int 3933 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3934 const struct umtx_copyops *ops) 3935 { 3936 struct _umtx_time *tm_p, timeout; 3937 int error; 3938 3939 if (uap->uaddr2 == NULL) 3940 tm_p = NULL; 3941 else { 3942 error = ops->copyin_umtx_time( 3943 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3944 if (error != 0) 3945 return (error); 3946 tm_p = &timeout; 3947 } 3948 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3949 } 3950 3951 static int 3952 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3953 const struct umtx_copyops *ops __unused) 3954 { 3955 3956 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3957 } 3958 3959 #define BATCH_SIZE 128 3960 static int 3961 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3962 { 3963 char *uaddrs[BATCH_SIZE], **upp; 3964 int count, error, i, pos, tocopy; 3965 3966 upp = (char **)uap->obj; 3967 error = 0; 3968 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3969 pos += tocopy) { 3970 tocopy = MIN(count, BATCH_SIZE); 3971 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3972 if (error != 0) 3973 break; 3974 for (i = 0; i < tocopy; ++i) { 3975 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3976 } 3977 maybe_yield(); 3978 } 3979 return (error); 3980 } 3981 3982 static int 3983 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3984 { 3985 uint32_t uaddrs[BATCH_SIZE], *upp; 3986 int count, error, i, pos, tocopy; 3987 3988 upp = (uint32_t *)uap->obj; 3989 error = 0; 3990 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 3991 pos += tocopy) { 3992 tocopy = MIN(count, BATCH_SIZE); 3993 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3994 if (error != 0) 3995 break; 3996 for (i = 0; i < tocopy; ++i) { 3997 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3998 INT_MAX, 1); 3999 } 4000 maybe_yield(); 4001 } 4002 return (error); 4003 } 4004 4005 static int 4006 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 4007 const struct umtx_copyops *ops) 4008 { 4009 4010 if (ops->compat32) 4011 return (__umtx_op_nwake_private_compat32(td, uap)); 4012 return (__umtx_op_nwake_private_native(td, uap)); 4013 } 4014 4015 static int 4016 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4017 const struct umtx_copyops *ops __unused) 4018 { 4019 4020 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4021 } 4022 4023 static int 4024 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4025 const struct umtx_copyops *ops) 4026 { 4027 struct _umtx_time *tm_p, timeout; 4028 int error; 4029 4030 /* Allow a null timespec (wait forever). */ 4031 if (uap->uaddr2 == NULL) 4032 tm_p = NULL; 4033 else { 4034 error = ops->copyin_umtx_time( 4035 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4036 if (error != 0) 4037 return (error); 4038 tm_p = &timeout; 4039 } 4040 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4041 } 4042 4043 static int 4044 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4045 const struct umtx_copyops *ops __unused) 4046 { 4047 4048 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4049 } 4050 4051 static int 4052 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4053 const struct umtx_copyops *ops) 4054 { 4055 struct _umtx_time *tm_p, timeout; 4056 int error; 4057 4058 /* Allow a null timespec (wait forever). */ 4059 if (uap->uaddr2 == NULL) 4060 tm_p = NULL; 4061 else { 4062 error = ops->copyin_umtx_time( 4063 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4064 if (error != 0) 4065 return (error); 4066 tm_p = &timeout; 4067 } 4068 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4069 } 4070 4071 static int 4072 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4073 const struct umtx_copyops *ops __unused) 4074 { 4075 4076 return (do_wake_umutex(td, uap->obj)); 4077 } 4078 4079 static int 4080 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4081 const struct umtx_copyops *ops __unused) 4082 { 4083 4084 return (do_unlock_umutex(td, uap->obj, false)); 4085 } 4086 4087 static int 4088 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4089 const struct umtx_copyops *ops __unused) 4090 { 4091 4092 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4093 } 4094 4095 static int 4096 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4097 const struct umtx_copyops *ops) 4098 { 4099 struct timespec *ts, timeout; 4100 int error; 4101 4102 /* Allow a null timespec (wait forever). 
*/ 4103 if (uap->uaddr2 == NULL) 4104 ts = NULL; 4105 else { 4106 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4107 if (error != 0) 4108 return (error); 4109 ts = &timeout; 4110 } 4111 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4112 } 4113 4114 static int 4115 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4116 const struct umtx_copyops *ops __unused) 4117 { 4118 4119 return (do_cv_signal(td, uap->obj)); 4120 } 4121 4122 static int 4123 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4124 const struct umtx_copyops *ops __unused) 4125 { 4126 4127 return (do_cv_broadcast(td, uap->obj)); 4128 } 4129 4130 static int 4131 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4132 const struct umtx_copyops *ops) 4133 { 4134 struct _umtx_time timeout; 4135 int error; 4136 4137 /* Allow a null timespec (wait forever). */ 4138 if (uap->uaddr2 == NULL) { 4139 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4140 } else { 4141 error = ops->copyin_umtx_time(uap->uaddr2, 4142 (size_t)uap->uaddr1, &timeout); 4143 if (error != 0) 4144 return (error); 4145 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4146 } 4147 return (error); 4148 } 4149 4150 static int 4151 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4152 const struct umtx_copyops *ops) 4153 { 4154 struct _umtx_time timeout; 4155 int error; 4156 4157 /* Allow a null timespec (wait forever). */ 4158 if (uap->uaddr2 == NULL) { 4159 error = do_rw_wrlock(td, uap->obj, 0); 4160 } else { 4161 error = ops->copyin_umtx_time(uap->uaddr2, 4162 (size_t)uap->uaddr1, &timeout); 4163 if (error != 0) 4164 return (error); 4165 4166 error = do_rw_wrlock(td, uap->obj, &timeout); 4167 } 4168 return (error); 4169 } 4170 4171 static int 4172 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4173 const struct umtx_copyops *ops __unused) 4174 { 4175 4176 return (do_rw_unlock(td, uap->obj)); 4177 } 4178 4179 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4180 static int 4181 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4182 const struct umtx_copyops *ops) 4183 { 4184 struct _umtx_time *tm_p, timeout; 4185 int error; 4186 4187 /* Allow a null timespec (wait forever). */ 4188 if (uap->uaddr2 == NULL) 4189 tm_p = NULL; 4190 else { 4191 error = ops->copyin_umtx_time( 4192 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4193 if (error != 0) 4194 return (error); 4195 tm_p = &timeout; 4196 } 4197 return (do_sem_wait(td, uap->obj, tm_p)); 4198 } 4199 4200 static int 4201 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4202 const struct umtx_copyops *ops __unused) 4203 { 4204 4205 return (do_sem_wake(td, uap->obj)); 4206 } 4207 #endif 4208 4209 static int 4210 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4211 const struct umtx_copyops *ops __unused) 4212 { 4213 4214 return (do_wake2_umutex(td, uap->obj, uap->val)); 4215 } 4216 4217 static int 4218 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4219 const struct umtx_copyops *ops) 4220 { 4221 struct _umtx_time *tm_p, timeout; 4222 size_t uasize; 4223 int error; 4224 4225 /* Allow a null timespec (wait forever). 
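 * For a timed wait, uap->uaddr1 carries the size of the structure at
 * uap->uaddr2: a struct _umtx_time, optionally followed by a timespec
 * that receives the time remaining when the wait is interrupted.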
*/ 4226 if (uap->uaddr2 == NULL) { 4227 uasize = 0; 4228 tm_p = NULL; 4229 } else { 4230 uasize = (size_t)uap->uaddr1; 4231 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4232 if (error != 0) 4233 return (error); 4234 tm_p = &timeout; 4235 } 4236 error = do_sem2_wait(td, uap->obj, tm_p); 4237 if (error == EINTR && uap->uaddr2 != NULL && 4238 (timeout._flags & UMTX_ABSTIME) == 0 && 4239 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4240 error = ops->copyout_timeout( 4241 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4242 uasize - ops->umtx_time_sz, &timeout._timeout); 4243 if (error == 0) { 4244 error = EINTR; 4245 } 4246 } 4247 4248 return (error); 4249 } 4250 4251 static int 4252 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4253 const struct umtx_copyops *ops __unused) 4254 { 4255 4256 return (do_sem2_wake(td, uap->obj)); 4257 } 4258 4259 #define USHM_OBJ_UMTX(o) \ 4260 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4261 4262 #define USHMF_REG_LINKED 0x0001 4263 #define USHMF_OBJ_LINKED 0x0002 4264 struct umtx_shm_reg { 4265 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4266 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4267 struct umtx_key ushm_key; 4268 struct ucred *ushm_cred; 4269 struct shmfd *ushm_obj; 4270 u_int ushm_refcnt; 4271 u_int ushm_flags; 4272 }; 4273 4274 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4275 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4276 4277 static uma_zone_t umtx_shm_reg_zone; 4278 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4279 static struct mtx umtx_shm_lock; 4280 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4281 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4282 4283 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4284 4285 static void 4286 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4287 { 4288 struct umtx_shm_reg_head d; 4289 struct umtx_shm_reg *reg, *reg1; 4290 4291 TAILQ_INIT(&d); 4292 mtx_lock(&umtx_shm_lock); 4293 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4294 mtx_unlock(&umtx_shm_lock); 4295 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4296 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4297 umtx_shm_free_reg(reg); 4298 } 4299 } 4300 4301 static struct task umtx_shm_reg_delfree_task = 4302 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4303 4304 static struct umtx_shm_reg * 4305 umtx_shm_find_reg_locked(const struct umtx_key *key) 4306 { 4307 struct umtx_shm_reg *reg; 4308 struct umtx_shm_reg_head *reg_head; 4309 4310 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4311 mtx_assert(&umtx_shm_lock, MA_OWNED); 4312 reg_head = &umtx_shm_registry[key->hash]; 4313 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4314 KASSERT(reg->ushm_key.shared, 4315 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4316 if (reg->ushm_key.info.shared.object == 4317 key->info.shared.object && 4318 reg->ushm_key.info.shared.offset == 4319 key->info.shared.offset) { 4320 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4321 KASSERT(reg->ushm_refcnt > 0, 4322 ("reg %p refcnt 0 onlist", reg)); 4323 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4324 ("reg %p not linked", reg)); 4325 reg->ushm_refcnt++; 4326 return (reg); 4327 } 4328 } 4329 return (NULL); 4330 } 4331 4332 static struct umtx_shm_reg * 4333 umtx_shm_find_reg(const struct umtx_key *key) 4334 { 4335 struct umtx_shm_reg *reg; 4336 4337 mtx_lock(&umtx_shm_lock); 4338 reg = umtx_shm_find_reg_locked(key); 4339 mtx_unlock(&umtx_shm_lock); 4340 return (reg); 4341 } 4342 
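/*
 * A sketch of how userland reaches this registry (an illustration
 * only; the exact wrapper is up to libc):
 *
 *	fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, &key_word, NULL);
 *
 * registers (or finds) the shared page keyed by the physical backing
 * of key_word and returns a POSIX shared-memory file descriptor for
 * it; UMTX_SHM_LOOKUP, UMTX_SHM_DESTROY and UMTX_SHM_ALIVE operate on
 * an existing registration.  See umtx_shm() below.
 */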
4343 static void 4344 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4345 { 4346 4347 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4348 crfree(reg->ushm_cred); 4349 shm_drop(reg->ushm_obj); 4350 uma_zfree(umtx_shm_reg_zone, reg); 4351 } 4352 4353 static bool 4354 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 4355 { 4356 bool res; 4357 4358 mtx_assert(&umtx_shm_lock, MA_OWNED); 4359 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 4360 reg->ushm_refcnt--; 4361 res = reg->ushm_refcnt == 0; 4362 if (res || force) { 4363 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 4364 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 4365 reg, ushm_reg_link); 4366 reg->ushm_flags &= ~USHMF_REG_LINKED; 4367 } 4368 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 4369 LIST_REMOVE(reg, ushm_obj_link); 4370 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 4371 } 4372 } 4373 return (res); 4374 } 4375 4376 static void 4377 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 4378 { 4379 vm_object_t object; 4380 bool dofree; 4381 4382 if (force) { 4383 object = reg->ushm_obj->shm_object; 4384 VM_OBJECT_WLOCK(object); 4385 vm_object_set_flag(object, OBJ_UMTXDEAD); 4386 VM_OBJECT_WUNLOCK(object); 4387 } 4388 mtx_lock(&umtx_shm_lock); 4389 dofree = umtx_shm_unref_reg_locked(reg, force); 4390 mtx_unlock(&umtx_shm_lock); 4391 if (dofree) 4392 umtx_shm_free_reg(reg); 4393 } 4394 4395 void 4396 umtx_shm_object_init(vm_object_t object) 4397 { 4398 4399 LIST_INIT(USHM_OBJ_UMTX(object)); 4400 } 4401 4402 void 4403 umtx_shm_object_terminated(vm_object_t object) 4404 { 4405 struct umtx_shm_reg *reg, *reg1; 4406 bool dofree; 4407 4408 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4409 return; 4410 4411 dofree = false; 4412 mtx_lock(&umtx_shm_lock); 4413 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4414 if (umtx_shm_unref_reg_locked(reg, true)) { 4415 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4416 ushm_reg_link); 4417 dofree = true; 4418 } 4419 } 4420 mtx_unlock(&umtx_shm_lock); 4421 if (dofree) 4422 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4423 } 4424 4425 static int 4426 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4427 struct umtx_shm_reg **res) 4428 { 4429 struct umtx_shm_reg *reg, *reg1; 4430 struct ucred *cred; 4431 int error; 4432 4433 reg = umtx_shm_find_reg(key); 4434 if (reg != NULL) { 4435 *res = reg; 4436 return (0); 4437 } 4438 cred = td->td_ucred; 4439 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4440 return (ENOMEM); 4441 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4442 reg->ushm_refcnt = 1; 4443 bcopy(key, &reg->ushm_key, sizeof(*key)); 4444 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4445 reg->ushm_cred = crhold(cred); 4446 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4447 if (error != 0) { 4448 umtx_shm_free_reg(reg); 4449 return (error); 4450 } 4451 mtx_lock(&umtx_shm_lock); 4452 reg1 = umtx_shm_find_reg_locked(key); 4453 if (reg1 != NULL) { 4454 mtx_unlock(&umtx_shm_lock); 4455 umtx_shm_free_reg(reg); 4456 *res = reg1; 4457 return (0); 4458 } 4459 reg->ushm_refcnt++; 4460 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4461 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4462 ushm_obj_link); 4463 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4464 mtx_unlock(&umtx_shm_lock); 4465 *res = reg; 4466 return (0); 4467 } 4468 4469 static int 4470 umtx_shm_alive(struct thread *td, void *addr) 4471 { 4472 vm_map_t map; 4473
vm_map_entry_t entry; 4474 vm_object_t object; 4475 vm_pindex_t pindex; 4476 vm_prot_t prot; 4477 int res, ret; 4478 boolean_t wired; 4479 4480 map = &td->td_proc->p_vmspace->vm_map; 4481 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4482 &object, &pindex, &prot, &wired); 4483 if (res != KERN_SUCCESS) 4484 return (EFAULT); 4485 if (object == NULL) 4486 ret = EINVAL; 4487 else 4488 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 4489 vm_map_lookup_done(map, entry); 4490 return (ret); 4491 } 4492 4493 static void 4494 umtx_shm_init(void) 4495 { 4496 int i; 4497 4498 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4499 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4500 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4501 for (i = 0; i < nitems(umtx_shm_registry); i++) 4502 TAILQ_INIT(&umtx_shm_registry[i]); 4503 } 4504 4505 static int 4506 umtx_shm(struct thread *td, void *addr, u_int flags) 4507 { 4508 struct umtx_key key; 4509 struct umtx_shm_reg *reg; 4510 struct file *fp; 4511 int error, fd; 4512 4513 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4514 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1) 4515 return (EINVAL); 4516 if ((flags & UMTX_SHM_ALIVE) != 0) 4517 return (umtx_shm_alive(td, addr)); 4518 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4519 if (error != 0) 4520 return (error); 4521 KASSERT(key.shared == 1, ("non-shared key")); 4522 if ((flags & UMTX_SHM_CREAT) != 0) { 4523 error = umtx_shm_create_reg(td, &key, &reg); 4524 } else { 4525 reg = umtx_shm_find_reg(&key); 4526 if (reg == NULL) 4527 error = ESRCH; 4528 } 4529 umtx_key_release(&key); 4530 if (error != 0) 4531 return (error); 4532 KASSERT(reg != NULL, ("no reg")); 4533 if ((flags & UMTX_SHM_DESTROY) != 0) { 4534 umtx_shm_unref_reg(reg, true); 4535 } else { 4536 #if 0 4537 #ifdef MAC 4538 error = mac_posixshm_check_open(td->td_ucred, 4539 reg->ushm_obj, FFLAGS(O_RDWR)); 4540 if (error == 0) 4541 #endif 4542 error = shm_access(reg->ushm_obj, td->td_ucred, 4543 FFLAGS(O_RDWR)); 4544 if (error == 0) 4545 #endif 4546 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4547 if (error == 0) { 4548 shm_hold(reg->ushm_obj); 4549 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4550 &shm_ops); 4551 td->td_retval[0] = fd; 4552 fdrop(fp, td); 4553 } 4554 } 4555 umtx_shm_unref_reg(reg, false); 4556 return (error); 4557 } 4558 4559 static int 4560 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, 4561 const struct umtx_copyops *ops __unused) 4562 { 4563 4564 return (umtx_shm(td, uap->uaddr1, uap->val)); 4565 } 4566 4567 static int 4568 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4569 const struct umtx_copyops *ops) 4570 { 4571 struct umtx_robust_lists_params rb; 4572 int error; 4573 4574 if (ops->compat32) { 4575 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4576 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4577 td->td_rb_inact != 0)) 4578 return (EBUSY); 4579 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4580 return (EBUSY); 4581 } 4582 4583 bzero(&rb, sizeof(rb)); 4584 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4585 if (error != 0) 4586 return (error); 4587 4588 if (ops->compat32) 4589 td->td_pflags2 |= TDP2_COMPAT32RB; 4590 4591 td->td_rb_list = rb.robust_list_offset; 4592 td->td_rbp_list = rb.robust_priv_list_offset; 4593 td->td_rb_inact = rb.robust_inact_offset; 4594 return (0); 4595 } 4596 4597 #if defined(__i386__) || defined(__amd64__) 4598 /* 4599 * Provide the standard 32-bit
definitions for x86, since native/compat32 use a 4600 * 32-bit time_t there. Other architectures just need the i386 definitions 4601 * along with their standard compat32. 4602 */ 4603 struct timespecx32 { 4604 int64_t tv_sec; 4605 int32_t tv_nsec; 4606 }; 4607 4608 struct umtx_timex32 { 4609 struct timespecx32 _timeout; 4610 uint32_t _flags; 4611 uint32_t _clockid; 4612 }; 4613 4614 #ifndef __i386__ 4615 #define timespeci386 timespec32 4616 #define umtx_timei386 umtx_time32 4617 #endif 4618 #else /* !__i386__ && !__amd64__ */ 4619 /* 32-bit architectures can emulate i386, so define these almost everywhere. */ 4620 struct timespeci386 { 4621 int32_t tv_sec; 4622 int32_t tv_nsec; 4623 }; 4624 4625 struct umtx_timei386 { 4626 struct timespeci386 _timeout; 4627 uint32_t _flags; 4628 uint32_t _clockid; 4629 }; 4630 4631 #if defined(__LP64__) 4632 #define timespecx32 timespec32 4633 #define umtx_timex32 umtx_time32 4634 #endif 4635 #endif 4636 4637 static int 4638 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4639 struct umtx_robust_lists_params *rbp) 4640 { 4641 struct umtx_robust_lists_params_compat32 rb32; 4642 int error; 4643 4644 if (size > sizeof(rb32)) 4645 return (EINVAL); 4646 bzero(&rb32, sizeof(rb32)); 4647 error = copyin(uaddr, &rb32, size); 4648 if (error != 0) 4649 return (error); 4650 CP(rb32, *rbp, robust_list_offset); 4651 CP(rb32, *rbp, robust_priv_list_offset); 4652 CP(rb32, *rbp, robust_inact_offset); 4653 return (0); 4654 } 4655 4656 #ifndef __i386__ 4657 static inline int 4658 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4659 { 4660 struct timespeci386 ts32; 4661 int error; 4662 4663 error = copyin(uaddr, &ts32, sizeof(ts32)); 4664 if (error == 0) { 4665 if (!timespecvalid_interval(&ts32)) 4666 error = EINVAL; 4667 else { 4668 CP(ts32, *tsp, tv_sec); 4669 CP(ts32, *tsp, tv_nsec); 4670 } 4671 } 4672 return (error); 4673 } 4674 4675 static inline int 4676 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4677 { 4678 struct umtx_timei386 t32; 4679 int error; 4680 4681 t32._clockid = CLOCK_REALTIME; 4682 t32._flags = 0; 4683 if (size <= sizeof(t32._timeout)) 4684 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4685 else 4686 error = copyin(uaddr, &t32, sizeof(t32)); 4687 if (error != 0) 4688 return (error); 4689 if (!timespecvalid_interval(&t32._timeout)) 4690 return (EINVAL); 4691 TS_CP(t32, *tp, _timeout); 4692 CP(t32, *tp, _flags); 4693 CP(t32, *tp, _clockid); 4694 return (0); 4695 } 4696 4697 static int 4698 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4699 { 4700 struct timespeci386 remain32 = { 4701 .tv_sec = tsp->tv_sec, 4702 .tv_nsec = tsp->tv_nsec, 4703 }; 4704 4705 /* 4706 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4707 * and we're only called if sz >= sizeof(timespec) as supplied in the 4708 * copyops. 
4709 */ 4710 KASSERT(sz >= sizeof(remain32), 4711 ("umtx_copyops specifies incorrect sizes")); 4712 4713 return (copyout(&remain32, uaddr, sizeof(remain32))); 4714 } 4715 #endif /* !__i386__ */ 4716 4717 #if defined(__i386__) || defined(__LP64__) 4718 static inline int 4719 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4720 { 4721 struct timespecx32 ts32; 4722 int error; 4723 4724 error = copyin(uaddr, &ts32, sizeof(ts32)); 4725 if (error == 0) { 4726 if (!timespecvalid_interval(&ts32)) 4727 error = EINVAL; 4728 else { 4729 CP(ts32, *tsp, tv_sec); 4730 CP(ts32, *tsp, tv_nsec); 4731 } 4732 } 4733 return (error); 4734 } 4735 4736 static inline int 4737 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4738 { 4739 struct umtx_timex32 t32; 4740 int error; 4741 4742 t32._clockid = CLOCK_REALTIME; 4743 t32._flags = 0; 4744 if (size <= sizeof(t32._timeout)) 4745 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4746 else 4747 error = copyin(uaddr, &t32, sizeof(t32)); 4748 if (error != 0) 4749 return (error); 4750 if (!timespecvalid_interval(&t32._timeout)) 4751 return (EINVAL); 4752 TS_CP(t32, *tp, _timeout); 4753 CP(t32, *tp, _flags); 4754 CP(t32, *tp, _clockid); 4755 return (0); 4756 } 4757 4758 static int 4759 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4760 { 4761 struct timespecx32 remain32 = { 4762 .tv_sec = tsp->tv_sec, 4763 .tv_nsec = tsp->tv_nsec, 4764 }; 4765 4766 /* 4767 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4768 * and we're only called if sz >= sizeof(timespec) as supplied in the 4769 * copyops. 4770 */ 4771 KASSERT(sz >= sizeof(remain32), 4772 ("umtx_copyops specifies incorrect sizes")); 4773 4774 return (copyout(&remain32, uaddr, sizeof(remain32))); 4775 } 4776 #endif /* __i386__ || __LP64__ */ 4777 4778 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4779 const struct umtx_copyops *umtx_ops); 4780 4781 static const _umtx_op_func op_table[] = { 4782 #ifdef COMPAT_FREEBSD10 4783 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4784 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4785 #else 4786 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4787 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4788 #endif 4789 [UMTX_OP_WAIT] = __umtx_op_wait, 4790 [UMTX_OP_WAKE] = __umtx_op_wake, 4791 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4792 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4793 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4794 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4795 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4796 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4797 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4798 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4799 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4800 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4801 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4802 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4803 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4804 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4805 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4806 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4807 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4808 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4809 #else 4810 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4811 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4812 #endif 4813 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4814 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4815 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4816 [UMTX_OP_SEM2_WAKE] = 
__umtx_op_sem2_wake, 4817 [UMTX_OP_SHM] = __umtx_op_shm, 4818 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4819 }; 4820 4821 static const struct umtx_copyops umtx_native_ops = { 4822 .copyin_timeout = umtx_copyin_timeout, 4823 .copyin_umtx_time = umtx_copyin_umtx_time, 4824 .copyin_robust_lists = umtx_copyin_robust_lists, 4825 .copyout_timeout = umtx_copyout_timeout, 4826 .timespec_sz = sizeof(struct timespec), 4827 .umtx_time_sz = sizeof(struct _umtx_time), 4828 }; 4829 4830 #ifndef __i386__ 4831 static const struct umtx_copyops umtx_native_opsi386 = { 4832 .copyin_timeout = umtx_copyin_timeouti386, 4833 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4834 .copyin_robust_lists = umtx_copyin_robust_lists32, 4835 .copyout_timeout = umtx_copyout_timeouti386, 4836 .timespec_sz = sizeof(struct timespeci386), 4837 .umtx_time_sz = sizeof(struct umtx_timei386), 4838 .compat32 = true, 4839 }; 4840 #endif 4841 4842 #if defined(__i386__) || defined(__LP64__) 4843 /* i386 can emulate other 32-bit archs, too! */ 4844 static const struct umtx_copyops umtx_native_opsx32 = { 4845 .copyin_timeout = umtx_copyin_timeoutx32, 4846 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4847 .copyin_robust_lists = umtx_copyin_robust_lists32, 4848 .copyout_timeout = umtx_copyout_timeoutx32, 4849 .timespec_sz = sizeof(struct timespecx32), 4850 .umtx_time_sz = sizeof(struct umtx_timex32), 4851 .compat32 = true, 4852 }; 4853 4854 #ifdef COMPAT_FREEBSD32 4855 #ifdef __amd64__ 4856 #define umtx_native_ops32 umtx_native_opsi386 4857 #else 4858 #define umtx_native_ops32 umtx_native_opsx32 4859 #endif 4860 #endif /* COMPAT_FREEBSD32 */ 4861 #endif /* __i386__ || __LP64__ */ 4862 4863 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4864 4865 static int 4866 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4867 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4868 { 4869 struct _umtx_op_args uap = { 4870 .obj = obj, 4871 .op = op & ~UMTX_OP__FLAGS, 4872 .val = val, 4873 .uaddr1 = uaddr1, 4874 .uaddr2 = uaddr2 4875 }; 4876 4877 if (uap.op >= nitems(op_table)) 4878 return (EINVAL); 4879 return ((*op_table[uap.op])(td, &uap, ops)); 4880 } 4881 4882 int 4883 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4884 { 4885 const struct umtx_copyops *umtx_ops; 4886 4887 umtx_ops = &umtx_native_ops; 4888 #ifdef __LP64__ 4889 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 4890 if ((uap->op & UMTX_OP__I386) != 0) 4891 umtx_ops = &umtx_native_opsi386; 4892 else 4893 umtx_ops = &umtx_native_opsx32; 4894 } 4895 #elif !defined(__i386__) 4896 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 4897 if ((uap->op & UMTX_OP__I386) != 0) 4898 umtx_ops = &umtx_native_opsi386; 4899 #else 4900 /* Likewise, UMTX_OP__I386 is a nop on i386.
*/ 4901 if ((uap->op & UMTX_OP__32BIT) != 0) 4902 umtx_ops = &umtx_native_opsx32; 4903 #endif 4904 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4905 uap->uaddr2, umtx_ops)); 4906 } 4907 4908 #ifdef COMPAT_FREEBSD32 4909 #ifdef COMPAT_FREEBSD10 4910 int 4911 freebsd10_freebsd32__umtx_lock(struct thread *td, 4912 struct freebsd10_freebsd32__umtx_lock_args *uap) 4913 { 4914 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 4915 } 4916 4917 int 4918 freebsd10_freebsd32__umtx_unlock(struct thread *td, 4919 struct freebsd10_freebsd32__umtx_unlock_args *uap) 4920 { 4921 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 4922 } 4923 #endif /* COMPAT_FREEBSD10 */ 4924 4925 int 4926 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4927 { 4928 4929 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4930 uap->uaddr2, &umtx_native_ops32)); 4931 } 4932 #endif /* COMPAT_FREEBSD32 */ 4933 4934 void 4935 umtx_thread_init(struct thread *td) 4936 { 4937 4938 td->td_umtxq = umtxq_alloc(); 4939 td->td_umtxq->uq_thread = td; 4940 } 4941 4942 void 4943 umtx_thread_fini(struct thread *td) 4944 { 4945 4946 umtxq_free(td->td_umtxq); 4947 } 4948 4949 /* 4950 * Called when a new thread is created, e.g. by fork(). 4951 */ 4952 void 4953 umtx_thread_alloc(struct thread *td) 4954 { 4955 struct umtx_q *uq; 4956 4957 uq = td->td_umtxq; 4958 uq->uq_inherited_pri = PRI_MAX; 4959 4960 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4961 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4962 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4963 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4964 } 4965 4966 /* 4967 * exec() hook. 4968 * 4969 * Clear the robust lists for all of the process's threads, not delaying the 4970 * cleanup to thread exit, since the relevant address space is 4971 * destroyed right now. 4972 */ 4973 void 4974 umtx_exec(struct proc *p) 4975 { 4976 struct thread *td; 4977 4978 KASSERT(p == curproc, ("need curproc")); 4979 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4980 (p->p_flag & P_STOPPED_SINGLE) != 0, 4981 ("curproc must be single-threaded")); 4982 /* 4983 * There is no need to lock the list as only this thread can be 4984 * running. 4985 */ 4986 FOREACH_THREAD_IN_PROC(p, td) { 4987 KASSERT(td == curthread || 4988 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4989 ("running thread %p %p", p, td)); 4990 umtx_thread_cleanup(td); 4991 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4992 } 4993 } 4994 4995 /* 4996 * thread_exit() hook.
4997 */ 4998 void 4999 umtx_thread_exit(struct thread *td) 5000 { 5001 5002 umtx_thread_cleanup(td); 5003 } 5004 5005 static int 5006 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 5007 { 5008 u_long res1; 5009 uint32_t res32; 5010 int error; 5011 5012 if (compat32) { 5013 error = fueword32((void *)ptr, &res32); 5014 if (error == 0) 5015 res1 = res32; 5016 } else { 5017 error = fueword((void *)ptr, &res1); 5018 } 5019 if (error == 0) 5020 *res = res1; 5021 else 5022 error = EFAULT; 5023 return (error); 5024 } 5025 5026 static void 5027 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 5028 bool compat32) 5029 { 5030 struct umutex32 m32; 5031 5032 if (compat32) { 5033 memcpy(&m32, m, sizeof(m32)); 5034 *rb_list = m32.m_rb_lnk; 5035 } else { 5036 *rb_list = m->m_rb_lnk; 5037 } 5038 } 5039 5040 static int 5041 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 5042 bool compat32) 5043 { 5044 struct umutex m; 5045 int error; 5046 5047 KASSERT(td->td_proc == curproc, ("need current vmspace")); 5048 error = copyin((void *)rbp, &m, sizeof(m)); 5049 if (error != 0) 5050 return (error); 5051 if (rb_list != NULL) 5052 umtx_read_rb_list(td, &m, rb_list, compat32); 5053 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5054 return (EINVAL); 5055 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5056 /* inact is cleared after unlock, allow the inconsistency */ 5057 return (inact ? 0 : EINVAL); 5058 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5059 } 5060 5061 static void 5062 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5063 const char *name, bool compat32) 5064 { 5065 int error, i; 5066 uintptr_t rbp; 5067 bool inact; 5068 5069 if (rb_list == 0) 5070 return; 5071 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5072 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5073 if (rbp == *rb_inact) { 5074 inact = true; 5075 *rb_inact = 0; 5076 } else 5077 inact = false; 5078 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5079 } 5080 if (i == umtx_max_rb && umtx_verbose_rb) { 5081 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5082 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5083 } 5084 if (error != 0 && umtx_verbose_rb) { 5085 uprintf("comm %s pid %d: handling %srb error %d\n", 5086 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5087 } 5088 } 5089 5090 /* 5091 * Clean up umtx data. 5092 */ 5093 static void 5094 umtx_thread_cleanup(struct thread *td) 5095 { 5096 struct umtx_q *uq; 5097 struct umtx_pi *pi; 5098 uintptr_t rb_inact; 5099 bool compat32; 5100 5101 /* 5102 * Disown pi mutexes. 5103 */ 5104 uq = td->td_umtxq; 5105 if (uq != NULL) { 5106 if (uq->uq_inherited_pri != PRI_MAX || 5107 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5108 mtx_lock(&umtx_lock); 5109 uq->uq_inherited_pri = PRI_MAX; 5110 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5111 pi->pi_owner = NULL; 5112 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5113 } 5114 mtx_unlock(&umtx_lock); 5115 } 5116 sched_lend_user_prio_cond(td, PRI_MAX); 5117 } 5118 5119 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5120 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5121 5122 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5123 return; 5124 5125 /* 5126 * Handle terminated robust mutexes. Must be done after 5127 * robust pi disown, otherwise unlock could see unowned 5128 * entries. 
5129 */ 5130 rb_inact = td->td_rb_inact; 5131 if (rb_inact != 0) 5132 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5133 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5134 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5135 if (rb_inact != 0) 5136 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5137 } 5138