1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 #include "opt_umtx_profiling.h" 36 37 #include <sys/param.h> 38 #include <sys/kernel.h> 39 #include <sys/fcntl.h> 40 #include <sys/file.h> 41 #include <sys/filedesc.h> 42 #include <sys/limits.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mman.h> 46 #include <sys/mutex.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/resource.h> 50 #include <sys/resourcevar.h> 51 #include <sys/rwlock.h> 52 #include <sys/sbuf.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/sysctl.h> 56 #include <sys/systm.h> 57 #include <sys/sysproto.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/taskqueue.h> 60 #include <sys/time.h> 61 #include <sys/eventhandler.h> 62 #include <sys/umtx.h> 63 #include <sys/umtxvar.h> 64 65 #include <security/mac/mac_framework.h> 66 67 #include <vm/vm.h> 68 #include <vm/vm_param.h> 69 #include <vm/pmap.h> 70 #include <vm/uma.h> 71 #include <vm/vm_map.h> 72 #include <vm/vm_object.h> 73 74 #include <machine/atomic.h> 75 #include <machine/cpu.h> 76 77 #include <compat/freebsd32/freebsd32.h> 78 #ifdef COMPAT_FREEBSD32 79 #include <compat/freebsd32/freebsd32_proto.h> 80 #endif 81 82 #define _UMUTEX_TRY 1 83 #define _UMUTEX_WAIT 2 84 85 #ifdef UMTX_PROFILING 86 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 87 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 88 #endif 89 90 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 91 #ifdef INVARIANTS 92 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do { \ 93 struct umtxq_chain *uc; \ 94 \ 95 uc = umtxq_getchain(key); \ 96 mtx_assert(&uc->uc_lock, MA_OWNED); \ 97 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); \ 98 } while (0) 99 #else 100 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0) 101 #endif 102 103 /* 104 * Don't propagate time-sharing priority, there is a security reason, 105 * a user can 
simply introduce PI-mutex, let thread A lock the mutex, 106 * and let another thread B block on the mutex, because B is 107 * sleeping, its priority will be boosted, this causes A's priority to 108 * be boosted via priority propagating too and will never be lowered even 109 * if it is using 100%CPU, this is unfair to other processes. 110 */ 111 112 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 113 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 114 PRI_MAX_TIMESHARE : (td)->td_user_pri) 115 116 #define GOLDEN_RATIO_PRIME 2654404609U 117 #ifndef UMTX_CHAINS 118 #define UMTX_CHAINS 512 119 #endif 120 #define UMTX_SHIFTS (__WORD_BIT - 9) 121 122 #define GET_SHARE(flags) \ 123 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 124 125 #define BUSY_SPINS 200 126 127 struct umtx_copyops { 128 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); 129 int (*copyin_umtx_time)(const void *uaddr, size_t size, 130 struct _umtx_time *tp); 131 int (*copyin_robust_lists)(const void *uaddr, size_t size, 132 struct umtx_robust_lists_params *rbp); 133 int (*copyout_timeout)(void *uaddr, size_t size, 134 struct timespec *tsp); 135 const size_t timespec_sz; 136 const size_t umtx_time_sz; 137 const bool compat32; 138 }; 139 140 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 141 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 142 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 143 144 int umtx_shm_vnobj_persistent = 0; 145 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 146 &umtx_shm_vnobj_persistent, 0, 147 "False forces destruction of umtx attached to file, on last close"); 148 static int umtx_max_rb = 1000; 149 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 150 &umtx_max_rb, 0, 151 "Maximum number of robust mutexes allowed for each thread"); 152 153 static uma_zone_t umtx_pi_zone; 154 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 155 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 156 static int umtx_pi_allocated; 157 158 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 159 "umtx debug"); 160 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 161 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 162 static int umtx_verbose_rb = 1; 163 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 164 &umtx_verbose_rb, 0, 165 ""); 166 167 #ifdef UMTX_PROFILING 168 static long max_length; 169 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 170 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 171 "umtx chain stats"); 172 #endif 173 174 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 175 const struct _umtx_time *umtxtime); 176 177 static void umtx_shm_init(void); 178 static void umtxq_sysinit(void *); 179 static void umtxq_hash(struct umtx_key *key); 180 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 181 bool rb); 182 static void umtx_thread_cleanup(struct thread *td); 183 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 184 185 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 186 187 static struct mtx umtx_lock; 188 189 #ifdef UMTX_PROFILING 190 static void 191 umtx_init_profiling(void) 192 { 193 struct sysctl_oid *chain_oid; 194 char chain_name[10]; 195 int i; 196 197 for (i = 0; i < UMTX_CHAINS; ++i) { 198 snprintf(chain_name, 
sizeof(chain_name), "%d", i); 199 chain_oid = SYSCTL_ADD_NODE(NULL, 200 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 201 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 202 "umtx hash stats"); 203 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 204 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 205 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 206 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 207 } 208 } 209 210 static int 211 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 212 { 213 char buf[512]; 214 struct sbuf sb; 215 struct umtxq_chain *uc; 216 u_int fract, i, j, tot, whole; 217 u_int sf0, sf1, sf2, sf3, sf4; 218 u_int si0, si1, si2, si3, si4; 219 u_int sw0, sw1, sw2, sw3, sw4; 220 221 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 222 for (i = 0; i < 2; i++) { 223 tot = 0; 224 for (j = 0; j < UMTX_CHAINS; ++j) { 225 uc = &umtxq_chains[i][j]; 226 mtx_lock(&uc->uc_lock); 227 tot += uc->max_length; 228 mtx_unlock(&uc->uc_lock); 229 } 230 if (tot == 0) 231 sbuf_printf(&sb, "%u) Empty ", i); 232 else { 233 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 234 si0 = si1 = si2 = si3 = si4 = 0; 235 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 236 for (j = 0; j < UMTX_CHAINS; j++) { 237 uc = &umtxq_chains[i][j]; 238 mtx_lock(&uc->uc_lock); 239 whole = uc->max_length * 100; 240 mtx_unlock(&uc->uc_lock); 241 fract = (whole % tot) * 100; 242 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 243 sf0 = fract; 244 si0 = j; 245 sw0 = whole; 246 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 247 sf1)) { 248 sf1 = fract; 249 si1 = j; 250 sw1 = whole; 251 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 252 sf2)) { 253 sf2 = fract; 254 si2 = j; 255 sw2 = whole; 256 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 257 sf3)) { 258 sf3 = fract; 259 si3 = j; 260 sw3 = whole; 261 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 262 sf4)) { 263 sf4 = fract; 264 si4 = j; 265 sw4 = whole; 266 } 267 } 268 sbuf_printf(&sb, "queue %u:\n", i); 269 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 270 sf0 / tot, si0); 271 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 272 sf1 / tot, si1); 273 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 274 sf2 / tot, si2); 275 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 276 sf3 / tot, si3); 277 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 278 sf4 / tot, si4); 279 } 280 } 281 sbuf_trim(&sb); 282 sbuf_finish(&sb); 283 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 284 sbuf_delete(&sb); 285 return (0); 286 } 287 288 static int 289 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 290 { 291 struct umtxq_chain *uc; 292 u_int i, j; 293 int clear, error; 294 295 clear = 0; 296 error = sysctl_handle_int(oidp, &clear, 0, req); 297 if (error != 0 || req->newptr == NULL) 298 return (error); 299 300 if (clear != 0) { 301 for (i = 0; i < 2; ++i) { 302 for (j = 0; j < UMTX_CHAINS; ++j) { 303 uc = &umtxq_chains[i][j]; 304 mtx_lock(&uc->uc_lock); 305 uc->length = 0; 306 uc->max_length = 0; 307 mtx_unlock(&uc->uc_lock); 308 } 309 } 310 } 311 return (0); 312 } 313 314 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 315 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 316 sysctl_debug_umtx_chains_clear, "I", 317 "Clear umtx chains statistics"); 318 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 319 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 320 sysctl_debug_umtx_chains_peaks, "A", 321 "Highest peaks in chains max length"); 322 #endif 323 324 static void 325 umtxq_sysinit(void *arg 
__unused) 326 { 327 int i, j; 328 329 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 330 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 331 for (i = 0; i < 2; ++i) { 332 for (j = 0; j < UMTX_CHAINS; ++j) { 333 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 334 MTX_DEF | MTX_DUPOK); 335 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 336 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 337 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 338 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 339 umtxq_chains[i][j].uc_busy = 0; 340 umtxq_chains[i][j].uc_waiters = 0; 341 #ifdef UMTX_PROFILING 342 umtxq_chains[i][j].length = 0; 343 umtxq_chains[i][j].max_length = 0; 344 #endif 345 } 346 } 347 #ifdef UMTX_PROFILING 348 umtx_init_profiling(); 349 #endif 350 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 351 umtx_shm_init(); 352 } 353 354 struct umtx_q * 355 umtxq_alloc(void) 356 { 357 struct umtx_q *uq; 358 359 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 360 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 361 M_WAITOK | M_ZERO); 362 TAILQ_INIT(&uq->uq_spare_queue->head); 363 TAILQ_INIT(&uq->uq_pi_contested); 364 uq->uq_inherited_pri = PRI_MAX; 365 return (uq); 366 } 367 368 void 369 umtxq_free(struct umtx_q *uq) 370 { 371 372 MPASS(uq->uq_spare_queue != NULL); 373 free(uq->uq_spare_queue, M_UMTX); 374 free(uq, M_UMTX); 375 } 376 377 static inline void 378 umtxq_hash(struct umtx_key *key) 379 { 380 unsigned n; 381 382 n = (uintptr_t)key->info.both.a + key->info.both.b; 383 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 384 } 385 386 struct umtxq_chain * 387 umtxq_getchain(struct umtx_key *key) 388 { 389 390 if (key->type <= TYPE_SEM) 391 return (&umtxq_chains[1][key->hash]); 392 return (&umtxq_chains[0][key->hash]); 393 } 394 395 /* 396 * Set chain to busy state when following operation 397 * may be blocked (kernel mutex can not be used). 398 */ 399 void 400 umtxq_busy(struct umtx_key *key) 401 { 402 struct umtxq_chain *uc; 403 404 uc = umtxq_getchain(key); 405 mtx_assert(&uc->uc_lock, MA_OWNED); 406 if (uc->uc_busy) { 407 #ifdef SMP 408 if (smp_cpus > 1) { 409 int count = BUSY_SPINS; 410 if (count > 0) { 411 umtxq_unlock(key); 412 while (uc->uc_busy && --count > 0) 413 cpu_spinwait(); 414 umtxq_lock(key); 415 } 416 } 417 #endif 418 while (uc->uc_busy) { 419 uc->uc_waiters++; 420 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 421 uc->uc_waiters--; 422 } 423 } 424 uc->uc_busy = 1; 425 } 426 427 /* 428 * Unbusy a chain. 
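 * The chain lock must be held and the chain must currently be busy;
 * if any thread is waiting to busy the chain, one of them is woken up.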
429 */ 430 void 431 umtxq_unbusy(struct umtx_key *key) 432 { 433 struct umtxq_chain *uc; 434 435 uc = umtxq_getchain(key); 436 mtx_assert(&uc->uc_lock, MA_OWNED); 437 KASSERT(uc->uc_busy != 0, ("not busy")); 438 uc->uc_busy = 0; 439 if (uc->uc_waiters) 440 wakeup_one(uc); 441 } 442 443 void 444 umtxq_busy_unlocked(struct umtx_key *key) 445 { 446 umtxq_lock(key); 447 umtxq_busy(key); 448 umtxq_unlock(key); 449 } 450 451 void 452 umtxq_unbusy_unlocked(struct umtx_key *key) 453 { 454 umtxq_lock(key); 455 umtxq_unbusy(key); 456 umtxq_unlock(key); 457 } 458 459 static struct umtxq_queue * 460 umtxq_queue_lookup(struct umtx_key *key, int q) 461 { 462 struct umtxq_queue *uh; 463 struct umtxq_chain *uc; 464 465 uc = umtxq_getchain(key); 466 UMTXQ_LOCKED_ASSERT(uc); 467 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 468 if (umtx_key_match(&uh->key, key)) 469 return (uh); 470 } 471 472 return (NULL); 473 } 474 475 void 476 umtxq_insert_queue(struct umtx_q *uq, int q) 477 { 478 struct umtxq_queue *uh; 479 struct umtxq_chain *uc; 480 481 uc = umtxq_getchain(&uq->uq_key); 482 UMTXQ_LOCKED_ASSERT(uc); 483 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 484 uh = umtxq_queue_lookup(&uq->uq_key, q); 485 if (uh != NULL) { 486 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 487 } else { 488 uh = uq->uq_spare_queue; 489 uh->key = uq->uq_key; 490 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 491 #ifdef UMTX_PROFILING 492 uc->length++; 493 if (uc->length > uc->max_length) { 494 uc->max_length = uc->length; 495 if (uc->max_length > max_length) 496 max_length = uc->max_length; 497 } 498 #endif 499 } 500 uq->uq_spare_queue = NULL; 501 502 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 503 uh->length++; 504 uq->uq_flags |= UQF_UMTXQ; 505 uq->uq_cur_queue = uh; 506 return; 507 } 508 509 void 510 umtxq_remove_queue(struct umtx_q *uq, int q) 511 { 512 struct umtxq_chain *uc; 513 struct umtxq_queue *uh; 514 515 uc = umtxq_getchain(&uq->uq_key); 516 UMTXQ_LOCKED_ASSERT(uc); 517 if (uq->uq_flags & UQF_UMTXQ) { 518 uh = uq->uq_cur_queue; 519 TAILQ_REMOVE(&uh->head, uq, uq_link); 520 uh->length--; 521 uq->uq_flags &= ~UQF_UMTXQ; 522 if (TAILQ_EMPTY(&uh->head)) { 523 KASSERT(uh->length == 0, 524 ("inconsistent umtxq_queue length")); 525 #ifdef UMTX_PROFILING 526 uc->length--; 527 #endif 528 LIST_REMOVE(uh, link); 529 } else { 530 uh = LIST_FIRST(&uc->uc_spare_queue); 531 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 532 LIST_REMOVE(uh, link); 533 } 534 uq->uq_spare_queue = uh; 535 uq->uq_cur_queue = NULL; 536 } 537 } 538 539 /* 540 * Check if there are multiple waiters 541 */ 542 int 543 umtxq_count(struct umtx_key *key) 544 { 545 struct umtxq_queue *uh; 546 547 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 548 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 549 if (uh != NULL) 550 return (uh->length); 551 return (0); 552 } 553 554 /* 555 * Check if there are multiple PI waiters and returns first 556 * waiter. 557 */ 558 static int 559 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 560 { 561 struct umtxq_queue *uh; 562 563 *first = NULL; 564 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 565 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 566 if (uh != NULL) { 567 *first = TAILQ_FIRST(&uh->head); 568 return (uh->length); 569 } 570 return (0); 571 } 572 573 /* 574 * Wake up threads waiting on an userland object by a bit mask. 
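 * Only waiters whose uq_bitset intersects the given bitset are considered,
 * and at most n_wake of them are woken; the number actually woken is
 * returned.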
575 */ 576 int 577 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 578 { 579 struct umtxq_queue *uh; 580 struct umtx_q *uq, *uq_temp; 581 int ret; 582 583 ret = 0; 584 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 585 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 586 if (uh == NULL) 587 return (0); 588 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 589 if ((uq->uq_bitset & bitset) == 0) 590 continue; 591 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 592 wakeup_one(uq); 593 if (++ret >= n_wake) 594 break; 595 } 596 return (ret); 597 } 598 599 /* 600 * Wake up threads waiting on an userland object. 601 */ 602 603 static int 604 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 605 { 606 struct umtxq_queue *uh; 607 struct umtx_q *uq; 608 int ret; 609 610 ret = 0; 611 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 612 uh = umtxq_queue_lookup(key, q); 613 if (uh != NULL) { 614 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 615 umtxq_remove_queue(uq, q); 616 wakeup(uq); 617 if (++ret >= n_wake) 618 return (ret); 619 } 620 } 621 return (ret); 622 } 623 624 /* 625 * Wake up specified thread. 626 */ 627 static inline void 628 umtxq_signal_thread(struct umtx_q *uq) 629 { 630 631 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 632 umtxq_remove(uq); 633 wakeup(uq); 634 } 635 636 /* 637 * Wake up a maximum of n_wake threads that are waiting on an userland 638 * object identified by key. The remaining threads are removed from queue 639 * identified by key and added to the queue identified by key2 (requeued). 640 * The n_requeue specifies an upper limit on the number of threads that 641 * are requeued to the second queue. 642 */ 643 int 644 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 645 int n_requeue) 646 { 647 struct umtxq_queue *uh; 648 struct umtx_q *uq, *uq_temp; 649 int ret; 650 651 ret = 0; 652 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 653 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 654 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 655 if (uh == NULL) 656 return (0); 657 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 658 if (++ret <= n_wake) { 659 umtxq_remove(uq); 660 wakeup_one(uq); 661 } else { 662 umtxq_remove(uq); 663 uq->uq_key = *key2; 664 umtxq_insert(uq); 665 if (ret - n_wake == n_requeue) 666 break; 667 } 668 } 669 return (ret); 670 } 671 672 static inline int 673 tstohz(const struct timespec *tsp) 674 { 675 struct timeval tv; 676 677 TIMESPEC_TO_TIMEVAL(&tv, tsp); 678 return tvtohz(&tv); 679 } 680 681 void 682 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 683 int absolute, const struct timespec *timeout) 684 { 685 686 timo->clockid = clockid; 687 if (!absolute) { 688 timo->is_abs_real = false; 689 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 690 timespecadd(&timo->cur, timeout, &timo->end); 691 } else { 692 timo->end = *timeout; 693 timo->is_abs_real = clockid == CLOCK_REALTIME || 694 clockid == CLOCK_REALTIME_FAST || 695 clockid == CLOCK_REALTIME_PRECISE || 696 clockid == CLOCK_TAI || 697 clockid == CLOCK_SECOND; 698 } 699 } 700 701 static void 702 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 703 const struct _umtx_time *umtxtime) 704 { 705 706 umtx_abs_timeout_init(timo, umtxtime->_clockid, 707 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 708 } 709 710 static void 711 umtx_abs_timeout_enforce_min(sbintime_t *sbt) 712 { 713 sbintime_t when, mint; 714 715 mint = curproc->p_umtx_min_timeout; 716 if (__predict_false(mint != 0)) { 717 when = sbinuptime() + mint; 718 
if (*sbt < when) 719 *sbt = when; 720 } 721 } 722 723 static int 724 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt, 725 int *flags) 726 { 727 struct bintime bt, bbt; 728 struct timespec tts; 729 sbintime_t rem; 730 731 switch (timo->clockid) { 732 733 /* Clocks that can be converted into absolute time. */ 734 case CLOCK_REALTIME: 735 case CLOCK_REALTIME_PRECISE: 736 case CLOCK_REALTIME_FAST: 737 case CLOCK_MONOTONIC: 738 case CLOCK_MONOTONIC_PRECISE: 739 case CLOCK_MONOTONIC_FAST: 740 case CLOCK_UPTIME: 741 case CLOCK_UPTIME_PRECISE: 742 case CLOCK_UPTIME_FAST: 743 case CLOCK_SECOND: 744 timespec2bintime(&timo->end, &bt); 745 switch (timo->clockid) { 746 case CLOCK_REALTIME: 747 case CLOCK_REALTIME_PRECISE: 748 case CLOCK_REALTIME_FAST: 749 case CLOCK_SECOND: 750 getboottimebin(&bbt); 751 bintime_sub(&bt, &bbt); 752 break; 753 } 754 if (bt.sec < 0) 755 return (ETIMEDOUT); 756 if (bt.sec >= (SBT_MAX >> 32)) { 757 *sbt = 0; 758 *flags = 0; 759 return (0); 760 } 761 *sbt = bttosbt(bt); 762 umtx_abs_timeout_enforce_min(sbt); 763 764 /* 765 * Check if the absolute time should be aligned to 766 * avoid firing multiple timer events in non-periodic 767 * timer mode. 768 */ 769 switch (timo->clockid) { 770 case CLOCK_REALTIME_FAST: 771 case CLOCK_MONOTONIC_FAST: 772 case CLOCK_UPTIME_FAST: 773 rem = *sbt % tc_tick_sbt; 774 if (__predict_true(rem != 0)) 775 *sbt += tc_tick_sbt - rem; 776 break; 777 case CLOCK_SECOND: 778 rem = *sbt % SBT_1S; 779 if (__predict_true(rem != 0)) 780 *sbt += SBT_1S - rem; 781 break; 782 } 783 *flags = C_ABSOLUTE; 784 return (0); 785 786 /* Clocks that has to be periodically polled. */ 787 case CLOCK_VIRTUAL: 788 case CLOCK_PROF: 789 case CLOCK_THREAD_CPUTIME_ID: 790 case CLOCK_PROCESS_CPUTIME_ID: 791 case CLOCK_TAI: /* Boot time is not necessarily stable in TAI */ 792 default: 793 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 794 if (timespeccmp(&timo->end, &timo->cur, <=)) 795 return (ETIMEDOUT); 796 timespecsub(&timo->end, &timo->cur, &tts); 797 *sbt = tick_sbt * tstohz(&tts); 798 *flags = C_HARDCLOCK; 799 return (0); 800 } 801 } 802 803 static uint32_t 804 umtx_unlock_val(uint32_t flags, bool rb) 805 { 806 807 if (rb) 808 return (UMUTEX_RB_OWNERDEAD); 809 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 810 return (UMUTEX_RB_NOTRECOV); 811 else 812 return (UMUTEX_UNOWNED); 813 814 } 815 816 /* 817 * Put thread into sleep state, before sleeping, check if 818 * thread was removed from umtx queue. 819 */ 820 int 821 umtxq_sleep(struct umtx_q *uq, const char *wmesg, 822 struct umtx_abs_timeout *timo) 823 { 824 struct umtxq_chain *uc; 825 sbintime_t sbt = 0; 826 int error, flags = 0; 827 828 uc = umtxq_getchain(&uq->uq_key); 829 UMTXQ_LOCKED_ASSERT(uc); 830 for (;;) { 831 if (!(uq->uq_flags & UQF_UMTXQ)) { 832 error = 0; 833 break; 834 } 835 if (timo != NULL) { 836 if (timo->is_abs_real) 837 curthread->td_rtcgen = 838 atomic_load_acq_int(&rtc_generation); 839 error = umtx_abs_timeout_getsbt(timo, &sbt, &flags); 840 if (error != 0) 841 break; 842 } 843 error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, 844 sbt, 0, flags); 845 uc = umtxq_getchain(&uq->uq_key); 846 mtx_lock(&uc->uc_lock); 847 if (error == EINTR || error == ERESTART) 848 break; 849 if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) { 850 error = ETIMEDOUT; 851 break; 852 } 853 } 854 855 curthread->td_rtcgen = 0; 856 return (error); 857 } 858 859 /* 860 * Convert userspace address into unique logical address. 
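 * Process-private objects are keyed by the owning vmspace and the virtual
 * address; shared objects are keyed by the backing VM object and the offset
 * within it, resolved through a vm_map lookup.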
861 */ 862 int 863 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 864 { 865 struct thread *td = curthread; 866 vm_map_t map; 867 vm_map_entry_t entry; 868 vm_pindex_t pindex; 869 vm_prot_t prot; 870 boolean_t wired; 871 872 key->type = type; 873 if (share == THREAD_SHARE) { 874 key->shared = 0; 875 key->info.private.vs = td->td_proc->p_vmspace; 876 key->info.private.addr = (uintptr_t)addr; 877 } else { 878 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 879 map = &td->td_proc->p_vmspace->vm_map; 880 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 881 &entry, &key->info.shared.object, &pindex, &prot, 882 &wired) != KERN_SUCCESS) { 883 return (EFAULT); 884 } 885 886 if ((share == PROCESS_SHARE) || 887 (share == AUTO_SHARE && 888 VM_INHERIT_SHARE == entry->inheritance)) { 889 key->shared = 1; 890 key->info.shared.offset = (vm_offset_t)addr - 891 entry->start + entry->offset; 892 vm_object_reference(key->info.shared.object); 893 } else { 894 key->shared = 0; 895 key->info.private.vs = td->td_proc->p_vmspace; 896 key->info.private.addr = (uintptr_t)addr; 897 } 898 vm_map_lookup_done(map, entry); 899 } 900 901 umtxq_hash(key); 902 return (0); 903 } 904 905 /* 906 * Release key. 907 */ 908 void 909 umtx_key_release(struct umtx_key *key) 910 { 911 if (key->shared) 912 vm_object_deallocate(key->info.shared.object); 913 } 914 915 #ifdef COMPAT_FREEBSD10 916 /* 917 * Lock a umtx object. 918 */ 919 static int 920 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 921 const struct timespec *timeout) 922 { 923 struct umtx_abs_timeout timo; 924 struct umtx_q *uq; 925 u_long owner; 926 u_long old; 927 int error = 0; 928 929 uq = td->td_umtxq; 930 if (timeout != NULL) 931 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 932 933 /* 934 * Care must be exercised when dealing with umtx structure. It 935 * can fault on any access. 936 */ 937 for (;;) { 938 /* 939 * Try the uncontested case. This should be done in userland. 940 */ 941 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 942 943 /* The acquire succeeded. */ 944 if (owner == UMTX_UNOWNED) 945 return (0); 946 947 /* The address was invalid. */ 948 if (owner == -1) 949 return (EFAULT); 950 951 /* If no one owns it but it is contested try to acquire it. */ 952 if (owner == UMTX_CONTESTED) { 953 owner = casuword(&umtx->u_owner, 954 UMTX_CONTESTED, id | UMTX_CONTESTED); 955 956 if (owner == UMTX_CONTESTED) 957 return (0); 958 959 /* The address was invalid. */ 960 if (owner == -1) 961 return (EFAULT); 962 963 error = thread_check_susp(td, false); 964 if (error != 0) 965 break; 966 967 /* If this failed the lock has changed, restart. */ 968 continue; 969 } 970 971 /* 972 * If we caught a signal, we have retried and now 973 * exit immediately. 974 */ 975 if (error != 0) 976 break; 977 978 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 979 AUTO_SHARE, &uq->uq_key)) != 0) 980 return (error); 981 982 umtxq_lock(&uq->uq_key); 983 umtxq_busy(&uq->uq_key); 984 umtxq_insert(uq); 985 umtxq_unbusy(&uq->uq_key); 986 umtxq_unlock(&uq->uq_key); 987 988 /* 989 * Set the contested bit so that a release in user space 990 * knows to use the system call for unlock. If this fails 991 * either some one else has acquired the lock or it has been 992 * released. 993 */ 994 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 995 996 /* The address was invalid. 
*/ 997 if (old == -1) { 998 umtxq_lock(&uq->uq_key); 999 umtxq_remove(uq); 1000 umtxq_unlock(&uq->uq_key); 1001 umtx_key_release(&uq->uq_key); 1002 return (EFAULT); 1003 } 1004 1005 /* 1006 * We set the contested bit, sleep. Otherwise the lock changed 1007 * and we need to retry or we lost a race to the thread 1008 * unlocking the umtx. 1009 */ 1010 umtxq_lock(&uq->uq_key); 1011 if (old == owner) 1012 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL : 1013 &timo); 1014 umtxq_remove(uq); 1015 umtxq_unlock(&uq->uq_key); 1016 umtx_key_release(&uq->uq_key); 1017 1018 if (error == 0) 1019 error = thread_check_susp(td, false); 1020 } 1021 1022 if (timeout == NULL) { 1023 /* Mutex locking is restarted if it is interrupted. */ 1024 if (error == EINTR) 1025 error = ERESTART; 1026 } else { 1027 /* Timed-locking is not restarted. */ 1028 if (error == ERESTART) 1029 error = EINTR; 1030 } 1031 return (error); 1032 } 1033 1034 /* 1035 * Unlock a umtx object. 1036 */ 1037 static int 1038 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 1039 { 1040 struct umtx_key key; 1041 u_long owner; 1042 u_long old; 1043 int error; 1044 int count; 1045 1046 /* 1047 * Make sure we own this mtx. 1048 */ 1049 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 1050 if (owner == -1) 1051 return (EFAULT); 1052 1053 if ((owner & ~UMTX_CONTESTED) != id) 1054 return (EPERM); 1055 1056 /* This should be done in userland */ 1057 if ((owner & UMTX_CONTESTED) == 0) { 1058 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 1059 if (old == -1) 1060 return (EFAULT); 1061 if (old == owner) 1062 return (0); 1063 owner = old; 1064 } 1065 1066 /* We should only ever be in here for contested locks */ 1067 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1068 &key)) != 0) 1069 return (error); 1070 1071 umtxq_lock(&key); 1072 umtxq_busy(&key); 1073 count = umtxq_count(&key); 1074 umtxq_unlock(&key); 1075 1076 /* 1077 * When unlocking the umtx, it must be marked as unowned if 1078 * there is zero or one thread only waiting for it. 1079 * Otherwise, it must be marked as contested. 1080 */ 1081 old = casuword(&umtx->u_owner, owner, 1082 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 1083 umtxq_lock(&key); 1084 umtxq_signal(&key,1); 1085 umtxq_unbusy(&key); 1086 umtxq_unlock(&key); 1087 umtx_key_release(&key); 1088 if (old == -1) 1089 return (EFAULT); 1090 if (old != owner) 1091 return (EINVAL); 1092 return (0); 1093 } 1094 1095 #ifdef COMPAT_FREEBSD32 1096 1097 /* 1098 * Lock a umtx object. 1099 */ 1100 static int 1101 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1102 const struct timespec *timeout) 1103 { 1104 struct umtx_abs_timeout timo; 1105 struct umtx_q *uq; 1106 uint32_t owner; 1107 uint32_t old; 1108 int error = 0; 1109 1110 uq = td->td_umtxq; 1111 1112 if (timeout != NULL) 1113 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1114 1115 /* 1116 * Care must be exercised when dealing with umtx structure. It 1117 * can fault on any access. 1118 */ 1119 for (;;) { 1120 /* 1121 * Try the uncontested case. This should be done in userland. 1122 */ 1123 owner = casuword32(m, UMUTEX_UNOWNED, id); 1124 1125 /* The acquire succeeded. */ 1126 if (owner == UMUTEX_UNOWNED) 1127 return (0); 1128 1129 /* The address was invalid. */ 1130 if (owner == -1) 1131 return (EFAULT); 1132 1133 /* If no one owns it but it is contested try to acquire it. 
*/ 1134 if (owner == UMUTEX_CONTESTED) { 1135 owner = casuword32(m, 1136 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1137 if (owner == UMUTEX_CONTESTED) 1138 return (0); 1139 1140 /* The address was invalid. */ 1141 if (owner == -1) 1142 return (EFAULT); 1143 1144 error = thread_check_susp(td, false); 1145 if (error != 0) 1146 break; 1147 1148 /* If this failed the lock has changed, restart. */ 1149 continue; 1150 } 1151 1152 /* 1153 * If we caught a signal, we have retried and now 1154 * exit immediately. 1155 */ 1156 if (error != 0) 1157 return (error); 1158 1159 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1160 AUTO_SHARE, &uq->uq_key)) != 0) 1161 return (error); 1162 1163 umtxq_lock(&uq->uq_key); 1164 umtxq_busy(&uq->uq_key); 1165 umtxq_insert(uq); 1166 umtxq_unbusy(&uq->uq_key); 1167 umtxq_unlock(&uq->uq_key); 1168 1169 /* 1170 * Set the contested bit so that a release in user space 1171 * knows to use the system call for unlock. If this fails 1172 * either some one else has acquired the lock or it has been 1173 * released. 1174 */ 1175 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1176 1177 /* The address was invalid. */ 1178 if (old == -1) { 1179 umtxq_lock(&uq->uq_key); 1180 umtxq_remove(uq); 1181 umtxq_unlock(&uq->uq_key); 1182 umtx_key_release(&uq->uq_key); 1183 return (EFAULT); 1184 } 1185 1186 /* 1187 * We set the contested bit, sleep. Otherwise the lock changed 1188 * and we need to retry or we lost a race to the thread 1189 * unlocking the umtx. 1190 */ 1191 umtxq_lock(&uq->uq_key); 1192 if (old == owner) 1193 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1194 NULL : &timo); 1195 umtxq_remove(uq); 1196 umtxq_unlock(&uq->uq_key); 1197 umtx_key_release(&uq->uq_key); 1198 1199 if (error == 0) 1200 error = thread_check_susp(td, false); 1201 } 1202 1203 if (timeout == NULL) { 1204 /* Mutex locking is restarted if it is interrupted. */ 1205 if (error == EINTR) 1206 error = ERESTART; 1207 } else { 1208 /* Timed-locking is not restarted. */ 1209 if (error == ERESTART) 1210 error = EINTR; 1211 } 1212 return (error); 1213 } 1214 1215 /* 1216 * Unlock a umtx object. 1217 */ 1218 static int 1219 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1220 { 1221 struct umtx_key key; 1222 uint32_t owner; 1223 uint32_t old; 1224 int error; 1225 int count; 1226 1227 /* 1228 * Make sure we own this mtx. 1229 */ 1230 owner = fuword32(m); 1231 if (owner == -1) 1232 return (EFAULT); 1233 1234 if ((owner & ~UMUTEX_CONTESTED) != id) 1235 return (EPERM); 1236 1237 /* This should be done in userland */ 1238 if ((owner & UMUTEX_CONTESTED) == 0) { 1239 old = casuword32(m, owner, UMUTEX_UNOWNED); 1240 if (old == -1) 1241 return (EFAULT); 1242 if (old == owner) 1243 return (0); 1244 owner = old; 1245 } 1246 1247 /* We should only ever be in here for contested locks */ 1248 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1249 &key)) != 0) 1250 return (error); 1251 1252 umtxq_lock(&key); 1253 umtxq_busy(&key); 1254 count = umtxq_count(&key); 1255 umtxq_unlock(&key); 1256 1257 /* 1258 * When unlocking the umtx, it must be marked as unowned if 1259 * there is zero or one thread only waiting for it. 1260 * Otherwise, it must be marked as contested. 1261 */ 1262 old = casuword32(m, owner, 1263 count <= 1 ? 
UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1264 umtxq_lock(&key); 1265 umtxq_signal(&key,1); 1266 umtxq_unbusy(&key); 1267 umtxq_unlock(&key); 1268 umtx_key_release(&key); 1269 if (old == -1) 1270 return (EFAULT); 1271 if (old != owner) 1272 return (EINVAL); 1273 return (0); 1274 } 1275 #endif /* COMPAT_FREEBSD32 */ 1276 #endif /* COMPAT_FREEBSD10 */ 1277 1278 /* 1279 * Fetch and compare value, sleep on the address if value is not changed. 1280 */ 1281 static int 1282 do_wait(struct thread *td, void *addr, u_long id, 1283 struct _umtx_time *timeout, int compat32, int is_private) 1284 { 1285 struct umtx_abs_timeout timo; 1286 struct umtx_q *uq; 1287 u_long tmp; 1288 uint32_t tmp32; 1289 int error = 0; 1290 1291 uq = td->td_umtxq; 1292 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1293 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1294 return (error); 1295 1296 if (timeout != NULL) 1297 umtx_abs_timeout_init2(&timo, timeout); 1298 1299 umtxq_lock(&uq->uq_key); 1300 umtxq_insert(uq); 1301 umtxq_unlock(&uq->uq_key); 1302 if (compat32 == 0) { 1303 error = fueword(addr, &tmp); 1304 if (error != 0) 1305 error = EFAULT; 1306 } else { 1307 error = fueword32(addr, &tmp32); 1308 if (error == 0) 1309 tmp = tmp32; 1310 else 1311 error = EFAULT; 1312 } 1313 umtxq_lock(&uq->uq_key); 1314 if (error == 0) { 1315 if (tmp == id) 1316 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 1317 NULL : &timo); 1318 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1319 error = 0; 1320 else 1321 umtxq_remove(uq); 1322 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 1323 umtxq_remove(uq); 1324 } 1325 umtxq_unlock(&uq->uq_key); 1326 umtx_key_release(&uq->uq_key); 1327 if (error == ERESTART) 1328 error = EINTR; 1329 return (error); 1330 } 1331 1332 /* 1333 * Wake up threads sleeping on the specified address. 1334 */ 1335 int 1336 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1337 { 1338 struct umtx_key key; 1339 int ret; 1340 1341 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1342 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1343 return (ret); 1344 umtxq_lock(&key); 1345 umtxq_signal(&key, n_wake); 1346 umtxq_unlock(&key); 1347 umtx_key_release(&key); 1348 return (0); 1349 } 1350 1351 /* 1352 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1353 */ 1354 static int 1355 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1356 struct _umtx_time *timeout, int mode) 1357 { 1358 struct umtx_abs_timeout timo; 1359 struct umtx_q *uq; 1360 uint32_t owner, old, id; 1361 int error, rv; 1362 1363 id = td->td_tid; 1364 uq = td->td_umtxq; 1365 error = 0; 1366 if (timeout != NULL) 1367 umtx_abs_timeout_init2(&timo, timeout); 1368 1369 /* 1370 * Care must be exercised when dealing with umtx structure. It 1371 * can fault on any access. 1372 */ 1373 for (;;) { 1374 rv = fueword32(&m->m_owner, &owner); 1375 if (rv == -1) 1376 return (EFAULT); 1377 if (mode == _UMUTEX_WAIT) { 1378 if (owner == UMUTEX_UNOWNED || 1379 owner == UMUTEX_CONTESTED || 1380 owner == UMUTEX_RB_OWNERDEAD || 1381 owner == UMUTEX_RB_NOTRECOV) 1382 return (0); 1383 } else { 1384 /* 1385 * Robust mutex terminated. Kernel duty is to 1386 * return EOWNERDEAD to the userspace. The 1387 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1388 * by the common userspace code. 
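 * Ownership is transferred here by storing (id | UMUTEX_CONTESTED)
 * into m_owner before EOWNERDEAD is returned.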
1389 */ 1390 if (owner == UMUTEX_RB_OWNERDEAD) { 1391 rv = casueword32(&m->m_owner, 1392 UMUTEX_RB_OWNERDEAD, &owner, 1393 id | UMUTEX_CONTESTED); 1394 if (rv == -1) 1395 return (EFAULT); 1396 if (rv == 0) { 1397 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1398 return (EOWNERDEAD); /* success */ 1399 } 1400 MPASS(rv == 1); 1401 rv = thread_check_susp(td, false); 1402 if (rv != 0) 1403 return (rv); 1404 continue; 1405 } 1406 if (owner == UMUTEX_RB_NOTRECOV) 1407 return (ENOTRECOVERABLE); 1408 1409 /* 1410 * Try the uncontested case. This should be 1411 * done in userland. 1412 */ 1413 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1414 &owner, id); 1415 /* The address was invalid. */ 1416 if (rv == -1) 1417 return (EFAULT); 1418 1419 /* The acquire succeeded. */ 1420 if (rv == 0) { 1421 MPASS(owner == UMUTEX_UNOWNED); 1422 return (0); 1423 } 1424 1425 /* 1426 * If no one owns it but it is contested try 1427 * to acquire it. 1428 */ 1429 MPASS(rv == 1); 1430 if (owner == UMUTEX_CONTESTED) { 1431 rv = casueword32(&m->m_owner, 1432 UMUTEX_CONTESTED, &owner, 1433 id | UMUTEX_CONTESTED); 1434 /* The address was invalid. */ 1435 if (rv == -1) 1436 return (EFAULT); 1437 if (rv == 0) { 1438 MPASS(owner == UMUTEX_CONTESTED); 1439 return (0); 1440 } 1441 if (rv == 1) { 1442 rv = thread_check_susp(td, false); 1443 if (rv != 0) 1444 return (rv); 1445 } 1446 1447 /* 1448 * If this failed the lock has 1449 * changed, restart. 1450 */ 1451 continue; 1452 } 1453 1454 /* rv == 1 but not contested, likely store failure */ 1455 rv = thread_check_susp(td, false); 1456 if (rv != 0) 1457 return (rv); 1458 } 1459 1460 if (mode == _UMUTEX_TRY) 1461 return (EBUSY); 1462 1463 /* 1464 * If we caught a signal, we have retried and now 1465 * exit immediately. 1466 */ 1467 if (error != 0) 1468 return (error); 1469 1470 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1471 GET_SHARE(flags), &uq->uq_key)) != 0) 1472 return (error); 1473 1474 umtxq_lock(&uq->uq_key); 1475 umtxq_busy(&uq->uq_key); 1476 umtxq_insert(uq); 1477 umtxq_unlock(&uq->uq_key); 1478 1479 /* 1480 * Set the contested bit so that a release in user space 1481 * knows to use the system call for unlock. If this fails 1482 * either some one else has acquired the lock or it has been 1483 * released. 1484 */ 1485 rv = casueword32(&m->m_owner, owner, &old, 1486 owner | UMUTEX_CONTESTED); 1487 1488 /* The address was invalid or casueword failed to store. */ 1489 if (rv == -1 || rv == 1) { 1490 umtxq_lock(&uq->uq_key); 1491 umtxq_remove(uq); 1492 umtxq_unbusy(&uq->uq_key); 1493 umtxq_unlock(&uq->uq_key); 1494 umtx_key_release(&uq->uq_key); 1495 if (rv == -1) 1496 return (EFAULT); 1497 if (rv == 1) { 1498 rv = thread_check_susp(td, false); 1499 if (rv != 0) 1500 return (rv); 1501 } 1502 continue; 1503 } 1504 1505 /* 1506 * We set the contested bit, sleep. Otherwise the lock changed 1507 * and we need to retry or we lost a race to the thread 1508 * unlocking the umtx. 1509 */ 1510 umtxq_lock(&uq->uq_key); 1511 umtxq_unbusy(&uq->uq_key); 1512 MPASS(old == owner); 1513 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1514 NULL : &timo); 1515 umtxq_remove(uq); 1516 umtxq_unlock(&uq->uq_key); 1517 umtx_key_release(&uq->uq_key); 1518 1519 if (error == 0) 1520 error = thread_check_susp(td, false); 1521 } 1522 1523 return (0); 1524 } 1525 1526 /* 1527 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 
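 * If 'rb' is true the lock word is released to UMUTEX_RB_OWNERDEAD rather
 * than UMUTEX_UNOWNED, so the next locker sees that the owner terminated.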
1528 */ 1529 static int 1530 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1531 { 1532 struct umtx_key key; 1533 uint32_t owner, old, id, newlock; 1534 int error, count; 1535 1536 id = td->td_tid; 1537 1538 again: 1539 /* 1540 * Make sure we own this mtx. 1541 */ 1542 error = fueword32(&m->m_owner, &owner); 1543 if (error == -1) 1544 return (EFAULT); 1545 1546 if ((owner & ~UMUTEX_CONTESTED) != id) 1547 return (EPERM); 1548 1549 newlock = umtx_unlock_val(flags, rb); 1550 if ((owner & UMUTEX_CONTESTED) == 0) { 1551 error = casueword32(&m->m_owner, owner, &old, newlock); 1552 if (error == -1) 1553 return (EFAULT); 1554 if (error == 1) { 1555 error = thread_check_susp(td, false); 1556 if (error != 0) 1557 return (error); 1558 goto again; 1559 } 1560 MPASS(old == owner); 1561 return (0); 1562 } 1563 1564 /* We should only ever be in here for contested locks */ 1565 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1566 &key)) != 0) 1567 return (error); 1568 1569 umtxq_lock(&key); 1570 umtxq_busy(&key); 1571 count = umtxq_count(&key); 1572 umtxq_unlock(&key); 1573 1574 /* 1575 * When unlocking the umtx, it must be marked as unowned if 1576 * there is zero or one thread only waiting for it. 1577 * Otherwise, it must be marked as contested. 1578 */ 1579 if (count > 1) 1580 newlock |= UMUTEX_CONTESTED; 1581 error = casueword32(&m->m_owner, owner, &old, newlock); 1582 umtxq_lock(&key); 1583 umtxq_signal(&key, 1); 1584 umtxq_unbusy(&key); 1585 umtxq_unlock(&key); 1586 umtx_key_release(&key); 1587 if (error == -1) 1588 return (EFAULT); 1589 if (error == 1) { 1590 if (old != owner) 1591 return (EINVAL); 1592 error = thread_check_susp(td, false); 1593 if (error != 0) 1594 return (error); 1595 goto again; 1596 } 1597 return (0); 1598 } 1599 1600 /* 1601 * Check if the mutex is available and wake up a waiter, 1602 * only for simple mutex. 
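 * If at most one waiter remains and the mutex is not in a robust-fault
 * state, the kernel also tries to clear the contested bit before waking a
 * single waiter.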
1603 */ 1604 static int 1605 do_wake_umutex(struct thread *td, struct umutex *m) 1606 { 1607 struct umtx_key key; 1608 uint32_t owner; 1609 uint32_t flags; 1610 int error; 1611 int count; 1612 1613 again: 1614 error = fueword32(&m->m_owner, &owner); 1615 if (error == -1) 1616 return (EFAULT); 1617 1618 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1619 owner != UMUTEX_RB_NOTRECOV) 1620 return (0); 1621 1622 error = fueword32(&m->m_flags, &flags); 1623 if (error == -1) 1624 return (EFAULT); 1625 1626 /* We should only ever be in here for contested locks */ 1627 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1628 &key)) != 0) 1629 return (error); 1630 1631 umtxq_lock(&key); 1632 umtxq_busy(&key); 1633 count = umtxq_count(&key); 1634 umtxq_unlock(&key); 1635 1636 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1637 owner != UMUTEX_RB_NOTRECOV) { 1638 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1639 UMUTEX_UNOWNED); 1640 if (error == -1) { 1641 error = EFAULT; 1642 } else if (error == 1) { 1643 umtxq_lock(&key); 1644 umtxq_unbusy(&key); 1645 umtxq_unlock(&key); 1646 umtx_key_release(&key); 1647 error = thread_check_susp(td, false); 1648 if (error != 0) 1649 return (error); 1650 goto again; 1651 } 1652 } 1653 1654 umtxq_lock(&key); 1655 if (error == 0 && count != 0) { 1656 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1657 owner == UMUTEX_RB_OWNERDEAD || 1658 owner == UMUTEX_RB_NOTRECOV); 1659 umtxq_signal(&key, 1); 1660 } 1661 umtxq_unbusy(&key); 1662 umtxq_unlock(&key); 1663 umtx_key_release(&key); 1664 return (error); 1665 } 1666 1667 /* 1668 * Check if the mutex has waiters and tries to fix contention bit. 1669 */ 1670 static int 1671 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1672 { 1673 struct umtx_key key; 1674 uint32_t owner, old; 1675 int type; 1676 int error; 1677 int count; 1678 1679 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1680 UMUTEX_ROBUST)) { 1681 case 0: 1682 case UMUTEX_ROBUST: 1683 type = TYPE_NORMAL_UMUTEX; 1684 break; 1685 case UMUTEX_PRIO_INHERIT: 1686 type = TYPE_PI_UMUTEX; 1687 break; 1688 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1689 type = TYPE_PI_ROBUST_UMUTEX; 1690 break; 1691 case UMUTEX_PRIO_PROTECT: 1692 type = TYPE_PP_UMUTEX; 1693 break; 1694 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1695 type = TYPE_PP_ROBUST_UMUTEX; 1696 break; 1697 default: 1698 return (EINVAL); 1699 } 1700 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1701 return (error); 1702 1703 owner = 0; 1704 umtxq_lock(&key); 1705 umtxq_busy(&key); 1706 count = umtxq_count(&key); 1707 umtxq_unlock(&key); 1708 1709 error = fueword32(&m->m_owner, &owner); 1710 if (error == -1) 1711 error = EFAULT; 1712 1713 /* 1714 * Only repair contention bit if there is a waiter, this means 1715 * the mutex is still being referenced by userland code, 1716 * otherwise don't update any memory. 
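 * The loop below retries the compare-and-swap until the contested bit is
 * set, the word no longer needs repair, or the access faults.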
1717 */ 1718 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1719 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1720 error = casueword32(&m->m_owner, owner, &old, 1721 owner | UMUTEX_CONTESTED); 1722 if (error == -1) { 1723 error = EFAULT; 1724 break; 1725 } 1726 if (error == 0) { 1727 MPASS(old == owner); 1728 break; 1729 } 1730 owner = old; 1731 error = thread_check_susp(td, false); 1732 } 1733 1734 umtxq_lock(&key); 1735 if (error == EFAULT) { 1736 umtxq_signal(&key, INT_MAX); 1737 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1738 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1739 umtxq_signal(&key, 1); 1740 umtxq_unbusy(&key); 1741 umtxq_unlock(&key); 1742 umtx_key_release(&key); 1743 return (error); 1744 } 1745 1746 struct umtx_pi * 1747 umtx_pi_alloc(int flags) 1748 { 1749 struct umtx_pi *pi; 1750 1751 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1752 if (pi == NULL) 1753 return (NULL); 1754 1755 TAILQ_INIT(&pi->pi_blocked); 1756 atomic_add_int(&umtx_pi_allocated, 1); 1757 return (pi); 1758 } 1759 1760 void 1761 umtx_pi_free(struct umtx_pi *pi) 1762 { 1763 uma_zfree(umtx_pi_zone, pi); 1764 atomic_add_int(&umtx_pi_allocated, -1); 1765 } 1766 1767 /* 1768 * Adjust the thread's position on a pi_state after its priority has been 1769 * changed. 1770 */ 1771 static int 1772 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1773 { 1774 struct umtx_q *uq, *uq1, *uq2; 1775 struct thread *td1; 1776 1777 mtx_assert(&umtx_lock, MA_OWNED); 1778 if (pi == NULL) 1779 return (0); 1780 1781 uq = td->td_umtxq; 1782 1783 /* 1784 * Check if the thread needs to be moved on the blocked chain. 1785 * It needs to be moved if either its priority is lower than 1786 * the previous thread or higher than the next thread. 1787 */ 1788 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1789 uq2 = TAILQ_NEXT(uq, uq_lockq); 1790 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1791 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1792 /* 1793 * Remove thread from blocked chain and determine where 1794 * it should be moved to. 1795 */ 1796 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1797 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1798 td1 = uq1->uq_thread; 1799 MPASS(td1->td_proc->p_magic == P_MAGIC); 1800 if (UPRI(td1) > UPRI(td)) 1801 break; 1802 } 1803 1804 if (uq1 == NULL) 1805 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1806 else 1807 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1808 } 1809 return (1); 1810 } 1811 1812 static struct umtx_pi * 1813 umtx_pi_next(struct umtx_pi *pi) 1814 { 1815 struct umtx_q *uq_owner; 1816 1817 if (pi->pi_owner == NULL) 1818 return (NULL); 1819 uq_owner = pi->pi_owner->td_umtxq; 1820 if (uq_owner == NULL) 1821 return (NULL); 1822 return (uq_owner->uq_pi_blocked); 1823 } 1824 1825 /* 1826 * Floyd's Cycle-Finding Algorithm. 1827 */ 1828 static bool 1829 umtx_pi_check_loop(struct umtx_pi *pi) 1830 { 1831 struct umtx_pi *pi1; /* fast iterator */ 1832 1833 mtx_assert(&umtx_lock, MA_OWNED); 1834 if (pi == NULL) 1835 return (false); 1836 pi1 = pi; 1837 for (;;) { 1838 pi = umtx_pi_next(pi); 1839 if (pi == NULL) 1840 break; 1841 pi1 = umtx_pi_next(pi1); 1842 if (pi1 == NULL) 1843 break; 1844 pi1 = umtx_pi_next(pi1); 1845 if (pi1 == NULL) 1846 break; 1847 if (pi == pi1) 1848 return (true); 1849 } 1850 return (false); 1851 } 1852 1853 /* 1854 * Propagate priority when a thread is blocked on POSIX 1855 * PI mutex. 
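 * The chain of lock owners is walked, lending the blocked thread's user
 * priority to each owner whose lent priority is weaker; the walk stops at
 * the end of the chain, at curthread, or when a cycle is detected.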
1856 */ 1857 static void 1858 umtx_propagate_priority(struct thread *td) 1859 { 1860 struct umtx_q *uq; 1861 struct umtx_pi *pi; 1862 int pri; 1863 1864 mtx_assert(&umtx_lock, MA_OWNED); 1865 pri = UPRI(td); 1866 uq = td->td_umtxq; 1867 pi = uq->uq_pi_blocked; 1868 if (pi == NULL) 1869 return; 1870 if (umtx_pi_check_loop(pi)) 1871 return; 1872 1873 for (;;) { 1874 td = pi->pi_owner; 1875 if (td == NULL || td == curthread) 1876 return; 1877 1878 MPASS(td->td_proc != NULL); 1879 MPASS(td->td_proc->p_magic == P_MAGIC); 1880 1881 thread_lock(td); 1882 if (td->td_lend_user_pri > pri) 1883 sched_lend_user_prio(td, pri); 1884 else { 1885 thread_unlock(td); 1886 break; 1887 } 1888 thread_unlock(td); 1889 1890 /* 1891 * Pick up the lock that td is blocked on. 1892 */ 1893 uq = td->td_umtxq; 1894 pi = uq->uq_pi_blocked; 1895 if (pi == NULL) 1896 break; 1897 /* Resort td on the list if needed. */ 1898 umtx_pi_adjust_thread(pi, td); 1899 } 1900 } 1901 1902 /* 1903 * Unpropagate priority for a PI mutex when a thread blocked on 1904 * it is interrupted by signal or resumed by others. 1905 */ 1906 static void 1907 umtx_repropagate_priority(struct umtx_pi *pi) 1908 { 1909 struct umtx_q *uq, *uq_owner; 1910 struct umtx_pi *pi2; 1911 int pri; 1912 1913 mtx_assert(&umtx_lock, MA_OWNED); 1914 1915 if (umtx_pi_check_loop(pi)) 1916 return; 1917 while (pi != NULL && pi->pi_owner != NULL) { 1918 pri = PRI_MAX; 1919 uq_owner = pi->pi_owner->td_umtxq; 1920 1921 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1922 uq = TAILQ_FIRST(&pi2->pi_blocked); 1923 if (uq != NULL) { 1924 if (pri > UPRI(uq->uq_thread)) 1925 pri = UPRI(uq->uq_thread); 1926 } 1927 } 1928 1929 if (pri > uq_owner->uq_inherited_pri) 1930 pri = uq_owner->uq_inherited_pri; 1931 thread_lock(pi->pi_owner); 1932 sched_lend_user_prio(pi->pi_owner, pri); 1933 thread_unlock(pi->pi_owner); 1934 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1935 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1936 } 1937 } 1938 1939 /* 1940 * Insert a PI mutex into owned list. 1941 */ 1942 static void 1943 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1944 { 1945 struct umtx_q *uq_owner; 1946 1947 uq_owner = owner->td_umtxq; 1948 mtx_assert(&umtx_lock, MA_OWNED); 1949 MPASS(pi->pi_owner == NULL); 1950 pi->pi_owner = owner; 1951 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1952 } 1953 1954 /* 1955 * Disown a PI mutex, and remove it from the owned list. 1956 */ 1957 static void 1958 umtx_pi_disown(struct umtx_pi *pi) 1959 { 1960 1961 mtx_assert(&umtx_lock, MA_OWNED); 1962 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1963 pi->pi_owner = NULL; 1964 } 1965 1966 /* 1967 * Claim ownership of a PI mutex. 1968 */ 1969 int 1970 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1971 { 1972 struct umtx_q *uq; 1973 int pri; 1974 1975 mtx_lock(&umtx_lock); 1976 if (pi->pi_owner == owner) { 1977 mtx_unlock(&umtx_lock); 1978 return (0); 1979 } 1980 1981 if (pi->pi_owner != NULL) { 1982 /* 1983 * userland may have already messed the mutex, sigh. 
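 * Another thread already owns this pi, so the claim is refused.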
1984 */ 1985 mtx_unlock(&umtx_lock); 1986 return (EPERM); 1987 } 1988 umtx_pi_setowner(pi, owner); 1989 uq = TAILQ_FIRST(&pi->pi_blocked); 1990 if (uq != NULL) { 1991 pri = UPRI(uq->uq_thread); 1992 thread_lock(owner); 1993 if (pri < UPRI(owner)) 1994 sched_lend_user_prio(owner, pri); 1995 thread_unlock(owner); 1996 } 1997 mtx_unlock(&umtx_lock); 1998 return (0); 1999 } 2000 2001 /* 2002 * Adjust a thread's order position in its blocked PI mutex, 2003 * this may result new priority propagating process. 2004 */ 2005 void 2006 umtx_pi_adjust(struct thread *td, u_char oldpri) 2007 { 2008 struct umtx_q *uq; 2009 struct umtx_pi *pi; 2010 2011 uq = td->td_umtxq; 2012 mtx_lock(&umtx_lock); 2013 /* 2014 * Pick up the lock that td is blocked on. 2015 */ 2016 pi = uq->uq_pi_blocked; 2017 if (pi != NULL) { 2018 umtx_pi_adjust_thread(pi, td); 2019 umtx_repropagate_priority(pi); 2020 } 2021 mtx_unlock(&umtx_lock); 2022 } 2023 2024 /* 2025 * Sleep on a PI mutex. 2026 */ 2027 int 2028 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 2029 const char *wmesg, struct umtx_abs_timeout *timo, bool shared) 2030 { 2031 struct thread *td, *td1; 2032 struct umtx_q *uq1; 2033 int error, pri; 2034 #ifdef INVARIANTS 2035 struct umtxq_chain *uc; 2036 2037 uc = umtxq_getchain(&pi->pi_key); 2038 #endif 2039 error = 0; 2040 td = uq->uq_thread; 2041 KASSERT(td == curthread, ("inconsistent uq_thread")); 2042 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 2043 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 2044 umtxq_insert(uq); 2045 mtx_lock(&umtx_lock); 2046 if (pi->pi_owner == NULL) { 2047 mtx_unlock(&umtx_lock); 2048 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); 2049 mtx_lock(&umtx_lock); 2050 if (td1 != NULL) { 2051 if (pi->pi_owner == NULL) 2052 umtx_pi_setowner(pi, td1); 2053 PROC_UNLOCK(td1->td_proc); 2054 } 2055 } 2056 2057 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 2058 pri = UPRI(uq1->uq_thread); 2059 if (pri > UPRI(td)) 2060 break; 2061 } 2062 2063 if (uq1 != NULL) 2064 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 2065 else 2066 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 2067 2068 uq->uq_pi_blocked = pi; 2069 thread_lock(td); 2070 td->td_flags |= TDF_UPIBLOCKED; 2071 thread_unlock(td); 2072 umtx_propagate_priority(td); 2073 mtx_unlock(&umtx_lock); 2074 umtxq_unbusy(&uq->uq_key); 2075 2076 error = umtxq_sleep(uq, wmesg, timo); 2077 umtxq_remove(uq); 2078 2079 mtx_lock(&umtx_lock); 2080 uq->uq_pi_blocked = NULL; 2081 thread_lock(td); 2082 td->td_flags &= ~TDF_UPIBLOCKED; 2083 thread_unlock(td); 2084 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 2085 umtx_repropagate_priority(pi); 2086 mtx_unlock(&umtx_lock); 2087 umtxq_unlock(&uq->uq_key); 2088 2089 return (error); 2090 } 2091 2092 /* 2093 * Add reference count for a PI mutex. 2094 */ 2095 void 2096 umtx_pi_ref(struct umtx_pi *pi) 2097 { 2098 2099 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); 2100 pi->pi_refcount++; 2101 } 2102 2103 /* 2104 * Decrease reference count for a PI mutex, if the counter 2105 * is decreased to zero, its memory space is freed. 
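 * Dropping the last reference disowns the pi if it is still owned, removes
 * it from the chain's hash list, and frees it; its blocked queue must
 * already be empty at that point.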
2106 */ 2107 void 2108 umtx_pi_unref(struct umtx_pi *pi) 2109 { 2110 struct umtxq_chain *uc; 2111 2112 uc = umtxq_getchain(&pi->pi_key); 2113 UMTXQ_LOCKED_ASSERT(uc); 2114 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2115 if (--pi->pi_refcount == 0) { 2116 mtx_lock(&umtx_lock); 2117 if (pi->pi_owner != NULL) 2118 umtx_pi_disown(pi); 2119 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2120 ("blocked queue not empty")); 2121 mtx_unlock(&umtx_lock); 2122 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2123 umtx_pi_free(pi); 2124 } 2125 } 2126 2127 /* 2128 * Find a PI mutex in hash table. 2129 */ 2130 struct umtx_pi * 2131 umtx_pi_lookup(struct umtx_key *key) 2132 { 2133 struct umtxq_chain *uc; 2134 struct umtx_pi *pi; 2135 2136 uc = umtxq_getchain(key); 2137 UMTXQ_LOCKED_ASSERT(uc); 2138 2139 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2140 if (umtx_key_match(&pi->pi_key, key)) { 2141 return (pi); 2142 } 2143 } 2144 return (NULL); 2145 } 2146 2147 /* 2148 * Insert a PI mutex into hash table. 2149 */ 2150 void 2151 umtx_pi_insert(struct umtx_pi *pi) 2152 { 2153 struct umtxq_chain *uc; 2154 2155 uc = umtxq_getchain(&pi->pi_key); 2156 UMTXQ_LOCKED_ASSERT(uc); 2157 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2158 } 2159 2160 /* 2161 * Drop a PI mutex and wakeup a top waiter. 2162 */ 2163 int 2164 umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count) 2165 { 2166 struct umtx_q *uq_first, *uq_first2, *uq_me; 2167 struct umtx_pi *pi, *pi2; 2168 int pri; 2169 2170 UMTXQ_ASSERT_LOCKED_BUSY(key); 2171 *count = umtxq_count_pi(key, &uq_first); 2172 if (uq_first != NULL) { 2173 mtx_lock(&umtx_lock); 2174 pi = uq_first->uq_pi_blocked; 2175 KASSERT(pi != NULL, ("pi == NULL?")); 2176 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2177 mtx_unlock(&umtx_lock); 2178 /* userland messed the mutex */ 2179 return (EPERM); 2180 } 2181 uq_me = td->td_umtxq; 2182 if (pi->pi_owner == td) 2183 umtx_pi_disown(pi); 2184 /* get highest priority thread which is still sleeping. */ 2185 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2186 while (uq_first != NULL && 2187 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2188 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2189 } 2190 pri = PRI_MAX; 2191 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2192 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2193 if (uq_first2 != NULL) { 2194 if (pri > UPRI(uq_first2->uq_thread)) 2195 pri = UPRI(uq_first2->uq_thread); 2196 } 2197 } 2198 thread_lock(td); 2199 sched_lend_user_prio(td, pri); 2200 thread_unlock(td); 2201 mtx_unlock(&umtx_lock); 2202 if (uq_first) 2203 umtxq_signal_thread(uq_first); 2204 } else { 2205 pi = umtx_pi_lookup(key); 2206 /* 2207 * A umtx_pi can exist if a signal or timeout removed the 2208 * last waiter from the umtxq, but there is still 2209 * a thread in do_lock_pi() holding the umtx_pi. 2210 */ 2211 if (pi != NULL) { 2212 /* 2213 * The umtx_pi can be unowned, such as when a thread 2214 * has just entered do_lock_pi(), allocated the 2215 * umtx_pi, and unlocked the umtxq. 2216 * If the current thread owns it, it must disown it. 2217 */ 2218 mtx_lock(&umtx_lock); 2219 if (pi->pi_owner == td) 2220 umtx_pi_disown(pi); 2221 mtx_unlock(&umtx_lock); 2222 } 2223 } 2224 return (0); 2225 } 2226 2227 /* 2228 * Lock a PI mutex. 
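 * A umtx_pi is looked up or allocated for the key and held referenced for
 * the duration of the operation, so that priority can be propagated to the
 * current owner while the caller sleeps.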
2229 */ 2230 static int 2231 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 2232 struct _umtx_time *timeout, int try) 2233 { 2234 struct umtx_abs_timeout timo; 2235 struct umtx_q *uq; 2236 struct umtx_pi *pi, *new_pi; 2237 uint32_t id, old_owner, owner, old; 2238 int error, rv; 2239 2240 id = td->td_tid; 2241 uq = td->td_umtxq; 2242 2243 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2244 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2245 &uq->uq_key)) != 0) 2246 return (error); 2247 2248 if (timeout != NULL) 2249 umtx_abs_timeout_init2(&timo, timeout); 2250 2251 umtxq_lock(&uq->uq_key); 2252 pi = umtx_pi_lookup(&uq->uq_key); 2253 if (pi == NULL) { 2254 new_pi = umtx_pi_alloc(M_NOWAIT); 2255 if (new_pi == NULL) { 2256 umtxq_unlock(&uq->uq_key); 2257 new_pi = umtx_pi_alloc(M_WAITOK); 2258 umtxq_lock(&uq->uq_key); 2259 pi = umtx_pi_lookup(&uq->uq_key); 2260 if (pi != NULL) { 2261 umtx_pi_free(new_pi); 2262 new_pi = NULL; 2263 } 2264 } 2265 if (new_pi != NULL) { 2266 new_pi->pi_key = uq->uq_key; 2267 umtx_pi_insert(new_pi); 2268 pi = new_pi; 2269 } 2270 } 2271 umtx_pi_ref(pi); 2272 umtxq_unlock(&uq->uq_key); 2273 2274 /* 2275 * Care must be exercised when dealing with the umtx structure. It 2276 * can fault on any access. 2277 */ 2278 for (;;) { 2279 /* 2280 * Try the uncontested case. This should be done in userland. 2281 */ 2282 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 2283 /* The address was invalid. */ 2284 if (rv == -1) { 2285 error = EFAULT; 2286 break; 2287 } 2288 /* The acquire succeeded. */ 2289 if (rv == 0) { 2290 MPASS(owner == UMUTEX_UNOWNED); 2291 error = 0; 2292 break; 2293 } 2294 2295 if (owner == UMUTEX_RB_NOTRECOV) { 2296 error = ENOTRECOVERABLE; 2297 break; 2298 } 2299 2300 /* 2301 * Nobody owns it, but the acquire failed. This can happen 2302 * with ll/sc atomics. 2303 */ 2304 if (owner == UMUTEX_UNOWNED) { 2305 error = thread_check_susp(td, true); 2306 if (error != 0) 2307 break; 2308 continue; 2309 } 2310 2311 /* 2312 * Avoid overwriting a possible error from sleep (due 2313 * to a pending signal) with the suspension check result. 2314 */ 2315 if (error == 0) { 2316 error = thread_check_susp(td, true); 2317 if (error != 0) 2318 break; 2319 } 2320 2321 /* If no one owns it but it is contested, try to acquire it. */ 2322 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2323 old_owner = owner; 2324 rv = casueword32(&m->m_owner, owner, &owner, 2325 id | UMUTEX_CONTESTED); 2326 /* The address was invalid. */ 2327 if (rv == -1) { 2328 error = EFAULT; 2329 break; 2330 } 2331 if (rv == 1) { 2332 if (error == 0) { 2333 error = thread_check_susp(td, true); 2334 if (error != 0) 2335 break; 2336 } 2337 2338 /* 2339 * If this failed, the lock could have 2340 * changed; restart. 2341 */ 2342 continue; 2343 } 2344 2345 MPASS(rv == 0); 2346 MPASS(owner == old_owner); 2347 umtxq_lock(&uq->uq_key); 2348 umtxq_busy(&uq->uq_key); 2349 error = umtx_pi_claim(pi, td); 2350 umtxq_unbusy(&uq->uq_key); 2351 umtxq_unlock(&uq->uq_key); 2352 if (error != 0) { 2353 /* 2354 * Since we're going to return an 2355 * error, restore the m_owner to its 2356 * previous, unowned state to avoid 2357 * compounding the problem.
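 * (The compensating store below may itself fail if the mapping is gone; * there is nothing more we can do about that, hence the (void) cast.)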
2358 */ 2359 (void)casuword32(&m->m_owner, 2360 id | UMUTEX_CONTESTED, old_owner); 2361 } 2362 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2363 error = EOWNERDEAD; 2364 break; 2365 } 2366 2367 if ((owner & ~UMUTEX_CONTESTED) == id) { 2368 error = EDEADLK; 2369 break; 2370 } 2371 2372 if (try != 0) { 2373 error = EBUSY; 2374 break; 2375 } 2376 2377 /* 2378 * If we caught a signal, we have retried and now 2379 * exit immediately. 2380 */ 2381 if (error != 0) 2382 break; 2383 2384 umtxq_busy_unlocked(&uq->uq_key); 2385 2386 /* 2387 * Set the contested bit so that a release in user space 2388 * knows to use the system call for unlock. If this fails, 2389 * either someone else has acquired the lock or it has been 2390 * released. 2391 */ 2392 rv = casueword32(&m->m_owner, owner, &old, owner | 2393 UMUTEX_CONTESTED); 2394 2395 /* The address was invalid. */ 2396 if (rv == -1) { 2397 umtxq_unbusy_unlocked(&uq->uq_key); 2398 error = EFAULT; 2399 break; 2400 } 2401 if (rv == 1) { 2402 umtxq_unbusy_unlocked(&uq->uq_key); 2403 error = thread_check_susp(td, true); 2404 if (error != 0) 2405 break; 2406 2407 /* 2408 * The lock changed and we need to retry, or we 2409 * lost a race to the thread unlocking the 2410 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2411 * value for owner is impossible there. 2412 */ 2413 continue; 2414 } 2415 2416 umtxq_lock(&uq->uq_key); 2417 2418 /* We set the contested bit, sleep. */ 2419 MPASS(old == owner); 2420 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2421 "umtxpi", timeout == NULL ? NULL : &timo, 2422 (flags & USYNC_PROCESS_SHARED) != 0); 2423 if (error != 0) 2424 continue; 2425 2426 error = thread_check_susp(td, false); 2427 if (error != 0) 2428 break; 2429 } 2430 2431 umtxq_lock(&uq->uq_key); 2432 umtx_pi_unref(pi); 2433 umtxq_unlock(&uq->uq_key); 2434 2435 umtx_key_release(&uq->uq_key); 2436 return (error); 2437 } 2438 2439 /* 2440 * Unlock a PI mutex. 2441 */ 2442 static int 2443 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2444 { 2445 struct umtx_key key; 2446 uint32_t id, new_owner, old, owner; 2447 int count, error; 2448 2449 id = td->td_tid; 2450 2451 usrloop: 2452 /* 2453 * Make sure we own this mtx. 2454 */ 2455 error = fueword32(&m->m_owner, &owner); 2456 if (error == -1) 2457 return (EFAULT); 2458 2459 if ((owner & ~UMUTEX_CONTESTED) != id) 2460 return (EPERM); 2461 2462 new_owner = umtx_unlock_val(flags, rb); 2463 2464 /* This should be done in userland. */ 2465 if ((owner & UMUTEX_CONTESTED) == 0) { 2466 error = casueword32(&m->m_owner, owner, &old, new_owner); 2467 if (error == -1) 2468 return (EFAULT); 2469 if (error == 1) { 2470 error = thread_check_susp(td, true); 2471 if (error != 0) 2472 return (error); 2473 goto usrloop; 2474 } 2475 if (old == owner) 2476 return (0); 2477 owner = old; 2478 } 2479 2480 /* We should only ever be in here for contested locks. */ 2481 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2482 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2483 &key)) != 0) 2484 return (error); 2485 2486 umtxq_lock(&key); 2487 umtxq_busy(&key); 2488 error = umtx_pi_drop(td, &key, rb, &count); 2489 if (error != 0) { 2490 umtxq_unbusy(&key); 2491 umtxq_unlock(&key); 2492 umtx_key_release(&key); 2493 /* userland messed up the mutex */ 2494 return (error); 2495 } 2496 umtxq_unlock(&key); 2497 2498 /* 2499 * When unlocking the umtx, it must be marked as unowned if 2500 * at most one thread is waiting for it. 2501 * Otherwise, it must be marked as contested.
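 * With more than one waiter, the stored owner value keeps UMUTEX_CONTESTED * set so the next locker takes the kernel path again; with at most one * waiter, the word is reset to the plain unlocked value from umtx_unlock_val().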
2502 */ 2503 2504 if (count > 1) 2505 new_owner |= UMUTEX_CONTESTED; 2506 again: 2507 error = casueword32(&m->m_owner, owner, &old, new_owner); 2508 if (error == 1) { 2509 error = thread_check_susp(td, false); 2510 if (error == 0) 2511 goto again; 2512 } 2513 umtxq_unbusy_unlocked(&key); 2514 umtx_key_release(&key); 2515 if (error == -1) 2516 return (EFAULT); 2517 if (error == 0 && old != owner) 2518 return (EINVAL); 2519 return (error); 2520 } 2521 2522 /* 2523 * Lock a PP mutex. 2524 */ 2525 static int 2526 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2527 struct _umtx_time *timeout, int try) 2528 { 2529 struct umtx_abs_timeout timo; 2530 struct umtx_q *uq, *uq2; 2531 struct umtx_pi *pi; 2532 uint32_t ceiling; 2533 uint32_t owner, id; 2534 int error, pri, old_inherited_pri, new_pri, rv; 2535 bool su; 2536 2537 id = td->td_tid; 2538 uq = td->td_umtxq; 2539 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2540 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2541 &uq->uq_key)) != 0) 2542 return (error); 2543 2544 if (timeout != NULL) 2545 umtx_abs_timeout_init2(&timo, timeout); 2546 2547 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2548 for (;;) { 2549 old_inherited_pri = uq->uq_inherited_pri; 2550 umtxq_busy_unlocked(&uq->uq_key); 2551 2552 rv = fueword32(&m->m_ceilings[0], &ceiling); 2553 if (rv == -1) { 2554 error = EFAULT; 2555 goto out; 2556 } 2557 ceiling = RTP_PRIO_MAX - ceiling; 2558 if (ceiling > RTP_PRIO_MAX) { 2559 error = EINVAL; 2560 goto out; 2561 } 2562 new_pri = PRI_MIN_REALTIME + ceiling; 2563 2564 if (td->td_base_user_pri < new_pri) { 2565 error = EINVAL; 2566 goto out; 2567 } 2568 if (su) { 2569 mtx_lock(&umtx_lock); 2570 if (new_pri < uq->uq_inherited_pri) { 2571 uq->uq_inherited_pri = new_pri; 2572 thread_lock(td); 2573 if (new_pri < UPRI(td)) 2574 sched_lend_user_prio(td, new_pri); 2575 thread_unlock(td); 2576 } 2577 mtx_unlock(&umtx_lock); 2578 } 2579 2580 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2581 id | UMUTEX_CONTESTED); 2582 /* The address was invalid. */ 2583 if (rv == -1) { 2584 error = EFAULT; 2585 break; 2586 } 2587 if (rv == 0) { 2588 MPASS(owner == UMUTEX_CONTESTED); 2589 error = 0; 2590 break; 2591 } 2592 /* rv == 1 */ 2593 if (owner == UMUTEX_RB_OWNERDEAD) { 2594 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2595 &owner, id | UMUTEX_CONTESTED); 2596 if (rv == -1) { 2597 error = EFAULT; 2598 break; 2599 } 2600 if (rv == 0) { 2601 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2602 error = EOWNERDEAD; /* success */ 2603 break; 2604 } 2605 2606 /* 2607 * rv == 1; only check for suspension if we 2608 * have not already caught a signal. If we 2609 * get an error from the check, the same 2610 * condition is checked by the umtxq_sleep() 2611 * call below, so we should obliterate the 2612 * error so as not to skip the last loop iteration. 2613 */ 2614 if (error == 0) { 2615 error = thread_check_susp(td, false); 2616 if (error == 0 && try == 0) { 2617 umtxq_unbusy_unlocked(&uq->uq_key); 2618 continue; 2619 } 2620 error = 0; 2621 } 2622 } else if (owner == UMUTEX_RB_NOTRECOV) { 2623 error = ENOTRECOVERABLE; 2624 } else if (owner == UMUTEX_CONTESTED) { 2625 /* Spurious failure, retry. */ 2626 umtxq_unbusy_unlocked(&uq->uq_key); 2627 continue; 2628 } 2629 2630 if (try != 0) 2631 error = EBUSY; 2632 2633 /* 2634 * If we caught a signal, we have retried and now 2635 * exit immediately.
2636 */ 2637 if (error != 0) 2638 break; 2639 2640 umtxq_lock(&uq->uq_key); 2641 umtxq_insert(uq); 2642 umtxq_unbusy(&uq->uq_key); 2643 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2644 NULL : &timo); 2645 umtxq_remove(uq); 2646 umtxq_unlock(&uq->uq_key); 2647 2648 mtx_lock(&umtx_lock); 2649 uq->uq_inherited_pri = old_inherited_pri; 2650 pri = PRI_MAX; 2651 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2652 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2653 if (uq2 != NULL) { 2654 if (pri > UPRI(uq2->uq_thread)) 2655 pri = UPRI(uq2->uq_thread); 2656 } 2657 } 2658 if (pri > uq->uq_inherited_pri) 2659 pri = uq->uq_inherited_pri; 2660 thread_lock(td); 2661 sched_lend_user_prio(td, pri); 2662 thread_unlock(td); 2663 mtx_unlock(&umtx_lock); 2664 } 2665 2666 if (error != 0 && error != EOWNERDEAD) { 2667 mtx_lock(&umtx_lock); 2668 uq->uq_inherited_pri = old_inherited_pri; 2669 pri = PRI_MAX; 2670 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2671 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2672 if (uq2 != NULL) { 2673 if (pri > UPRI(uq2->uq_thread)) 2674 pri = UPRI(uq2->uq_thread); 2675 } 2676 } 2677 if (pri > uq->uq_inherited_pri) 2678 pri = uq->uq_inherited_pri; 2679 thread_lock(td); 2680 sched_lend_user_prio(td, pri); 2681 thread_unlock(td); 2682 mtx_unlock(&umtx_lock); 2683 } 2684 2685 out: 2686 umtxq_unbusy_unlocked(&uq->uq_key); 2687 umtx_key_release(&uq->uq_key); 2688 return (error); 2689 } 2690 2691 /* 2692 * Unlock a PP mutex. 2693 */ 2694 static int 2695 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2696 { 2697 struct umtx_key key; 2698 struct umtx_q *uq, *uq2; 2699 struct umtx_pi *pi; 2700 uint32_t id, owner, rceiling; 2701 int error, pri, new_inherited_pri; 2702 bool su; 2703 2704 id = td->td_tid; 2705 uq = td->td_umtxq; 2706 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2707 2708 /* 2709 * Make sure we own this mtx. 2710 */ 2711 error = fueword32(&m->m_owner, &owner); 2712 if (error == -1) 2713 return (EFAULT); 2714 2715 if ((owner & ~UMUTEX_CONTESTED) != id) 2716 return (EPERM); 2717 2718 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2719 if (error != 0) 2720 return (error); 2721 2722 if (rceiling == -1) 2723 new_inherited_pri = PRI_MAX; 2724 else { 2725 rceiling = RTP_PRIO_MAX - rceiling; 2726 if (rceiling > RTP_PRIO_MAX) 2727 return (EINVAL); 2728 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2729 } 2730 2731 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2732 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2733 &key)) != 0) 2734 return (error); 2735 umtxq_busy_unlocked(&key); 2736 2737 /* 2738 * For a priority protected mutex, always set the unlocked state 2739 * to UMUTEX_CONTESTED, so that userland always enters the kernel 2740 * to lock the mutex; this is necessary because the thread priority 2741 * has to be adjusted for such a mutex.
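 * The ceiling-to-priority mapping used here and in do_lock_pp() is * pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling); an rceiling of -1 * requests PRI_MAX, i.e. no inherited real-time priority.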
2742 */ 2743 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2744 UMUTEX_CONTESTED); 2745 2746 umtxq_lock(&key); 2747 if (error == 0) 2748 umtxq_signal(&key, 1); 2749 umtxq_unbusy(&key); 2750 umtxq_unlock(&key); 2751 2752 if (error == -1) 2753 error = EFAULT; 2754 else { 2755 mtx_lock(&umtx_lock); 2756 if (su || new_inherited_pri == PRI_MAX) 2757 uq->uq_inherited_pri = new_inherited_pri; 2758 pri = PRI_MAX; 2759 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2760 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2761 if (uq2 != NULL) { 2762 if (pri > UPRI(uq2->uq_thread)) 2763 pri = UPRI(uq2->uq_thread); 2764 } 2765 } 2766 if (pri > uq->uq_inherited_pri) 2767 pri = uq->uq_inherited_pri; 2768 thread_lock(td); 2769 sched_lend_user_prio(td, pri); 2770 thread_unlock(td); 2771 mtx_unlock(&umtx_lock); 2772 } 2773 umtx_key_release(&key); 2774 return (error); 2775 } 2776 2777 static int 2778 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2779 uint32_t *old_ceiling) 2780 { 2781 struct umtx_q *uq; 2782 uint32_t flags, id, owner, save_ceiling; 2783 int error, rv, rv1; 2784 2785 error = fueword32(&m->m_flags, &flags); 2786 if (error == -1) 2787 return (EFAULT); 2788 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2789 return (EINVAL); 2790 if (ceiling > RTP_PRIO_MAX) 2791 return (EINVAL); 2792 id = td->td_tid; 2793 uq = td->td_umtxq; 2794 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2795 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2796 &uq->uq_key)) != 0) 2797 return (error); 2798 for (;;) { 2799 umtxq_busy_unlocked(&uq->uq_key); 2800 2801 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2802 if (rv == -1) { 2803 error = EFAULT; 2804 break; 2805 } 2806 2807 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2808 id | UMUTEX_CONTESTED); 2809 if (rv == -1) { 2810 error = EFAULT; 2811 break; 2812 } 2813 2814 if (rv == 0) { 2815 MPASS(owner == UMUTEX_CONTESTED); 2816 rv = suword32(&m->m_ceilings[0], ceiling); 2817 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2818 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2819 break; 2820 } 2821 2822 if ((owner & ~UMUTEX_CONTESTED) == id) { 2823 rv = suword32(&m->m_ceilings[0], ceiling); 2824 error = rv == 0 ? 0 : EFAULT; 2825 break; 2826 } 2827 2828 if (owner == UMUTEX_RB_OWNERDEAD) { 2829 error = EOWNERDEAD; 2830 break; 2831 } else if (owner == UMUTEX_RB_NOTRECOV) { 2832 error = ENOTRECOVERABLE; 2833 break; 2834 } else if (owner == UMUTEX_CONTESTED) { 2835 /* Spurious failure, retry. */ 2836 umtxq_unbusy_unlocked(&uq->uq_key); 2837 continue; 2838 } 2839 2840 /* 2841 * If we caught a signal, we have retried and now 2842 * exit immediately. 2843 */ 2844 if (error != 0) 2845 break; 2846 2847 /* 2848 * We set the contested bit, sleep. Otherwise the lock changed 2849 * and we need to retry or we lost a race to the thread 2850 * unlocking the umtx. 2851 */ 2852 umtxq_lock(&uq->uq_key); 2853 umtxq_insert(uq); 2854 umtxq_unbusy(&uq->uq_key); 2855 error = umtxq_sleep(uq, "umtxpp", NULL); 2856 umtxq_remove(uq); 2857 umtxq_unlock(&uq->uq_key); 2858 } 2859 umtxq_lock(&uq->uq_key); 2860 if (error == 0) 2861 umtxq_signal(&uq->uq_key, INT_MAX); 2862 umtxq_unbusy(&uq->uq_key); 2863 umtxq_unlock(&uq->uq_key); 2864 umtx_key_release(&uq->uq_key); 2865 if (error == 0 && old_ceiling != NULL) { 2866 rv = suword32(old_ceiling, save_ceiling); 2867 error = rv == 0 ? 0 : EFAULT; 2868 } 2869 return (error); 2870 } 2871 2872 /* 2873 * Lock a userland POSIX mutex. 
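 * The locking protocol is selected by the UMUTEX_PRIO_INHERIT and * UMUTEX_PRIO_PROTECT bits in m_flags: plain mutexes go through * do_lock_normal(), PI mutexes through do_lock_pi(), and PP mutexes * through do_lock_pp(). As an illustrative sketch (not verbatim libthr * code), a userland caller typically reaches this path via _umtx_op(2): * _umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0, (void *)(uintptr_t)sizeof(struct _umtx_time), &tmo); * where 'tmo' is a struct _umtx_time and a NULL uaddr2 means wait forever.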
2874 */ 2875 static int 2876 do_lock_umutex(struct thread *td, struct umutex *m, 2877 struct _umtx_time *timeout, int mode) 2878 { 2879 uint32_t flags; 2880 int error; 2881 2882 error = fueword32(&m->m_flags, &flags); 2883 if (error == -1) 2884 return (EFAULT); 2885 2886 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2887 case 0: 2888 error = do_lock_normal(td, m, flags, timeout, mode); 2889 break; 2890 case UMUTEX_PRIO_INHERIT: 2891 error = do_lock_pi(td, m, flags, timeout, mode); 2892 break; 2893 case UMUTEX_PRIO_PROTECT: 2894 error = do_lock_pp(td, m, flags, timeout, mode); 2895 break; 2896 default: 2897 return (EINVAL); 2898 } 2899 if (timeout == NULL) { 2900 if (error == EINTR && mode != _UMUTEX_WAIT) 2901 error = ERESTART; 2902 } else { 2903 /* Timed-locking is not restarted. */ 2904 if (error == ERESTART) 2905 error = EINTR; 2906 } 2907 return (error); 2908 } 2909 2910 /* 2911 * Unlock a userland POSIX mutex. 2912 */ 2913 static int 2914 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2915 { 2916 uint32_t flags; 2917 int error; 2918 2919 error = fueword32(&m->m_flags, &flags); 2920 if (error == -1) 2921 return (EFAULT); 2922 2923 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2924 case 0: 2925 return (do_unlock_normal(td, m, flags, rb)); 2926 case UMUTEX_PRIO_INHERIT: 2927 return (do_unlock_pi(td, m, flags, rb)); 2928 case UMUTEX_PRIO_PROTECT: 2929 return (do_unlock_pp(td, m, flags, rb)); 2930 } 2931 2932 return (EINVAL); 2933 } 2934 2935 static int 2936 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2937 struct timespec *timeout, u_long wflags) 2938 { 2939 struct umtx_abs_timeout timo; 2940 struct umtx_q *uq; 2941 uint32_t flags, clockid, hasw; 2942 int error; 2943 2944 uq = td->td_umtxq; 2945 error = fueword32(&cv->c_flags, &flags); 2946 if (error == -1) 2947 return (EFAULT); 2948 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2949 if (error != 0) 2950 return (error); 2951 2952 if ((wflags & CVWAIT_CLOCKID) != 0) { 2953 error = fueword32(&cv->c_clockid, &clockid); 2954 if (error == -1) { 2955 umtx_key_release(&uq->uq_key); 2956 return (EFAULT); 2957 } 2958 if ((clockid < CLOCK_REALTIME || 2959 clockid >= CLOCK_THREAD_CPUTIME_ID) && 2960 clockid != CLOCK_TAI) { 2961 /* hmm, only HW clock id will work. */ 2962 umtx_key_release(&uq->uq_key); 2963 return (EINVAL); 2964 } 2965 } else { 2966 clockid = CLOCK_REALTIME; 2967 } 2968 2969 umtxq_lock(&uq->uq_key); 2970 umtxq_busy(&uq->uq_key); 2971 umtxq_insert(uq); 2972 umtxq_unlock(&uq->uq_key); 2973 2974 /* 2975 * Set c_has_waiters to 1 before releasing user mutex, also 2976 * don't modify cache line when unnecessary. 2977 */ 2978 error = fueword32(&cv->c_has_waiters, &hasw); 2979 if (error == 0 && hasw == 0) 2980 error = suword32(&cv->c_has_waiters, 1); 2981 if (error != 0) { 2982 umtxq_lock(&uq->uq_key); 2983 umtxq_remove(uq); 2984 umtxq_unbusy(&uq->uq_key); 2985 error = EFAULT; 2986 goto out; 2987 } 2988 2989 umtxq_unbusy_unlocked(&uq->uq_key); 2990 2991 error = do_unlock_umutex(td, m, false); 2992 2993 if (timeout != NULL) 2994 umtx_abs_timeout_init(&timo, clockid, 2995 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2996 2997 umtxq_lock(&uq->uq_key); 2998 if (error == 0) { 2999 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 
3000 NULL : &timo); 3001 } 3002 3003 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3004 error = 0; 3005 else { 3006 /* 3007 * This must be a timeout, a signal, or a 3008 * spurious wakeup; clear the c_has_waiters flag when 3009 * necessary. 3010 */ 3011 umtxq_busy(&uq->uq_key); 3012 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 3013 int oldlen = uq->uq_cur_queue->length; 3014 umtxq_remove(uq); 3015 if (oldlen == 1) { 3016 umtxq_unlock(&uq->uq_key); 3017 if (suword32(&cv->c_has_waiters, 0) != 0 && 3018 error == 0) 3019 error = EFAULT; 3020 umtxq_lock(&uq->uq_key); 3021 } 3022 } 3023 umtxq_unbusy(&uq->uq_key); 3024 if (error == ERESTART) 3025 error = EINTR; 3026 } 3027 out: 3028 umtxq_unlock(&uq->uq_key); 3029 umtx_key_release(&uq->uq_key); 3030 return (error); 3031 } 3032 3033 /* 3034 * Signal a userland condition variable. 3035 */ 3036 static int 3037 do_cv_signal(struct thread *td, struct ucond *cv) 3038 { 3039 struct umtx_key key; 3040 int error, cnt, nwake; 3041 uint32_t flags; 3042 3043 error = fueword32(&cv->c_flags, &flags); 3044 if (error == -1) 3045 return (EFAULT); 3046 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3047 return (error); 3048 umtxq_lock(&key); 3049 umtxq_busy(&key); 3050 cnt = umtxq_count(&key); 3051 nwake = umtxq_signal(&key, 1); 3052 if (cnt <= nwake) { 3053 umtxq_unlock(&key); 3054 error = suword32(&cv->c_has_waiters, 0); 3055 if (error == -1) 3056 error = EFAULT; 3057 umtxq_lock(&key); 3058 } 3059 umtxq_unbusy(&key); 3060 umtxq_unlock(&key); 3061 umtx_key_release(&key); 3062 return (error); 3063 } 3064 3065 static int 3066 do_cv_broadcast(struct thread *td, struct ucond *cv) 3067 { 3068 struct umtx_key key; 3069 int error; 3070 uint32_t flags; 3071 3072 error = fueword32(&cv->c_flags, &flags); 3073 if (error == -1) 3074 return (EFAULT); 3075 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3076 return (error); 3077 3078 umtxq_lock(&key); 3079 umtxq_busy(&key); 3080 umtxq_signal(&key, INT_MAX); 3081 umtxq_unlock(&key); 3082 3083 error = suword32(&cv->c_has_waiters, 0); 3084 if (error == -1) 3085 error = EFAULT; 3086 3087 umtxq_unbusy_unlocked(&key); 3088 3089 umtx_key_release(&key); 3090 return (error); 3091 } 3092 3093 static int 3094 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3095 struct _umtx_time *timeout) 3096 { 3097 struct umtx_abs_timeout timo; 3098 struct umtx_q *uq; 3099 uint32_t flags, wrflags; 3100 int32_t state, oldstate; 3101 int32_t blocked_readers; 3102 int error, error1, rv; 3103 3104 uq = td->td_umtxq; 3105 error = fueword32(&rwlock->rw_flags, &flags); 3106 if (error == -1) 3107 return (EFAULT); 3108 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3109 if (error != 0) 3110 return (error); 3111 3112 if (timeout != NULL) 3113 umtx_abs_timeout_init2(&timo, timeout); 3114 3115 wrflags = URWLOCK_WRITE_OWNER; 3116 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3117 wrflags |= URWLOCK_WRITE_WAITERS; 3118 3119 for (;;) { 3120 rv = fueword32(&rwlock->rw_state, &state); 3121 if (rv == -1) { 3122 umtx_key_release(&uq->uq_key); 3123 return (EFAULT); 3124 } 3125 3126 /* try to lock it */ 3127 while (!(state & wrflags)) { 3128 if (__predict_false(URWLOCK_READER_COUNT(state) == 3129 URWLOCK_MAX_READERS)) { 3130 umtx_key_release(&uq->uq_key); 3131 return (EAGAIN); 3132 } 3133 rv = casueword32(&rwlock->rw_state, state, 3134 &oldstate, state + 1); 3135 if (rv == -1) { 3136 umtx_key_release(&uq->uq_key); 3137 return (EFAULT); 3138 } 3139 if (rv == 0) { 3140
MPASS(oldstate == state); 3141 umtx_key_release(&uq->uq_key); 3142 return (0); 3143 } 3144 error = thread_check_susp(td, true); 3145 if (error != 0) 3146 break; 3147 state = oldstate; 3148 } 3149 3150 if (error) 3151 break; 3152 3153 /* grab monitor lock */ 3154 umtxq_busy_unlocked(&uq->uq_key); 3155 3156 /* 3157 * re-read the state, in case it changed between the try-lock above 3158 * and the check below 3159 */ 3160 rv = fueword32(&rwlock->rw_state, &state); 3161 if (rv == -1) 3162 error = EFAULT; 3163 3164 /* set read contention bit */ 3165 while (error == 0 && (state & wrflags) && 3166 !(state & URWLOCK_READ_WAITERS)) { 3167 rv = casueword32(&rwlock->rw_state, state, 3168 &oldstate, state | URWLOCK_READ_WAITERS); 3169 if (rv == -1) { 3170 error = EFAULT; 3171 break; 3172 } 3173 if (rv == 0) { 3174 MPASS(oldstate == state); 3175 goto sleep; 3176 } 3177 state = oldstate; 3178 error = thread_check_susp(td, false); 3179 if (error != 0) 3180 break; 3181 } 3182 if (error != 0) { 3183 umtxq_unbusy_unlocked(&uq->uq_key); 3184 break; 3185 } 3186 3187 /* state is changed while setting flags, restart */ 3188 if (!(state & wrflags)) { 3189 umtxq_unbusy_unlocked(&uq->uq_key); 3190 error = thread_check_susp(td, true); 3191 if (error != 0) 3192 break; 3193 continue; 3194 } 3195 3196 sleep: 3197 /* 3198 * Contention bit is set, before sleeping, increase 3199 * read waiter count. 3200 */ 3201 rv = fueword32(&rwlock->rw_blocked_readers, 3202 &blocked_readers); 3203 if (rv == 0) 3204 rv = suword32(&rwlock->rw_blocked_readers, 3205 blocked_readers + 1); 3206 if (rv == -1) { 3207 umtxq_unbusy_unlocked(&uq->uq_key); 3208 error = EFAULT; 3209 break; 3210 } 3211 3212 while (state & wrflags) { 3213 umtxq_lock(&uq->uq_key); 3214 umtxq_insert(uq); 3215 umtxq_unbusy(&uq->uq_key); 3216 3217 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 
3218 NULL : &timo); 3219 3220 umtxq_busy(&uq->uq_key); 3221 umtxq_remove(uq); 3222 umtxq_unlock(&uq->uq_key); 3223 if (error) 3224 break; 3225 rv = fueword32(&rwlock->rw_state, &state); 3226 if (rv == -1) { 3227 error = EFAULT; 3228 break; 3229 } 3230 } 3231 3232 /* decrease read waiter count, and may clear read contention bit */ 3233 rv = fueword32(&rwlock->rw_blocked_readers, 3234 &blocked_readers); 3235 if (rv == 0) 3236 rv = suword32(&rwlock->rw_blocked_readers, 3237 blocked_readers - 1); 3238 if (rv == -1) { 3239 umtxq_unbusy_unlocked(&uq->uq_key); 3240 error = EFAULT; 3241 break; 3242 } 3243 if (blocked_readers == 1) { 3244 rv = fueword32(&rwlock->rw_state, &state); 3245 if (rv == -1) { 3246 umtxq_unbusy_unlocked(&uq->uq_key); 3247 error = EFAULT; 3248 break; 3249 } 3250 for (;;) { 3251 rv = casueword32(&rwlock->rw_state, state, 3252 &oldstate, state & ~URWLOCK_READ_WAITERS); 3253 if (rv == -1) { 3254 error = EFAULT; 3255 break; 3256 } 3257 if (rv == 0) { 3258 MPASS(oldstate == state); 3259 break; 3260 } 3261 state = oldstate; 3262 error1 = thread_check_susp(td, false); 3263 if (error1 != 0) { 3264 if (error == 0) 3265 error = error1; 3266 break; 3267 } 3268 } 3269 } 3270 3271 umtxq_unbusy_unlocked(&uq->uq_key); 3272 if (error != 0) 3273 break; 3274 } 3275 umtx_key_release(&uq->uq_key); 3276 if (error == ERESTART) 3277 error = EINTR; 3278 return (error); 3279 } 3280 3281 static int 3282 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3283 { 3284 struct umtx_abs_timeout timo; 3285 struct umtx_q *uq; 3286 uint32_t flags; 3287 int32_t state, oldstate; 3288 int32_t blocked_writers; 3289 int32_t blocked_readers; 3290 int error, error1, rv; 3291 3292 uq = td->td_umtxq; 3293 error = fueword32(&rwlock->rw_flags, &flags); 3294 if (error == -1) 3295 return (EFAULT); 3296 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3297 if (error != 0) 3298 return (error); 3299 3300 if (timeout != NULL) 3301 umtx_abs_timeout_init2(&timo, timeout); 3302 3303 blocked_readers = 0; 3304 for (;;) { 3305 rv = fueword32(&rwlock->rw_state, &state); 3306 if (rv == -1) { 3307 umtx_key_release(&uq->uq_key); 3308 return (EFAULT); 3309 } 3310 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3311 URWLOCK_READER_COUNT(state) == 0) { 3312 rv = casueword32(&rwlock->rw_state, state, 3313 &oldstate, state | URWLOCK_WRITE_OWNER); 3314 if (rv == -1) { 3315 umtx_key_release(&uq->uq_key); 3316 return (EFAULT); 3317 } 3318 if (rv == 0) { 3319 MPASS(oldstate == state); 3320 umtx_key_release(&uq->uq_key); 3321 return (0); 3322 } 3323 state = oldstate; 3324 error = thread_check_susp(td, true); 3325 if (error != 0) 3326 break; 3327 } 3328 3329 if (error) { 3330 if ((state & (URWLOCK_WRITE_OWNER | 3331 URWLOCK_WRITE_WAITERS)) == 0 && 3332 blocked_readers != 0) { 3333 umtxq_lock(&uq->uq_key); 3334 umtxq_busy(&uq->uq_key); 3335 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3336 UMTX_SHARED_QUEUE); 3337 umtxq_unbusy(&uq->uq_key); 3338 umtxq_unlock(&uq->uq_key); 3339 } 3340 3341 break; 3342 } 3343 3344 /* grab monitor lock */ 3345 umtxq_busy_unlocked(&uq->uq_key); 3346 3347 /* 3348 * Re-read the state, in case it changed between the 3349 * try-lock above and the check below. 
3350 */ 3351 rv = fueword32(&rwlock->rw_state, &state); 3352 if (rv == -1) 3353 error = EFAULT; 3354 3355 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3356 URWLOCK_READER_COUNT(state) != 0) && 3357 (state & URWLOCK_WRITE_WAITERS) == 0) { 3358 rv = casueword32(&rwlock->rw_state, state, 3359 &oldstate, state | URWLOCK_WRITE_WAITERS); 3360 if (rv == -1) { 3361 error = EFAULT; 3362 break; 3363 } 3364 if (rv == 0) { 3365 MPASS(oldstate == state); 3366 goto sleep; 3367 } 3368 state = oldstate; 3369 error = thread_check_susp(td, false); 3370 if (error != 0) 3371 break; 3372 } 3373 if (error != 0) { 3374 umtxq_unbusy_unlocked(&uq->uq_key); 3375 break; 3376 } 3377 3378 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3379 URWLOCK_READER_COUNT(state) == 0) { 3380 umtxq_unbusy_unlocked(&uq->uq_key); 3381 error = thread_check_susp(td, false); 3382 if (error != 0) 3383 break; 3384 continue; 3385 } 3386 sleep: 3387 rv = fueword32(&rwlock->rw_blocked_writers, 3388 &blocked_writers); 3389 if (rv == 0) 3390 rv = suword32(&rwlock->rw_blocked_writers, 3391 blocked_writers + 1); 3392 if (rv == -1) { 3393 umtxq_unbusy_unlocked(&uq->uq_key); 3394 error = EFAULT; 3395 break; 3396 } 3397 3398 while ((state & URWLOCK_WRITE_OWNER) || 3399 URWLOCK_READER_COUNT(state) != 0) { 3400 umtxq_lock(&uq->uq_key); 3401 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3402 umtxq_unbusy(&uq->uq_key); 3403 3404 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 3405 NULL : &timo); 3406 3407 umtxq_busy(&uq->uq_key); 3408 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3409 umtxq_unlock(&uq->uq_key); 3410 if (error) 3411 break; 3412 rv = fueword32(&rwlock->rw_state, &state); 3413 if (rv == -1) { 3414 error = EFAULT; 3415 break; 3416 } 3417 } 3418 3419 rv = fueword32(&rwlock->rw_blocked_writers, 3420 &blocked_writers); 3421 if (rv == 0) 3422 rv = suword32(&rwlock->rw_blocked_writers, 3423 blocked_writers - 1); 3424 if (rv == -1) { 3425 umtxq_unbusy_unlocked(&uq->uq_key); 3426 error = EFAULT; 3427 break; 3428 } 3429 if (blocked_writers == 1) { 3430 rv = fueword32(&rwlock->rw_state, &state); 3431 if (rv == -1) { 3432 umtxq_unbusy_unlocked(&uq->uq_key); 3433 error = EFAULT; 3434 break; 3435 } 3436 for (;;) { 3437 rv = casueword32(&rwlock->rw_state, state, 3438 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3439 if (rv == -1) { 3440 error = EFAULT; 3441 break; 3442 } 3443 if (rv == 0) { 3444 MPASS(oldstate == state); 3445 break; 3446 } 3447 state = oldstate; 3448 error1 = thread_check_susp(td, false); 3449 /* 3450 * We are leaving the URWLOCK_WRITE_WAITERS 3451 * behind, but this should not harm the 3452 * correctness. 
3453 */ 3454 if (error1 != 0) { 3455 if (error == 0) 3456 error = error1; 3457 break; 3458 } 3459 } 3460 rv = fueword32(&rwlock->rw_blocked_readers, 3461 &blocked_readers); 3462 if (rv == -1) { 3463 umtxq_unbusy_unlocked(&uq->uq_key); 3464 error = EFAULT; 3465 break; 3466 } 3467 } else 3468 blocked_readers = 0; 3469 3470 umtxq_unbusy_unlocked(&uq->uq_key); 3471 } 3472 3473 umtx_key_release(&uq->uq_key); 3474 if (error == ERESTART) 3475 error = EINTR; 3476 return (error); 3477 } 3478 3479 static int 3480 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3481 { 3482 struct umtx_q *uq; 3483 uint32_t flags; 3484 int32_t state, oldstate; 3485 int error, rv, q, count; 3486 3487 uq = td->td_umtxq; 3488 error = fueword32(&rwlock->rw_flags, &flags); 3489 if (error == -1) 3490 return (EFAULT); 3491 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3492 if (error != 0) 3493 return (error); 3494 3495 error = fueword32(&rwlock->rw_state, &state); 3496 if (error == -1) { 3497 error = EFAULT; 3498 goto out; 3499 } 3500 if (state & URWLOCK_WRITE_OWNER) { 3501 for (;;) { 3502 rv = casueword32(&rwlock->rw_state, state, 3503 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3504 if (rv == -1) { 3505 error = EFAULT; 3506 goto out; 3507 } 3508 if (rv == 1) { 3509 state = oldstate; 3510 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3511 error = EPERM; 3512 goto out; 3513 } 3514 error = thread_check_susp(td, true); 3515 if (error != 0) 3516 goto out; 3517 } else 3518 break; 3519 } 3520 } else if (URWLOCK_READER_COUNT(state) != 0) { 3521 for (;;) { 3522 rv = casueword32(&rwlock->rw_state, state, 3523 &oldstate, state - 1); 3524 if (rv == -1) { 3525 error = EFAULT; 3526 goto out; 3527 } 3528 if (rv == 1) { 3529 state = oldstate; 3530 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3531 error = EPERM; 3532 goto out; 3533 } 3534 error = thread_check_susp(td, true); 3535 if (error != 0) 3536 goto out; 3537 } else 3538 break; 3539 } 3540 } else { 3541 error = EPERM; 3542 goto out; 3543 } 3544 3545 count = 0; 3546 3547 if (!(flags & URWLOCK_PREFER_READER)) { 3548 if (state & URWLOCK_WRITE_WAITERS) { 3549 count = 1; 3550 q = UMTX_EXCLUSIVE_QUEUE; 3551 } else if (state & URWLOCK_READ_WAITERS) { 3552 count = INT_MAX; 3553 q = UMTX_SHARED_QUEUE; 3554 } 3555 } else { 3556 if (state & URWLOCK_READ_WAITERS) { 3557 count = INT_MAX; 3558 q = UMTX_SHARED_QUEUE; 3559 } else if (state & URWLOCK_WRITE_WAITERS) { 3560 count = 1; 3561 q = UMTX_EXCLUSIVE_QUEUE; 3562 } 3563 } 3564 3565 if (count) { 3566 umtxq_lock(&uq->uq_key); 3567 umtxq_busy(&uq->uq_key); 3568 umtxq_signal_queue(&uq->uq_key, count, q); 3569 umtxq_unbusy(&uq->uq_key); 3570 umtxq_unlock(&uq->uq_key); 3571 } 3572 out: 3573 umtx_key_release(&uq->uq_key); 3574 return (error); 3575 } 3576 3577 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3578 static int 3579 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3580 { 3581 struct umtx_abs_timeout timo; 3582 struct umtx_q *uq; 3583 uint32_t flags, count, count1; 3584 int error, rv, rv1; 3585 3586 uq = td->td_umtxq; 3587 error = fueword32(&sem->_flags, &flags); 3588 if (error == -1) 3589 return (EFAULT); 3590 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3591 if (error != 0) 3592 return (error); 3593 3594 if (timeout != NULL) 3595 umtx_abs_timeout_init2(&timo, timeout); 3596 3597 again: 3598 umtxq_lock(&uq->uq_key); 3599 umtxq_busy(&uq->uq_key); 3600 umtxq_insert(uq); 3601 umtxq_unlock(&uq->uq_key); 3602 rv = casueword32(&sem->_has_waiters, 0, &count1, 
1); 3603 if (rv != -1) 3604 rv1 = fueword32(&sem->_count, &count); 3605 if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) { 3606 if (rv == 0) 3607 rv = suword32(&sem->_has_waiters, 0); 3608 umtxq_lock(&uq->uq_key); 3609 umtxq_unbusy(&uq->uq_key); 3610 umtxq_remove(uq); 3611 umtxq_unlock(&uq->uq_key); 3612 if (rv == -1 || rv1 == -1) { 3613 error = EFAULT; 3614 goto out; 3615 } 3616 if (count != 0) { 3617 error = 0; 3618 goto out; 3619 } 3620 MPASS(rv == 1 && count1 == 0); 3621 rv = thread_check_susp(td, true); 3622 if (rv == 0) 3623 goto again; 3624 error = rv; 3625 goto out; 3626 } 3627 umtxq_lock(&uq->uq_key); 3628 umtxq_unbusy(&uq->uq_key); 3629 3630 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3631 3632 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3633 error = 0; 3634 else { 3635 umtxq_remove(uq); 3636 /* A relative timeout cannot be restarted. */ 3637 if (error == ERESTART && timeout != NULL && 3638 (timeout->_flags & UMTX_ABSTIME) == 0) 3639 error = EINTR; 3640 } 3641 umtxq_unlock(&uq->uq_key); 3642 out: 3643 umtx_key_release(&uq->uq_key); 3644 return (error); 3645 } 3646 3647 /* 3648 * Signal a userland semaphore. 3649 */ 3650 static int 3651 do_sem_wake(struct thread *td, struct _usem *sem) 3652 { 3653 struct umtx_key key; 3654 int error, cnt; 3655 uint32_t flags; 3656 3657 error = fueword32(&sem->_flags, &flags); 3658 if (error == -1) 3659 return (EFAULT); 3660 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3661 return (error); 3662 umtxq_lock(&key); 3663 umtxq_busy(&key); 3664 cnt = umtxq_count(&key); 3665 if (cnt > 0) { 3666 /* 3667 * Check if count is greater than 0, this means the memory is 3668 * still being referenced by user code, so we can safely 3669 * update _has_waiters flag. 
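 * (Conversely, if no thread is sleeping on the semaphore, the userland * object may already have been destroyed, so the word must not be touched.)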
3670 */ 3671 if (cnt == 1) { 3672 umtxq_unlock(&key); 3673 error = suword32(&sem->_has_waiters, 0); 3674 umtxq_lock(&key); 3675 if (error == -1) 3676 error = EFAULT; 3677 } 3678 umtxq_signal(&key, 1); 3679 } 3680 umtxq_unbusy(&key); 3681 umtxq_unlock(&key); 3682 umtx_key_release(&key); 3683 return (error); 3684 } 3685 #endif 3686 3687 static int 3688 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3689 { 3690 struct umtx_abs_timeout timo; 3691 struct umtx_q *uq; 3692 uint32_t count, flags; 3693 int error, rv; 3694 3695 uq = td->td_umtxq; 3696 flags = fuword32(&sem->_flags); 3697 if (timeout != NULL) 3698 umtx_abs_timeout_init2(&timo, timeout); 3699 3700 again: 3701 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3702 if (error != 0) 3703 return (error); 3704 umtxq_lock(&uq->uq_key); 3705 umtxq_busy(&uq->uq_key); 3706 umtxq_insert(uq); 3707 umtxq_unlock(&uq->uq_key); 3708 rv = fueword32(&sem->_count, &count); 3709 if (rv == -1) { 3710 umtxq_lock(&uq->uq_key); 3711 umtxq_unbusy(&uq->uq_key); 3712 umtxq_remove(uq); 3713 umtxq_unlock(&uq->uq_key); 3714 umtx_key_release(&uq->uq_key); 3715 return (EFAULT); 3716 } 3717 for (;;) { 3718 if (USEM_COUNT(count) != 0) { 3719 umtxq_lock(&uq->uq_key); 3720 umtxq_unbusy(&uq->uq_key); 3721 umtxq_remove(uq); 3722 umtxq_unlock(&uq->uq_key); 3723 umtx_key_release(&uq->uq_key); 3724 return (0); 3725 } 3726 if (count == USEM_HAS_WAITERS) 3727 break; 3728 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3729 if (rv == 0) 3730 break; 3731 umtxq_lock(&uq->uq_key); 3732 umtxq_unbusy(&uq->uq_key); 3733 umtxq_remove(uq); 3734 umtxq_unlock(&uq->uq_key); 3735 umtx_key_release(&uq->uq_key); 3736 if (rv == -1) 3737 return (EFAULT); 3738 rv = thread_check_susp(td, true); 3739 if (rv != 0) 3740 return (rv); 3741 goto again; 3742 } 3743 umtxq_lock(&uq->uq_key); 3744 umtxq_unbusy(&uq->uq_key); 3745 3746 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3747 3748 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3749 error = 0; 3750 else { 3751 umtxq_remove(uq); 3752 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3753 /* A relative timeout cannot be restarted. */ 3754 if (error == ERESTART) 3755 error = EINTR; 3756 if (error == EINTR) { 3757 kern_clock_gettime(curthread, timo.clockid, 3758 &timo.cur); 3759 timespecsub(&timo.end, &timo.cur, 3760 &timeout->_timeout); 3761 } 3762 } 3763 } 3764 umtxq_unlock(&uq->uq_key); 3765 umtx_key_release(&uq->uq_key); 3766 return (error); 3767 } 3768 3769 /* 3770 * Signal a userland semaphore. 3771 */ 3772 static int 3773 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3774 { 3775 struct umtx_key key; 3776 int error, cnt, rv; 3777 uint32_t count, flags; 3778 3779 rv = fueword32(&sem->_flags, &flags); 3780 if (rv == -1) 3781 return (EFAULT); 3782 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3783 return (error); 3784 umtxq_lock(&key); 3785 umtxq_busy(&key); 3786 cnt = umtxq_count(&key); 3787 if (cnt > 0) { 3788 /* 3789 * If this was the last sleeping thread, clear the waiters 3790 * flag in _count. 
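 * Clearing it allows a subsequent post to stay in userland when no * waiters are indicated.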
3791 */ 3792 if (cnt == 1) { 3793 umtxq_unlock(&key); 3794 rv = fueword32(&sem->_count, &count); 3795 while (rv != -1 && count & USEM_HAS_WAITERS) { 3796 rv = casueword32(&sem->_count, count, &count, 3797 count & ~USEM_HAS_WAITERS); 3798 if (rv == 1) { 3799 rv = thread_check_susp(td, false); 3800 if (rv != 0) 3801 break; 3802 } 3803 } 3804 if (rv == -1) 3805 error = EFAULT; 3806 else if (rv > 0) { 3807 error = rv; 3808 } 3809 umtxq_lock(&key); 3810 } 3811 3812 umtxq_signal(&key, 1); 3813 } 3814 umtxq_unbusy(&key); 3815 umtxq_unlock(&key); 3816 umtx_key_release(&key); 3817 return (error); 3818 } 3819 3820 #ifdef COMPAT_FREEBSD10 3821 int 3822 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3823 { 3824 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3825 } 3826 3827 int 3828 freebsd10__umtx_unlock(struct thread *td, 3829 struct freebsd10__umtx_unlock_args *uap) 3830 { 3831 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3832 } 3833 #endif 3834 3835 inline int 3836 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3837 { 3838 int error; 3839 3840 error = copyin(uaddr, tsp, sizeof(*tsp)); 3841 if (error == 0) { 3842 if (!timespecvalid_interval(tsp)) 3843 error = EINVAL; 3844 } 3845 return (error); 3846 } 3847 3848 static inline int 3849 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3850 { 3851 int error; 3852 3853 if (size <= sizeof(tp->_timeout)) { 3854 tp->_clockid = CLOCK_REALTIME; 3855 tp->_flags = 0; 3856 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3857 } else 3858 error = copyin(uaddr, tp, sizeof(*tp)); 3859 if (error != 0) 3860 return (error); 3861 if (!timespecvalid_interval(&tp->_timeout)) 3862 return (EINVAL); 3863 return (0); 3864 } 3865 3866 static int 3867 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3868 struct umtx_robust_lists_params *rb) 3869 { 3870 3871 if (size > sizeof(*rb)) 3872 return (EINVAL); 3873 return (copyin(uaddr, rb, size)); 3874 } 3875 3876 static int 3877 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3878 { 3879 3880 /* 3881 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3882 * and we're only called if sz >= sizeof(timespec) as supplied in the 3883 * copyops. 3884 */ 3885 KASSERT(sz >= sizeof(*tsp), 3886 ("umtx_copyops specifies incorrect sizes")); 3887 3888 return (copyout(tsp, uaddr, sizeof(*tsp))); 3889 } 3890 3891 #ifdef COMPAT_FREEBSD10 3892 static int 3893 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3894 const struct umtx_copyops *ops) 3895 { 3896 struct timespec *ts, timeout; 3897 int error; 3898 3899 /* Allow a null timespec (wait forever). 
*/ 3900 if (uap->uaddr2 == NULL) 3901 ts = NULL; 3902 else { 3903 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3904 if (error != 0) 3905 return (error); 3906 ts = &timeout; 3907 } 3908 #ifdef COMPAT_FREEBSD32 3909 if (ops->compat32) 3910 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3911 #endif 3912 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3913 } 3914 3915 static int 3916 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3917 const struct umtx_copyops *ops) 3918 { 3919 #ifdef COMPAT_FREEBSD32 3920 if (ops->compat32) 3921 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3922 #endif 3923 return (do_unlock_umtx(td, uap->obj, uap->val)); 3924 } 3925 #endif /* COMPAT_FREEBSD10 */ 3926 3927 #if !defined(COMPAT_FREEBSD10) 3928 static int 3929 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3930 const struct umtx_copyops *ops __unused) 3931 { 3932 return (EOPNOTSUPP); 3933 } 3934 #endif /* COMPAT_FREEBSD10 */ 3935 3936 static int 3937 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3938 const struct umtx_copyops *ops) 3939 { 3940 struct _umtx_time timeout, *tm_p; 3941 int error; 3942 3943 if (uap->uaddr2 == NULL) 3944 tm_p = NULL; 3945 else { 3946 error = ops->copyin_umtx_time( 3947 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3948 if (error != 0) 3949 return (error); 3950 tm_p = &timeout; 3951 } 3952 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3953 } 3954 3955 static int 3956 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3957 const struct umtx_copyops *ops) 3958 { 3959 struct _umtx_time timeout, *tm_p; 3960 int error; 3961 3962 if (uap->uaddr2 == NULL) 3963 tm_p = NULL; 3964 else { 3965 error = ops->copyin_umtx_time( 3966 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3967 if (error != 0) 3968 return (error); 3969 tm_p = &timeout; 3970 } 3971 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3972 } 3973 3974 static int 3975 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3976 const struct umtx_copyops *ops) 3977 { 3978 struct _umtx_time *tm_p, timeout; 3979 int error; 3980 3981 if (uap->uaddr2 == NULL) 3982 tm_p = NULL; 3983 else { 3984 error = ops->copyin_umtx_time( 3985 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3986 if (error != 0) 3987 return (error); 3988 tm_p = &timeout; 3989 } 3990 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3991 } 3992 3993 static int 3994 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3995 const struct umtx_copyops *ops __unused) 3996 { 3997 3998 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3999 } 4000 4001 #define BATCH_SIZE 128 4002 static int 4003 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 4004 { 4005 char *uaddrs[BATCH_SIZE], **upp; 4006 int count, error, i, pos, tocopy; 4007 4008 upp = (char **)uap->obj; 4009 error = 0; 4010 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4011 pos += tocopy) { 4012 tocopy = MIN(count, BATCH_SIZE); 4013 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 4014 if (error != 0) 4015 break; 4016 for (i = 0; i < tocopy; ++i) { 4017 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 4018 } 4019 maybe_yield(); 4020 } 4021 return (error); 4022 } 4023 4024 static int 4025 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4026 { 4027 uint32_t uaddrs[BATCH_SIZE], *upp; 4028 int count, error, i, pos, tocopy; 4029 4030 upp = (uint32_t *)uap->obj; 4031 error = 0; 4032 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 4033 pos += tocopy) { 4034 tocopy = MIN(count, BATCH_SIZE); 4035 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4036 if (error != 0) 4037 break; 4038 for (i = 0; i < tocopy; ++i) { 4039 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 4040 INT_MAX, 1); 4041 } 4042 maybe_yield(); 4043 } 4044 return (error); 4045 } 4046 4047 static int 4048 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 4049 const struct umtx_copyops *ops) 4050 { 4051 4052 if (ops->compat32) 4053 return (__umtx_op_nwake_private_compat32(td, uap)); 4054 return (__umtx_op_nwake_private_native(td, uap)); 4055 } 4056 4057 static int 4058 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4059 const struct umtx_copyops *ops __unused) 4060 { 4061 4062 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4063 } 4064 4065 static int 4066 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4067 const struct umtx_copyops *ops) 4068 { 4069 struct _umtx_time *tm_p, timeout; 4070 int error; 4071 4072 /* Allow a null timespec (wait forever). */ 4073 if (uap->uaddr2 == NULL) 4074 tm_p = NULL; 4075 else { 4076 error = ops->copyin_umtx_time( 4077 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4078 if (error != 0) 4079 return (error); 4080 tm_p = &timeout; 4081 } 4082 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4083 } 4084 4085 static int 4086 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4087 const struct umtx_copyops *ops __unused) 4088 { 4089 4090 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4091 } 4092 4093 static int 4094 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4095 const struct umtx_copyops *ops) 4096 { 4097 struct _umtx_time *tm_p, timeout; 4098 int error; 4099 4100 /* Allow a null timespec (wait forever). */ 4101 if (uap->uaddr2 == NULL) 4102 tm_p = NULL; 4103 else { 4104 error = ops->copyin_umtx_time( 4105 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4106 if (error != 0) 4107 return (error); 4108 tm_p = &timeout; 4109 } 4110 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4111 } 4112 4113 static int 4114 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4115 const struct umtx_copyops *ops __unused) 4116 { 4117 4118 return (do_wake_umutex(td, uap->obj)); 4119 } 4120 4121 static int 4122 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4123 const struct umtx_copyops *ops __unused) 4124 { 4125 4126 return (do_unlock_umutex(td, uap->obj, false)); 4127 } 4128 4129 static int 4130 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4131 const struct umtx_copyops *ops __unused) 4132 { 4133 4134 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4135 } 4136 4137 static int 4138 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4139 const struct umtx_copyops *ops) 4140 { 4141 struct timespec *ts, timeout; 4142 int error; 4143 4144 /* Allow a null timespec (wait forever). 
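 * The timeout is a plain struct timespec at uaddr2; the CVWAIT_CLOCKID and * CVWAIT_ABSTIME bits in 'val' select the clock (via c_clockid) and whether * the value is absolute.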
*/ 4145 if (uap->uaddr2 == NULL) 4146 ts = NULL; 4147 else { 4148 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4149 if (error != 0) 4150 return (error); 4151 ts = &timeout; 4152 } 4153 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4154 } 4155 4156 static int 4157 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4158 const struct umtx_copyops *ops __unused) 4159 { 4160 4161 return (do_cv_signal(td, uap->obj)); 4162 } 4163 4164 static int 4165 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4166 const struct umtx_copyops *ops __unused) 4167 { 4168 4169 return (do_cv_broadcast(td, uap->obj)); 4170 } 4171 4172 static int 4173 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4174 const struct umtx_copyops *ops) 4175 { 4176 struct _umtx_time timeout; 4177 int error; 4178 4179 /* Allow a null timespec (wait forever). */ 4180 if (uap->uaddr2 == NULL) { 4181 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4182 } else { 4183 error = ops->copyin_umtx_time(uap->uaddr2, 4184 (size_t)uap->uaddr1, &timeout); 4185 if (error != 0) 4186 return (error); 4187 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4188 } 4189 return (error); 4190 } 4191 4192 static int 4193 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4194 const struct umtx_copyops *ops) 4195 { 4196 struct _umtx_time timeout; 4197 int error; 4198 4199 /* Allow a null timespec (wait forever). */ 4200 if (uap->uaddr2 == NULL) { 4201 error = do_rw_wrlock(td, uap->obj, 0); 4202 } else { 4203 error = ops->copyin_umtx_time(uap->uaddr2, 4204 (size_t)uap->uaddr1, &timeout); 4205 if (error != 0) 4206 return (error); 4207 4208 error = do_rw_wrlock(td, uap->obj, &timeout); 4209 } 4210 return (error); 4211 } 4212 4213 static int 4214 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4215 const struct umtx_copyops *ops __unused) 4216 { 4217 4218 return (do_rw_unlock(td, uap->obj)); 4219 } 4220 4221 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4222 static int 4223 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4224 const struct umtx_copyops *ops) 4225 { 4226 struct _umtx_time *tm_p, timeout; 4227 int error; 4228 4229 /* Allow a null timespec (wait forever). */ 4230 if (uap->uaddr2 == NULL) 4231 tm_p = NULL; 4232 else { 4233 error = ops->copyin_umtx_time( 4234 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4235 if (error != 0) 4236 return (error); 4237 tm_p = &timeout; 4238 } 4239 return (do_sem_wait(td, uap->obj, tm_p)); 4240 } 4241 4242 static int 4243 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4244 const struct umtx_copyops *ops __unused) 4245 { 4246 4247 return (do_sem_wake(td, uap->obj)); 4248 } 4249 #endif 4250 4251 static int 4252 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4253 const struct umtx_copyops *ops __unused) 4254 { 4255 4256 return (do_wake2_umutex(td, uap->obj, uap->val)); 4257 } 4258 4259 static int 4260 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4261 const struct umtx_copyops *ops) 4262 { 4263 struct _umtx_time *tm_p, timeout; 4264 size_t uasize; 4265 int error; 4266 4267 /* Allow a null timespec (wait forever). 
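 * When a timeout is supplied, uaddr1 carries the size of the object at * uaddr2 so that both the short and the full struct _umtx_time layouts are * accepted; see umtx_copyin_umtx_time().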
*/ 4268 if (uap->uaddr2 == NULL) { 4269 uasize = 0; 4270 tm_p = NULL; 4271 } else { 4272 uasize = (size_t)uap->uaddr1; 4273 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4274 if (error != 0) 4275 return (error); 4276 tm_p = &timeout; 4277 } 4278 error = do_sem2_wait(td, uap->obj, tm_p); 4279 if (error == EINTR && uap->uaddr2 != NULL && 4280 (timeout._flags & UMTX_ABSTIME) == 0 && 4281 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4282 error = ops->copyout_timeout( 4283 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4284 uasize - ops->umtx_time_sz, &timeout._timeout); 4285 if (error == 0) { 4286 error = EINTR; 4287 } 4288 } 4289 4290 return (error); 4291 } 4292 4293 static int 4294 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4295 const struct umtx_copyops *ops __unused) 4296 { 4297 4298 return (do_sem2_wake(td, uap->obj)); 4299 } 4300 4301 #define USHM_OBJ_UMTX(o) \ 4302 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4303 4304 #define USHMF_LINKED 0x0001 4305 struct umtx_shm_reg { 4306 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4307 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4308 struct umtx_key ushm_key; 4309 struct ucred *ushm_cred; 4310 struct shmfd *ushm_obj; 4311 u_int ushm_refcnt; 4312 u_int ushm_flags; 4313 }; 4314 4315 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4316 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4317 4318 static uma_zone_t umtx_shm_reg_zone; 4319 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4320 static struct mtx umtx_shm_lock; 4321 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4322 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4323 4324 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4325 4326 static void 4327 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4328 { 4329 struct umtx_shm_reg_head d; 4330 struct umtx_shm_reg *reg, *reg1; 4331 4332 TAILQ_INIT(&d); 4333 mtx_lock(&umtx_shm_lock); 4334 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4335 mtx_unlock(&umtx_shm_lock); 4336 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4337 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4338 umtx_shm_free_reg(reg); 4339 } 4340 } 4341 4342 static struct task umtx_shm_reg_delfree_task = 4343 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4344 4345 /* 4346 * Returns 0 if a SHM with the passed key is found in the registry, in which 4347 * case it is returned through 'oreg'. Otherwise, returns an error among ESRCH 4348 * (no corresponding SHM; ESRCH was chosen for compatibility, ENOENT would have 4349 * been preferable) or EOVERFLOW (there is a corresponding SHM, but reference 4350 * count would overflow, so can't return it), in which case '*oreg' is left 4351 * unchanged. 
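 * On success the returned registration's reference count has already been * bumped; the caller is responsible for dropping that reference with * umtx_shm_unref_reg().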
4352 */ 4353 static int 4354 umtx_shm_find_reg_locked(const struct umtx_key *key, 4355 struct umtx_shm_reg **const oreg) 4356 { 4357 struct umtx_shm_reg *reg; 4358 struct umtx_shm_reg_head *reg_head; 4359 4360 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4361 mtx_assert(&umtx_shm_lock, MA_OWNED); 4362 reg_head = &umtx_shm_registry[key->hash]; 4363 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4364 KASSERT(reg->ushm_key.shared, 4365 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4366 if (reg->ushm_key.info.shared.object == 4367 key->info.shared.object && 4368 reg->ushm_key.info.shared.offset == 4369 key->info.shared.offset) { 4370 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4371 KASSERT(reg->ushm_refcnt != 0, 4372 ("reg %p refcnt 0 onlist", reg)); 4373 KASSERT((reg->ushm_flags & USHMF_LINKED) != 0, 4374 ("reg %p not linked", reg)); 4375 /* 4376 * Don't let the count overflow; just deny a new reference 4377 * (this is additional protection against a reference 4378 * count leak, which is known not to exist at the 4379 * time of this writing). 4380 */ 4381 if (__predict_false(reg->ushm_refcnt == UINT_MAX)) 4382 return (EOVERFLOW); 4383 reg->ushm_refcnt++; 4384 *oreg = reg; 4385 return (0); 4386 } 4387 } 4388 return (ESRCH); 4389 } 4390 4391 /* 4392 * Calls umtx_shm_find_reg_locked() under the 'umtx_shm_lock'. 4393 */ 4394 static int 4395 umtx_shm_find_reg(const struct umtx_key *key, struct umtx_shm_reg **const oreg) 4396 { 4397 int error; 4398 4399 mtx_lock(&umtx_shm_lock); 4400 error = umtx_shm_find_reg_locked(key, oreg); 4401 mtx_unlock(&umtx_shm_lock); 4402 return (error); 4403 } 4404 4405 static void 4406 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4407 { 4408 4409 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4410 crfree(reg->ushm_cred); 4411 shm_drop(reg->ushm_obj); 4412 uma_zfree(umtx_shm_reg_zone, reg); 4413 } 4414 4415 static bool 4416 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool linked_ref) 4417 { 4418 mtx_assert(&umtx_shm_lock, MA_OWNED); 4419 KASSERT(reg->ushm_refcnt != 0, ("ushm_reg %p refcnt 0", reg)); 4420 4421 if (linked_ref) { 4422 if ((reg->ushm_flags & USHMF_LINKED) == 0) 4423 /* 4424 * The reference tied to USHMF_LINKED has already been 4425 * released concurrently. 4426 */ 4427 return (false); 4428 4429 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], reg, 4430 ushm_reg_link); 4431 LIST_REMOVE(reg, ushm_obj_link); 4432 reg->ushm_flags &= ~USHMF_LINKED; 4433 } 4434 4435 reg->ushm_refcnt--; 4436 return (reg->ushm_refcnt == 0); 4437 } 4438 4439 static void 4440 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool linked_ref) 4441 { 4442 vm_object_t object; 4443 bool dofree; 4444 4445 if (linked_ref) { 4446 /* 4447 * Note: This may be executed multiple times on the same 4448 * shared-memory VM object in the presence of concurrent callers 4449 * because 'umtx_shm_lock' is not held all along in umtx_shm() 4450 * and here.
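 * Setting OBJ_UMTXDEAD only ORs a flag into the object, so repeating it * for the same object is harmless.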
4451 */ 4452 object = reg->ushm_obj->shm_object; 4453 VM_OBJECT_WLOCK(object); 4454 vm_object_set_flag(object, OBJ_UMTXDEAD); 4455 VM_OBJECT_WUNLOCK(object); 4456 } 4457 mtx_lock(&umtx_shm_lock); 4458 dofree = umtx_shm_unref_reg_locked(reg, linked_ref); 4459 mtx_unlock(&umtx_shm_lock); 4460 if (dofree) 4461 umtx_shm_free_reg(reg); 4462 } 4463 4464 void 4465 umtx_shm_object_init(vm_object_t object) 4466 { 4467 4468 LIST_INIT(USHM_OBJ_UMTX(object)); 4469 } 4470 4471 void 4472 umtx_shm_object_terminated(vm_object_t object) 4473 { 4474 struct umtx_shm_reg *reg, *reg1; 4475 bool dofree; 4476 4477 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4478 return; 4479 4480 dofree = false; 4481 mtx_lock(&umtx_shm_lock); 4482 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4483 if (umtx_shm_unref_reg_locked(reg, true)) { 4484 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4485 ushm_reg_link); 4486 dofree = true; 4487 } 4488 } 4489 mtx_unlock(&umtx_shm_lock); 4490 if (dofree) 4491 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4492 } 4493 4494 static int 4495 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4496 struct umtx_shm_reg **res) 4497 { 4498 struct shmfd *shm; 4499 struct umtx_shm_reg *reg, *reg1; 4500 struct ucred *cred; 4501 int error; 4502 4503 error = umtx_shm_find_reg(key, res); 4504 if (error != ESRCH) { 4505 /* 4506 * Either no error occurred, and '*res' was filled, or EOVERFLOW 4507 * was returned, indicating a reference count limit, and we 4508 * won't create a duplicate registration. In both cases, we are 4509 * done. 4510 */ 4511 return (error); 4512 } 4513 /* No entry, we will create one. */ 4514 4515 cred = td->td_ucred; 4516 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4517 return (ENOMEM); 4518 shm = shm_alloc(td->td_ucred, O_RDWR, false); 4519 if (shm == NULL) { 4520 chgumtxcnt(cred->cr_ruidinfo, -1, 0); 4521 return (ENOMEM); 4522 } 4523 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4524 bcopy(key, &reg->ushm_key, sizeof(*key)); 4525 reg->ushm_obj = shm; 4526 reg->ushm_cred = crhold(cred); 4527 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4528 if (error != 0) { 4529 umtx_shm_free_reg(reg); 4530 return (error); 4531 } 4532 mtx_lock(&umtx_shm_lock); 4533 /* Re-lookup as 'umtx_shm_lock' has been temporarily released. */ 4534 error = umtx_shm_find_reg_locked(key, &reg1); 4535 switch (error) { 4536 case 0: 4537 mtx_unlock(&umtx_shm_lock); 4538 umtx_shm_free_reg(reg); 4539 *res = reg1; 4540 return (0); 4541 case ESRCH: 4542 break; 4543 default: 4544 mtx_unlock(&umtx_shm_lock); 4545 umtx_shm_free_reg(reg); 4546 return (error); 4547 } 4548 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4549 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4550 ushm_obj_link); 4551 reg->ushm_flags = USHMF_LINKED; 4552 /* 4553 * This is one reference for the registry and the list of shared 4554 * mutexes referenced by the VM object containing the lock pointer, and 4555 * another for the caller, which it will free after use. So, one of 4556 * these is tied to the presence of USHMF_LINKED.
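 * The caller's reference is dropped by the final umtx_shm_unref_reg(reg, false) in umtx_shm(); the linked one is dropped on UMTX_SHM_DESTROY or when the backing VM object is terminated.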
4557 */ 4558 reg->ushm_refcnt = 2; 4559 mtx_unlock(&umtx_shm_lock); 4560 *res = reg; 4561 return (0); 4562 } 4563 4564 static int 4565 umtx_shm_alive(struct thread *td, void *addr) 4566 { 4567 vm_map_t map; 4568 vm_map_entry_t entry; 4569 vm_object_t object; 4570 vm_pindex_t pindex; 4571 vm_prot_t prot; 4572 int res, ret; 4573 boolean_t wired; 4574 4575 map = &td->td_proc->p_vmspace->vm_map; 4576 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4577 &object, &pindex, &prot, &wired); 4578 if (res != KERN_SUCCESS) 4579 return (EFAULT); 4580 if (object == NULL) 4581 ret = EINVAL; 4582 else 4583 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 4584 vm_map_lookup_done(map, entry); 4585 return (ret); 4586 } 4587 4588 static void 4589 umtx_shm_init(void) 4590 { 4591 int i; 4592 4593 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4594 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4595 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4596 for (i = 0; i < nitems(umtx_shm_registry); i++) 4597 TAILQ_INIT(&umtx_shm_registry[i]); 4598 } 4599 4600 static int 4601 umtx_shm(struct thread *td, void *addr, u_int flags) 4602 { 4603 struct umtx_key key; 4604 struct umtx_shm_reg *reg; 4605 struct file *fp; 4606 int error, fd; 4607 4608 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4609 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1) 4610 return (EINVAL); 4611 if ((flags & UMTX_SHM_ALIVE) != 0) 4612 return (umtx_shm_alive(td, addr)); 4613 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4614 if (error != 0) 4615 return (error); 4616 KASSERT(key.shared == 1, ("non-shared key")); 4617 error = (flags & UMTX_SHM_CREAT) != 0 ? 4618 umtx_shm_create_reg(td, &key, &reg) : 4619 umtx_shm_find_reg(&key, &reg); 4620 umtx_key_release(&key); 4621 if (error != 0) 4622 return (error); 4623 KASSERT(reg != NULL, ("no reg")); 4624 if ((flags & UMTX_SHM_DESTROY) != 0) { 4625 umtx_shm_unref_reg(reg, true); 4626 } else { 4627 #if 0 4628 #ifdef MAC 4629 error = mac_posixshm_check_open(td->td_ucred, 4630 reg->ushm_obj, FFLAGS(O_RDWR)); 4631 if (error == 0) 4632 #endif 4633 error = shm_access(reg->ushm_obj, td->td_ucred, 4634 FFLAGS(O_RDWR)); 4635 if (error == 0) 4636 #endif 4637 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4638 if (error == 0) { 4639 shm_hold(reg->ushm_obj); 4640 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4641 &shm_ops); 4642 td->td_retval[0] = fd; 4643 fdrop(fp, td); 4644 } 4645 } 4646 umtx_shm_unref_reg(reg, false); 4647 return (error); 4648 } 4649 4650 static int 4651 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, 4652 const struct umtx_copyops *ops __unused) 4653 { 4654 4655 return (umtx_shm(td, uap->uaddr1, uap->val)); 4656 } 4657 4658 static int 4659 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4660 const struct umtx_copyops *ops) 4661 { 4662 struct umtx_robust_lists_params rb; 4663 int error; 4664 4665 if (ops->compat32) { 4666 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4667 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4668 td->td_rb_inact != 0)) 4669 return (EBUSY); 4670 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4671 return (EBUSY); 4672 } 4673 4674 bzero(&rb, sizeof(rb)); 4675 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4676 if (error != 0) 4677 return (error); 4678 4679 if (ops->compat32) 4680 td->td_pflags2 |= TDP2_COMPAT32RB; 4681 4682 td->td_rb_list = rb.robust_list_offset; 4683 td->td_rbp_list = rb.robust_priv_list_offset; 4684 td->td_rb_inact =
rb.robust_inact_offset; 4685 return (0); 4686 } 4687 4688 static int 4689 __umtx_op_get_min_timeout(struct thread *td, struct _umtx_op_args *uap, 4690 const struct umtx_copyops *ops) 4691 { 4692 long val; 4693 int error, val1; 4694 4695 val = sbttons(td->td_proc->p_umtx_min_timeout); 4696 if (ops->compat32) { 4697 val1 = (int)val; 4698 error = copyout(&val1, uap->uaddr1, sizeof(val1)); 4699 } else { 4700 error = copyout(&val, uap->uaddr1, sizeof(val)); 4701 } 4702 return (error); 4703 } 4704 4705 static int 4706 __umtx_op_set_min_timeout(struct thread *td, struct _umtx_op_args *uap, 4707 const struct umtx_copyops *ops) 4708 { 4709 if (uap->val < 0) 4710 return (EINVAL); 4711 td->td_proc->p_umtx_min_timeout = nstosbt(uap->val); 4712 return (0); 4713 } 4714 4715 #if defined(__i386__) || defined(__amd64__) 4716 /* 4717 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4718 * 32-bit time_t there. Other architectures just need the i386 definitions 4719 * along with their standard compat32. 4720 */ 4721 struct timespecx32 { 4722 int64_t tv_sec; 4723 int32_t tv_nsec; 4724 }; 4725 4726 struct umtx_timex32 { 4727 struct timespecx32 _timeout; 4728 uint32_t _flags; 4729 uint32_t _clockid; 4730 }; 4731 4732 #ifndef __i386__ 4733 #define timespeci386 timespec32 4734 #define umtx_timei386 umtx_time32 4735 #endif 4736 #else /* !__i386__ && !__amd64__ */ 4737 /* 32-bit architectures can emulate i386, so define these almost everywhere. */ 4738 struct timespeci386 { 4739 int32_t tv_sec; 4740 int32_t tv_nsec; 4741 }; 4742 4743 struct umtx_timei386 { 4744 struct timespeci386 _timeout; 4745 uint32_t _flags; 4746 uint32_t _clockid; 4747 }; 4748 4749 #if defined(__LP64__) 4750 #define timespecx32 timespec32 4751 #define umtx_timex32 umtx_time32 4752 #endif 4753 #endif 4754 4755 static int 4756 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4757 struct umtx_robust_lists_params *rbp) 4758 { 4759 struct umtx_robust_lists_params_compat32 rb32; 4760 int error; 4761 4762 if (size > sizeof(rb32)) 4763 return (EINVAL); 4764 bzero(&rb32, sizeof(rb32)); 4765 error = copyin(uaddr, &rb32, size); 4766 if (error != 0) 4767 return (error); 4768 CP(rb32, *rbp, robust_list_offset); 4769 CP(rb32, *rbp, robust_priv_list_offset); 4770 CP(rb32, *rbp, robust_inact_offset); 4771 return (0); 4772 } 4773 4774 #ifndef __i386__ 4775 static inline int 4776 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4777 { 4778 struct timespeci386 ts32; 4779 int error; 4780 4781 error = copyin(uaddr, &ts32, sizeof(ts32)); 4782 if (error == 0) { 4783 if (!timespecvalid_interval(&ts32)) 4784 error = EINVAL; 4785 else { 4786 CP(ts32, *tsp, tv_sec); 4787 CP(ts32, *tsp, tv_nsec); 4788 } 4789 } 4790 return (error); 4791 } 4792 4793 static inline int 4794 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4795 { 4796 struct umtx_timei386 t32; 4797 int error; 4798 4799 t32._clockid = CLOCK_REALTIME; 4800 t32._flags = 0; 4801 if (size <= sizeof(t32._timeout)) 4802 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4803 else 4804 error = copyin(uaddr, &t32, sizeof(t32)); 4805 if (error != 0) 4806 return (error); 4807 if (!timespecvalid_interval(&t32._timeout)) 4808 return (EINVAL); 4809 TS_CP(t32, *tp, _timeout); 4810 CP(t32, *tp, _flags); 4811 CP(t32, *tp, _clockid); 4812 return (0); 4813 } 4814 4815 static int 4816 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4817 { 4818 struct timespeci386 remain32 = { 4819 .tv_sec = tsp->tv_sec, 4820 
.tv_nsec = tsp->tv_nsec, 4821 }; 4822 4823 /* 4824 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4825 * and we're only called if sz >= sizeof(timespec) as supplied in the 4826 * copyops. 4827 */ 4828 KASSERT(sz >= sizeof(remain32), 4829 ("umtx_copyops specifies incorrect sizes")); 4830 4831 return (copyout(&remain32, uaddr, sizeof(remain32))); 4832 } 4833 #endif /* !__i386__ */ 4834 4835 #if defined(__i386__) || defined(__LP64__) 4836 static inline int 4837 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4838 { 4839 struct timespecx32 ts32; 4840 int error; 4841 4842 error = copyin(uaddr, &ts32, sizeof(ts32)); 4843 if (error == 0) { 4844 if (!timespecvalid_interval(&ts32)) 4845 error = EINVAL; 4846 else { 4847 CP(ts32, *tsp, tv_sec); 4848 CP(ts32, *tsp, tv_nsec); 4849 } 4850 } 4851 return (error); 4852 } 4853 4854 static inline int 4855 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4856 { 4857 struct umtx_timex32 t32; 4858 int error; 4859 4860 t32._clockid = CLOCK_REALTIME; 4861 t32._flags = 0; 4862 if (size <= sizeof(t32._timeout)) 4863 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4864 else 4865 error = copyin(uaddr, &t32, sizeof(t32)); 4866 if (error != 0) 4867 return (error); 4868 if (!timespecvalid_interval(&t32._timeout)) 4869 return (EINVAL); 4870 TS_CP(t32, *tp, _timeout); 4871 CP(t32, *tp, _flags); 4872 CP(t32, *tp, _clockid); 4873 return (0); 4874 } 4875 4876 static int 4877 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4878 { 4879 struct timespecx32 remain32 = { 4880 .tv_sec = tsp->tv_sec, 4881 .tv_nsec = tsp->tv_nsec, 4882 }; 4883 4884 /* 4885 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4886 * and we're only called if sz >= sizeof(timespec) as supplied in the 4887 * copyops. 
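 * The KASSERT below only restates that contract; the runtime size check remains the caller's responsibility (see the sem2_wait handler above).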
4888 */ 4889 KASSERT(sz >= sizeof(remain32), 4890 ("umtx_copyops specifies incorrect sizes")); 4891 4892 return (copyout(&remain32, uaddr, sizeof(remain32))); 4893 } 4894 #endif /* __i386__ || __LP64__ */ 4895 4896 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4897 const struct umtx_copyops *umtx_ops); 4898 4899 static const _umtx_op_func op_table[] = { 4900 #ifdef COMPAT_FREEBSD10 4901 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4902 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4903 #else 4904 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4905 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4906 #endif 4907 [UMTX_OP_WAIT] = __umtx_op_wait, 4908 [UMTX_OP_WAKE] = __umtx_op_wake, 4909 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4910 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4911 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4912 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4913 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4914 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4915 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4916 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4917 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4918 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4919 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4920 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4921 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4922 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4923 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4924 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4925 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4926 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4927 #else 4928 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4929 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4930 #endif 4931 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4932 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4933 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4934 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4935 [UMTX_OP_SHM] = __umtx_op_shm, 4936 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4937 [UMTX_OP_GET_MIN_TIMEOUT] = __umtx_op_get_min_timeout, 4938 [UMTX_OP_SET_MIN_TIMEOUT] = __umtx_op_set_min_timeout, 4939 }; 4940 4941 static const struct umtx_copyops umtx_native_ops = { 4942 .copyin_timeout = umtx_copyin_timeout, 4943 .copyin_umtx_time = umtx_copyin_umtx_time, 4944 .copyin_robust_lists = umtx_copyin_robust_lists, 4945 .copyout_timeout = umtx_copyout_timeout, 4946 .timespec_sz = sizeof(struct timespec), 4947 .umtx_time_sz = sizeof(struct _umtx_time), 4948 }; 4949 4950 #ifndef __i386__ 4951 static const struct umtx_copyops umtx_native_opsi386 = { 4952 .copyin_timeout = umtx_copyin_timeouti386, 4953 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4954 .copyin_robust_lists = umtx_copyin_robust_lists32, 4955 .copyout_timeout = umtx_copyout_timeouti386, 4956 .timespec_sz = sizeof(struct timespeci386), 4957 .umtx_time_sz = sizeof(struct umtx_timei386), 4958 .compat32 = true, 4959 }; 4960 #endif 4961 4962 #if defined(__i386__) || defined(__LP64__) 4963 /* i386 can emulate other 32-bit archs, too! 
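 * amd64 is the exception: its plain compat32 ABI uses the i386 layout, which is why umtx_native_ops32 below maps to umtx_native_opsi386 there.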
*/ 4964 static const struct umtx_copyops umtx_native_opsx32 = { 4965 .copyin_timeout = umtx_copyin_timeoutx32, 4966 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4967 .copyin_robust_lists = umtx_copyin_robust_lists32, 4968 .copyout_timeout = umtx_copyout_timeoutx32, 4969 .timespec_sz = sizeof(struct timespecx32), 4970 .umtx_time_sz = sizeof(struct umtx_timex32), 4971 .compat32 = true, 4972 }; 4973 4974 #ifdef COMPAT_FREEBSD32 4975 #ifdef __amd64__ 4976 #define umtx_native_ops32 umtx_native_opsi386 4977 #else 4978 #define umtx_native_ops32 umtx_native_opsx32 4979 #endif 4980 #endif /* COMPAT_FREEBSD32 */ 4981 #endif /* __i386__ || __LP64__ */ 4982 4983 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4984 4985 static int 4986 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4987 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4988 { 4989 struct _umtx_op_args uap = { 4990 .obj = obj, 4991 .op = op & ~UMTX_OP__FLAGS, 4992 .val = val, 4993 .uaddr1 = uaddr1, 4994 .uaddr2 = uaddr2 4995 }; 4996 4997 if ((uap.op >= nitems(op_table))) 4998 return (EINVAL); 4999 return ((*op_table[uap.op])(td, &uap, ops)); 5000 } 5001 5002 int 5003 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 5004 { 5005 const struct umtx_copyops *umtx_ops; 5006 5007 umtx_ops = &umtx_native_ops; 5008 #ifdef __LP64__ 5009 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 5010 if ((uap->op & UMTX_OP__I386) != 0) 5011 umtx_ops = &umtx_native_opsi386; 5012 else 5013 umtx_ops = &umtx_native_opsx32; 5014 } 5015 #elif !defined(__i386__) 5016 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 5017 if ((uap->op & UMTX_OP__I386) != 0) 5018 umtx_ops = &umtx_native_opsi386; 5019 #else 5020 /* Likewise, UMTX_OP__I386 is a nop on i386. */ 5021 if ((uap->op & UMTX_OP__32BIT) != 0) 5022 umtx_ops = &umtx_native_opsx32; 5023 #endif 5024 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 5025 uap->uaddr2, umtx_ops)); 5026 } 5027 5028 #ifdef COMPAT_FREEBSD32 5029 #ifdef COMPAT_FREEBSD10 5030 int 5031 freebsd10_freebsd32__umtx_lock(struct thread *td, 5032 struct freebsd10_freebsd32__umtx_lock_args *uap) 5033 { 5034 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 5035 } 5036 5037 int 5038 freebsd10_freebsd32__umtx_unlock(struct thread *td, 5039 struct freebsd10_freebsd32__umtx_unlock_args *uap) 5040 { 5041 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 5042 } 5043 #endif /* COMPAT_FREEBSD10 */ 5044 5045 int 5046 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 5047 { 5048 5049 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 5050 uap->uaddr2, &umtx_native_ops32)); 5051 } 5052 #endif /* COMPAT_FREEBSD32 */ 5053 5054 void 5055 umtx_thread_init(struct thread *td) 5056 { 5057 5058 td->td_umtxq = umtxq_alloc(); 5059 td->td_umtxq->uq_thread = td; 5060 } 5061 5062 void 5063 umtx_thread_fini(struct thread *td) 5064 { 5065 5066 umtxq_free(td->td_umtxq); 5067 } 5068 5069 /* 5070 * It will be called when a new thread is created, e.g. fork(). 5071 */ 5072 void 5073 umtx_thread_alloc(struct thread *td) 5074 { 5075 struct umtx_q *uq; 5076 5077 uq = td->td_umtxq; 5078 uq->uq_inherited_pri = PRI_MAX; 5079 5080 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 5081 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 5082 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 5083 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 5084 } 5085 5086 /* 5087 * exec() hook.
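 * (Invoked from the exec path with the process effectively single-threaded, as the assertions below check.)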
5088 * 5089 * Clear robust lists for all process' threads, not delaying the 5090 * cleanup to thread exit, since the relevant address space is 5091 * destroyed right now. 5092 */ 5093 void 5094 umtx_exec(struct proc *p) 5095 { 5096 struct thread *td; 5097 5098 KASSERT(p == curproc, ("need curproc")); 5099 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 5100 (p->p_flag & P_STOPPED_SINGLE) != 0, 5101 ("curproc must be single-threaded")); 5102 /* 5103 * There is no need to lock the list as only this thread can be 5104 * running. 5105 */ 5106 FOREACH_THREAD_IN_PROC(p, td) { 5107 KASSERT(td == curthread || 5108 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 5109 ("running thread %p %p", p, td)); 5110 umtx_thread_cleanup(td); 5111 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 5112 } 5113 5114 p->p_umtx_min_timeout = 0; 5115 } 5116 5117 /* 5118 * thread exit hook. 5119 */ 5120 void 5121 umtx_thread_exit(struct thread *td) 5122 { 5123 5124 umtx_thread_cleanup(td); 5125 } 5126 5127 static int 5128 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 5129 { 5130 u_long res1; 5131 uint32_t res32; 5132 int error; 5133 5134 if (compat32) { 5135 error = fueword32((void *)ptr, &res32); 5136 if (error == 0) 5137 res1 = res32; 5138 } else { 5139 error = fueword((void *)ptr, &res1); 5140 } 5141 if (error == 0) 5142 *res = res1; 5143 else 5144 error = EFAULT; 5145 return (error); 5146 } 5147 5148 static void 5149 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 5150 bool compat32) 5151 { 5152 struct umutex32 m32; 5153 5154 if (compat32) { 5155 memcpy(&m32, m, sizeof(m32)); 5156 *rb_list = m32.m_rb_lnk; 5157 } else { 5158 *rb_list = m->m_rb_lnk; 5159 } 5160 } 5161 5162 static int 5163 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 5164 bool compat32) 5165 { 5166 struct umutex m; 5167 int error; 5168 5169 KASSERT(td->td_proc == curproc, ("need current vmspace")); 5170 error = copyin((void *)rbp, &m, sizeof(m)); 5171 if (error != 0) 5172 return (error); 5173 if (rb_list != NULL) 5174 umtx_read_rb_list(td, &m, rb_list, compat32); 5175 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5176 return (EINVAL); 5177 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5178 /* inact is cleared after unlock, allow the inconsistency */ 5179 return (inact ? 0 : EINVAL); 5180 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5181 } 5182 5183 static void 5184 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5185 const char *name, bool compat32) 5186 { 5187 int error, i; 5188 uintptr_t rbp; 5189 bool inact; 5190 5191 if (rb_list == 0) 5192 return; 5193 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5194 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5195 if (rbp == *rb_inact) { 5196 inact = true; 5197 *rb_inact = 0; 5198 } else 5199 inact = false; 5200 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5201 } 5202 if (i == umtx_max_rb && umtx_verbose_rb) { 5203 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5204 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5205 } 5206 if (error != 0 && umtx_verbose_rb) { 5207 uprintf("comm %s pid %d: handling %srb error %d\n", 5208 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5209 } 5210 } 5211 5212 /* 5213 * Clean up umtx data. 
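 * Disowns any priority-inheritance mutexes still attributed to the thread, then walks the shared and private robust lists registered via UMTX_OP_ROBUST_LISTS, unlocking robust mutexes the thread still owns.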
5214 */ 5215 static void 5216 umtx_thread_cleanup(struct thread *td) 5217 { 5218 struct umtx_q *uq; 5219 struct umtx_pi *pi; 5220 uintptr_t rb_inact; 5221 bool compat32; 5222 5223 /* 5224 * Disown pi mutexes. 5225 */ 5226 uq = td->td_umtxq; 5227 if (uq != NULL) { 5228 if (uq->uq_inherited_pri != PRI_MAX || 5229 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5230 mtx_lock(&umtx_lock); 5231 uq->uq_inherited_pri = PRI_MAX; 5232 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5233 pi->pi_owner = NULL; 5234 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5235 } 5236 mtx_unlock(&umtx_lock); 5237 } 5238 sched_lend_user_prio_cond(td, PRI_MAX); 5239 } 5240 5241 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5242 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5243 5244 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5245 return; 5246 5247 /* 5248 * Handle terminated robust mutexes. Must be done after 5249 * robust pi disown, otherwise unlock could see unowned 5250 * entries. 5251 */ 5252 rb_inact = td->td_rb_inact; 5253 if (rb_inact != 0) 5254 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5255 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5256 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5257 if (rb_inact != 0) 5258 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5259 } 5260
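/*
 * Illustrative userspace sketch (informal, not copied from a header or manual
 * page): the shared-mutex registry above is reached through _umtx_op(2) with
 * the key address in uaddr1 and a single UMTX_SHM_* flag in val, roughly
 *
 *	fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, &shared_word, NULL);
 *
 * where 'shared_word' sits in a process-shared mapping.  On success the
 * returned descriptor references the one-page shared-memory object created by
 * umtx_shm_create_reg(); UMTX_SHM_LOOKUP, UMTX_SHM_DESTROY and UMTX_SHM_ALIVE
 * take the same address argument.
 */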